123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407 |
- // Tencent is pleased to support the open source community by making RapidJSON
- // available.
- //
- // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All
- // rights reserved.
- //
- // Licensed under the MIT License (the "License"); you may not use this file
- // except in compliance with the License. You may obtain a copy of the License
- // at
- //
- // http://opensource.org/licenses/MIT
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- // License for the specific language governing permissions and limitations under
- // the License.
- #ifndef RAPIDJSON_ENCODEDSTREAM_H_
- #define RAPIDJSON_ENCODEDSTREAM_H_
- #include "memorystream.h"
- #include "stream.h"
- #ifdef __GNUC__
- RAPIDJSON_DIAG_PUSH
- RAPIDJSON_DIAG_OFF(effc++)
- #endif
- #ifdef __clang__
- RAPIDJSON_DIAG_PUSH
- RAPIDJSON_DIAG_OFF(padded)
- #endif
- RAPIDJSON_NAMESPACE_BEGIN
- //! Input byte stream wrapper with a statically bound encoding.
- /*!
- \tparam Encoding The interpretation of encoding of the stream. Either UTF8,
- UTF16LE, UTF16BE, UTF32LE, UTF32BE. \tparam InputByteStream Type of input
- byte stream. For example, FileReadStream.
- */
- template <typename Encoding, typename InputByteStream>
- class EncodedInputStream {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
- public:
- typedef typename Encoding::Ch Ch;
- EncodedInputStream(InputByteStream &is) : is_(is) {
- current_ = Encoding::TakeBOM(is_);
- }
- Ch Peek() const { return current_; }
- Ch Take() {
- Ch c = current_;
- current_ = Encoding::Take(is_);
- return c;
- }
- size_t Tell() const { return is_.Tell(); }
- // Not implemented
- void Put(Ch) { RAPIDJSON_ASSERT(false); }
- void Flush() { RAPIDJSON_ASSERT(false); }
- Ch *PutBegin() {
- RAPIDJSON_ASSERT(false);
- return 0;
- }
- size_t PutEnd(Ch *) {
- RAPIDJSON_ASSERT(false);
- return 0;
- }
- private:
- EncodedInputStream(const EncodedInputStream &);
- EncodedInputStream &operator=(const EncodedInputStream &);
- InputByteStream &is_;
- Ch current_;
- };
- //! Specialized for UTF8 MemoryStream.
- template <>
- class EncodedInputStream<UTF8<>, MemoryStream> {
- public:
- typedef UTF8<>::Ch Ch;
- EncodedInputStream(MemoryStream &is) : is_(is) {
- if (static_cast<unsigned char>(is_.Peek()) == 0xEFu) is_.Take();
- if (static_cast<unsigned char>(is_.Peek()) == 0xBBu) is_.Take();
- if (static_cast<unsigned char>(is_.Peek()) == 0xBFu) is_.Take();
- }
- Ch Peek() const { return is_.Peek(); }
- Ch Take() { return is_.Take(); }
- size_t Tell() const { return is_.Tell(); }
- // Not implemented
- void Put(Ch) {}
- void Flush() {}
- Ch *PutBegin() { return 0; }
- size_t PutEnd(Ch *) { return 0; }
- MemoryStream &is_;
- private:
- EncodedInputStream(const EncodedInputStream &);
- EncodedInputStream &operator=(const EncodedInputStream &);
- };
- //! Output byte stream wrapper with statically bound encoding.
- /*!
- \tparam Encoding The interpretation of encoding of the stream. Either UTF8,
- UTF16LE, UTF16BE, UTF32LE, UTF32BE. \tparam OutputByteStream Type of input
- byte stream. For example, FileWriteStream.
- */
- template <typename Encoding, typename OutputByteStream>
- class EncodedOutputStream {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
- public:
- typedef typename Encoding::Ch Ch;
- EncodedOutputStream(OutputByteStream &os, bool putBOM = true) : os_(os) {
- if (putBOM) Encoding::PutBOM(os_);
- }
- void Put(Ch c) { Encoding::Put(os_, c); }
- void Flush() { os_.Flush(); }
- // Not implemented
- Ch Peek() const {
- RAPIDJSON_ASSERT(false);
- return 0;
- }
- Ch Take() {
- RAPIDJSON_ASSERT(false);
- return 0;
- }
- size_t Tell() const {
- RAPIDJSON_ASSERT(false);
- return 0;
- }
- Ch *PutBegin() {
- RAPIDJSON_ASSERT(false);
- return 0;
- }
- size_t PutEnd(Ch *) {
- RAPIDJSON_ASSERT(false);
- return 0;
- }
- private:
- EncodedOutputStream(const EncodedOutputStream &);
- EncodedOutputStream &operator=(const EncodedOutputStream &);
- OutputByteStream &os_;
- };
// Expands to a comma-separated list naming member x of UTF8, UTF16LE, UTF16BE,
// UTF32LE and UTF32BE (each instantiated with Ch), in that order. The
// AutoUTF stream classes use it to fill function-pointer tables that are
// subscripted with a UTFType value.
// NOTE(review): the order presumably mirrors the UTFType enumerators - confirm.
#define RAPIDJSON_ENCODINGS_FUNC(x) \
  UTF8<Ch>::x,                      \
  UTF16LE<Ch>::x,                   \
  UTF16BE<Ch>::x,                   \
  UTF32LE<Ch>::x,                   \
  UTF32BE<Ch>::x
- //! Input stream wrapper with dynamically bound encoding and automatic encoding
- //! detection.
- /*!
- \tparam CharType Type of character for reading.
- \tparam InputByteStream type of input byte stream to be wrapped.
- */
- template <typename CharType, typename InputByteStream>
- class AutoUTFInputStream {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
- public:
- typedef CharType Ch;
- //! Constructor.
- /*!
- \param is input stream to be wrapped.
- \param type UTF encoding type if it is not detected from the stream.
- */
- AutoUTFInputStream(InputByteStream &is, UTFType type = kUTF8)
- : is_(&is), type_(type), hasBOM_(false) {
- RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE);
- DetectType();
- static const TakeFunc f[] = {RAPIDJSON_ENCODINGS_FUNC(Take)};
- takeFunc_ = f[type_];
- current_ = takeFunc_(*is_);
- }
- UTFType GetType() const { return type_; }
- bool HasBOM() const { return hasBOM_; }
- Ch Peek() const { return current_; }
- Ch Take() {
- Ch c = current_;
- current_ = takeFunc_(*is_);
- return c;
- }
- size_t Tell() const { return is_->Tell(); }
- // Not implemented
- void Put(Ch) { RAPIDJSON_ASSERT(false); }
- void Flush() { RAPIDJSON_ASSERT(false); }
- Ch *PutBegin() {
- RAPIDJSON_ASSERT(false);
- return 0;
- }
- size_t PutEnd(Ch *) {
- RAPIDJSON_ASSERT(false);
- return 0;
- }
- private:
- AutoUTFInputStream(const AutoUTFInputStream &);
- AutoUTFInputStream &operator=(const AutoUTFInputStream &);
- // Detect encoding type with BOM or RFC 4627
- void DetectType() {
- // BOM (Byte Order Mark):
- // 00 00 FE FF UTF-32BE
- // FF FE 00 00 UTF-32LE
- // FE FF UTF-16BE
- // FF FE UTF-16LE
- // EF BB BF UTF-8
- const unsigned char *c =
- reinterpret_cast<const unsigned char *>(is_->Peek4());
- if (!c) return;
- unsigned bom =
- static_cast<unsigned>(c[0] | (c[1] << 8) | (c[2] << 16) | (c[3] << 24));
- hasBOM_ = false;
- if (bom == 0xFFFE0000) {
- type_ = kUTF32BE;
- hasBOM_ = true;
- is_->Take();
- is_->Take();
- is_->Take();
- is_->Take();
- } else if (bom == 0x0000FEFF) {
- type_ = kUTF32LE;
- hasBOM_ = true;
- is_->Take();
- is_->Take();
- is_->Take();
- is_->Take();
- } else if ((bom & 0xFFFF) == 0xFFFE) {
- type_ = kUTF16BE;
- hasBOM_ = true;
- is_->Take();
- is_->Take();
- } else if ((bom & 0xFFFF) == 0xFEFF) {
- type_ = kUTF16LE;
- hasBOM_ = true;
- is_->Take();
- is_->Take();
- } else if ((bom & 0xFFFFFF) == 0xBFBBEF) {
- type_ = kUTF8;
- hasBOM_ = true;
- is_->Take();
- is_->Take();
- is_->Take();
- }
- // RFC 4627: Section 3
- // "Since the first two characters of a JSON text will always be ASCII
- // characters [RFC0020], it is possible to determine whether an octet
- // stream is UTF-8, UTF-16 (BE or LE), or UTF-32 (BE or LE) by looking
- // at the pattern of nulls in the first four octets."
- // 00 00 00 xx UTF-32BE
- // 00 xx 00 xx UTF-16BE
- // xx 00 00 00 UTF-32LE
- // xx 00 xx 00 UTF-16LE
- // xx xx xx xx UTF-8
- if (!hasBOM_) {
- int pattern =
- (c[0] ? 1 : 0) | (c[1] ? 2 : 0) | (c[2] ? 4 : 0) | (c[3] ? 8 : 0);
- switch (pattern) {
- case 0x08:
- type_ = kUTF32BE;
- break;
- case 0x0A:
- type_ = kUTF16BE;
- break;
- case 0x01:
- type_ = kUTF32LE;
- break;
- case 0x05:
- type_ = kUTF16LE;
- break;
- case 0x0F:
- type_ = kUTF8;
- break;
- default:
- break; // Use type defined by user.
- }
- }
- // Runtime check whether the size of character type is sufficient. It only
- // perform checks with assertion.
- if (type_ == kUTF16LE || type_ == kUTF16BE)
- RAPIDJSON_ASSERT(sizeof(Ch) >= 2);
- if (type_ == kUTF32LE || type_ == kUTF32BE)
- RAPIDJSON_ASSERT(sizeof(Ch) >= 4);
- }
- typedef Ch (*TakeFunc)(InputByteStream &is);
- InputByteStream *is_;
- UTFType type_;
- Ch current_;
- TakeFunc takeFunc_;
- bool hasBOM_;
- };
- //! Output stream wrapper with dynamically bound encoding and automatic encoding
- //! detection.
- /*!
- \tparam CharType Type of character for writing.
- \tparam OutputByteStream type of output byte stream to be wrapped.
- */
- template <typename CharType, typename OutputByteStream>
- class AutoUTFOutputStream {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
- public:
- typedef CharType Ch;
- //! Constructor.
- /*!
- \param os output stream to be wrapped.
- \param type UTF encoding type.
- \param putBOM Whether to write BOM at the beginning of the stream.
- */
- AutoUTFOutputStream(OutputByteStream &os, UTFType type, bool putBOM)
- : os_(&os), type_(type) {
- RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE);
- // Runtime check whether the size of character type is sufficient. It only
- // perform checks with assertion.
- if (type_ == kUTF16LE || type_ == kUTF16BE)
- RAPIDJSON_ASSERT(sizeof(Ch) >= 2);
- if (type_ == kUTF32LE || type_ == kUTF32BE)
- RAPIDJSON_ASSERT(sizeof(Ch) >= 4);
- static const PutFunc f[] = {RAPIDJSON_ENCODINGS_FUNC(Put)};
- putFunc_ = f[type_];
- if (putBOM) PutBOM();
- }
- UTFType GetType() const { return type_; }
- void Put(Ch c) { putFunc_(*os_, c); }
- void Flush() { os_->Flush(); }
- // Not implemented
- Ch Peek() const {
- RAPIDJSON_ASSERT(false);
- return 0;
- }
- Ch Take() {
- RAPIDJSON_ASSERT(false);
- return 0;
- }
- size_t Tell() const {
- RAPIDJSON_ASSERT(false);
- return 0;
- }
- Ch *PutBegin() {
- RAPIDJSON_ASSERT(false);
- return 0;
- }
- size_t PutEnd(Ch *) {
- RAPIDJSON_ASSERT(false);
- return 0;
- }
- private:
- AutoUTFOutputStream(const AutoUTFOutputStream &);
- AutoUTFOutputStream &operator=(const AutoUTFOutputStream &);
- void PutBOM() {
- typedef void (*PutBOMFunc)(OutputByteStream &);
- static const PutBOMFunc f[] = {RAPIDJSON_ENCODINGS_FUNC(PutBOM)};
- f[type_](*os_);
- }
- typedef void (*PutFunc)(OutputByteStream &, Ch);
- OutputByteStream *os_;
- UTFType type_;
- PutFunc putFunc_;
- };
- #undef RAPIDJSON_ENCODINGS_FUNC
- RAPIDJSON_NAMESPACE_END
- #ifdef __clang__
- RAPIDJSON_DIAG_POP
- #endif
- #ifdef __GNUC__
- RAPIDJSON_DIAG_POP
- #endif
- #endif // RAPIDJSON_ENCODEDSTREAM_H_
|