12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816 |
- // Tencent is pleased to support the open source community by making RapidJSON
- // available.
- //
- // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All
- // rights reserved.
- //
- // Licensed under the MIT License (the "License"); you may not use this file
- // except in compliance with the License. You may obtain a copy of the License
- // at
- //
- // http://opensource.org/licenses/MIT
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- // License for the specific language governing permissions and limitations under
- // the License.
- #ifndef RAPIDJSON_ENCODINGS_H_
- #define RAPIDJSON_ENCODINGS_H_
- #include "rapidjson.h"
- #if defined(_MSC_VER) && !defined(__clang__)
- RAPIDJSON_DIAG_PUSH
- RAPIDJSON_DIAG_OFF(
- 4244) // conversion from 'type1' to 'type2', possible loss of data
- RAPIDJSON_DIAG_OFF(4702) // unreachable code
- #elif defined(__GNUC__)
- RAPIDJSON_DIAG_PUSH
- RAPIDJSON_DIAG_OFF(effc++)
- RAPIDJSON_DIAG_OFF(overflow)
- #endif
- RAPIDJSON_NAMESPACE_BEGIN
- ///////////////////////////////////////////////////////////////////////////////
- // Encoding
- /*! \class rapidjson::Encoding
- \brief Concept for encoding of Unicode characters.
- \code
- concept Encoding {
- //! Type of character. A "character" is actually a code unit in Unicode's
- //! definition.
- typename Ch;
- enum { supportUnicode = 1 }; // or 0 if not supporting unicode
- //! \brief Encode a Unicode codepoint to an output stream.
- //! \param os Output stream.
- //! \param codepoint A Unicode codepoint, ranging from 0x0 to 0x10FFFF
- //! inclusively.
- template <typename OutputStream>
- static void Encode(OutputStream& os, unsigned codepoint);
- //! \brief Decode a Unicode codepoint from an input stream.
- //! \param is Input stream.
- //! \param codepoint Output of the unicode codepoint.
- //! \return true if a valid codepoint can be decoded from the stream.
- template <typename InputStream>
- static bool Decode(InputStream& is, unsigned* codepoint);
- //! \brief Validate one Unicode codepoint from an encoded stream.
- //! \param is Input stream to obtain codepoint.
- //! \param os Output for copying one codepoint.
- //! \return true if it is valid.
- //! \note This function just validates and copies the codepoint without
- //! actually decoding it.
- template <typename InputStream, typename OutputStream>
- static bool Validate(InputStream& is, OutputStream& os);
- // The following functions deal with byte streams.
- //! Take a character from input byte stream, skip BOM if exist.
- template <typename InputByteStream>
- static CharType TakeBOM(InputByteStream& is);
- //! Take a character from input byte stream.
- template <typename InputByteStream>
- static Ch Take(InputByteStream& is);
- //! Put BOM to output byte stream.
- template <typename OutputByteStream>
- static void PutBOM(OutputByteStream& os);
- //! Put a character to output byte stream.
- template <typename OutputByteStream>
- static void Put(OutputByteStream& os, Ch c);
- };
- \endcode
- */
- ///////////////////////////////////////////////////////////////////////////////
- // UTF8
- //! UTF-8 encoding.
- /*! http://en.wikipedia.org/wiki/UTF-8
- http://tools.ietf.org/html/rfc3629
- \tparam CharType Code unit for storing 8-bit UTF-8 data. Default is char.
- \note implements Encoding concept
- */
- template <typename CharType = char>
- struct UTF8 {
- typedef CharType Ch;
- enum { supportUnicode = 1 };
- template <typename OutputStream>
- static void Encode(OutputStream &os, unsigned codepoint) {
- if (codepoint <= 0x7F)
- os.Put(static_cast<Ch>(codepoint & 0xFF));
- else if (codepoint <= 0x7FF) {
- os.Put(static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF)));
- os.Put(static_cast<Ch>(0x80 | ((codepoint & 0x3F))));
- } else if (codepoint <= 0xFFFF) {
- os.Put(static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF)));
- os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
- os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
- } else {
- RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
- os.Put(static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF)));
- os.Put(static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F)));
- os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
- os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
- }
- }
- template <typename OutputStream>
- static void EncodeUnsafe(OutputStream &os, unsigned codepoint) {
- if (codepoint <= 0x7F)
- PutUnsafe(os, static_cast<Ch>(codepoint & 0xFF));
- else if (codepoint <= 0x7FF) {
- PutUnsafe(os, static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF)));
- PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint & 0x3F))));
- } else if (codepoint <= 0xFFFF) {
- PutUnsafe(os, static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF)));
- PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
- PutUnsafe(os, static_cast<Ch>(0x80 | (codepoint & 0x3F)));
- } else {
- RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
- PutUnsafe(os, static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF)));
- PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F)));
- PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
- PutUnsafe(os, static_cast<Ch>(0x80 | (codepoint & 0x3F)));
- }
- }
- template <typename InputStream>
- static bool Decode(InputStream &is, unsigned *codepoint) {
- #define RAPIDJSON_COPY() \
- c = is.Take(); \
- *codepoint = (*codepoint << 6) | (static_cast<unsigned char>(c) & 0x3Fu)
- #define RAPIDJSON_TRANS(mask) \
- result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0)
- #define RAPIDJSON_TAIL() \
- RAPIDJSON_COPY(); \
- RAPIDJSON_TRANS(0x70)
- typename InputStream::Ch c = is.Take();
- if (!(c & 0x80)) {
- *codepoint = static_cast<unsigned char>(c);
- return true;
- }
- unsigned char type = GetRange(static_cast<unsigned char>(c));
- if (type >= 32) {
- *codepoint = 0;
- } else {
- *codepoint = (0xFFu >> type) & static_cast<unsigned char>(c);
- }
- bool result = true;
- switch (type) {
- case 2:
- RAPIDJSON_TAIL();
- return result;
- case 3:
- RAPIDJSON_TAIL();
- RAPIDJSON_TAIL();
- return result;
- case 4:
- RAPIDJSON_COPY();
- RAPIDJSON_TRANS(0x50);
- RAPIDJSON_TAIL();
- return result;
- case 5:
- RAPIDJSON_COPY();
- RAPIDJSON_TRANS(0x10);
- RAPIDJSON_TAIL();
- RAPIDJSON_TAIL();
- return result;
- case 6:
- RAPIDJSON_TAIL();
- RAPIDJSON_TAIL();
- RAPIDJSON_TAIL();
- return result;
- case 10:
- RAPIDJSON_COPY();
- RAPIDJSON_TRANS(0x20);
- RAPIDJSON_TAIL();
- return result;
- case 11:
- RAPIDJSON_COPY();
- RAPIDJSON_TRANS(0x60);
- RAPIDJSON_TAIL();
- RAPIDJSON_TAIL();
- return result;
- default:
- return false;
- }
- #undef RAPIDJSON_COPY
- #undef RAPIDJSON_TRANS
- #undef RAPIDJSON_TAIL
- }
- template <typename InputStream, typename OutputStream>
- static bool Validate(InputStream &is, OutputStream &os) {
- #define RAPIDJSON_COPY() os.Put(c = is.Take())
- #define RAPIDJSON_TRANS(mask) \
- result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0)
- #define RAPIDJSON_TAIL() \
- RAPIDJSON_COPY(); \
- RAPIDJSON_TRANS(0x70)
- Ch c;
- RAPIDJSON_COPY();
- if (!(c & 0x80)) return true;
- bool result = true;
- switch (GetRange(static_cast<unsigned char>(c))) {
- case 2:
- RAPIDJSON_TAIL();
- return result;
- case 3:
- RAPIDJSON_TAIL();
- RAPIDJSON_TAIL();
- return result;
- case 4:
- RAPIDJSON_COPY();
- RAPIDJSON_TRANS(0x50);
- RAPIDJSON_TAIL();
- return result;
- case 5:
- RAPIDJSON_COPY();
- RAPIDJSON_TRANS(0x10);
- RAPIDJSON_TAIL();
- RAPIDJSON_TAIL();
- return result;
- case 6:
- RAPIDJSON_TAIL();
- RAPIDJSON_TAIL();
- RAPIDJSON_TAIL();
- return result;
- case 10:
- RAPIDJSON_COPY();
- RAPIDJSON_TRANS(0x20);
- RAPIDJSON_TAIL();
- return result;
- case 11:
- RAPIDJSON_COPY();
- RAPIDJSON_TRANS(0x60);
- RAPIDJSON_TAIL();
- RAPIDJSON_TAIL();
- return result;
- default:
- return false;
- }
- #undef RAPIDJSON_COPY
- #undef RAPIDJSON_TRANS
- #undef RAPIDJSON_TAIL
- }
- static unsigned char GetRange(unsigned char c) {
- // Referring to DFA of http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
- // With new mapping 1 -> 0x10, 7 -> 0x20, 9 -> 0x40, such that AND operation
- // can test multiple types.
- static const unsigned char type[] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0x10, 0x10, 0x10, 0x10,
- 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
- 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
- 0x40, 0x40, 0x40, 0x40, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
- 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
- 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
- 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 10, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3,
- 11, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8,
- 8, 8, 8, 8,
- };
- return type[c];
- }
- template <typename InputByteStream>
- static CharType TakeBOM(InputByteStream &is) {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
- typename InputByteStream::Ch c = Take(is);
- if (static_cast<unsigned char>(c) != 0xEFu) return c;
- c = is.Take();
- if (static_cast<unsigned char>(c) != 0xBBu) return c;
- c = is.Take();
- if (static_cast<unsigned char>(c) != 0xBFu) return c;
- c = is.Take();
- return c;
- }
- template <typename InputByteStream>
- static Ch Take(InputByteStream &is) {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
- return static_cast<Ch>(is.Take());
- }
- template <typename OutputByteStream>
- static void PutBOM(OutputByteStream &os) {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
- os.Put(static_cast<typename OutputByteStream::Ch>(0xEFu));
- os.Put(static_cast<typename OutputByteStream::Ch>(0xBBu));
- os.Put(static_cast<typename OutputByteStream::Ch>(0xBFu));
- }
- template <typename OutputByteStream>
- static void Put(OutputByteStream &os, Ch c) {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
- os.Put(static_cast<typename OutputByteStream::Ch>(c));
- }
- };
- ///////////////////////////////////////////////////////////////////////////////
- // UTF16
- //! UTF-16 encoding.
- /*! http://en.wikipedia.org/wiki/UTF-16
- http://tools.ietf.org/html/rfc2781
- \tparam CharType Type for storing 16-bit UTF-16 data. Default is wchar_t.
- C++11 may use char16_t instead.
- \note implements Encoding concept
- \note For in-memory access, no need to concern endianness. The code units
- and code points are represented by CPU's endianness. For streaming, use
- UTF16LE and UTF16BE, which handle endianness.
- */
- template <typename CharType = wchar_t>
- struct UTF16 {
- typedef CharType Ch;
- RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 2);
- enum { supportUnicode = 1 };
- template <typename OutputStream>
- static void Encode(OutputStream &os, unsigned codepoint) {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2);
- if (codepoint <= 0xFFFF) {
- RAPIDJSON_ASSERT(
- codepoint < 0xD800 ||
- codepoint > 0xDFFF); // Code point itself cannot be surrogate pair
- os.Put(static_cast<typename OutputStream::Ch>(codepoint));
- } else {
- RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
- unsigned v = codepoint - 0x10000;
- os.Put(static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800));
- os.Put(static_cast<typename OutputStream::Ch>((v & 0x3FF) | 0xDC00));
- }
- }
- template <typename OutputStream>
- static void EncodeUnsafe(OutputStream &os, unsigned codepoint) {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2);
- if (codepoint <= 0xFFFF) {
- RAPIDJSON_ASSERT(
- codepoint < 0xD800 ||
- codepoint > 0xDFFF); // Code point itself cannot be surrogate pair
- PutUnsafe(os, static_cast<typename OutputStream::Ch>(codepoint));
- } else {
- RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
- unsigned v = codepoint - 0x10000;
- PutUnsafe(os, static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800));
- PutUnsafe(os,
- static_cast<typename OutputStream::Ch>((v & 0x3FF) | 0xDC00));
- }
- }
- template <typename InputStream>
- static bool Decode(InputStream &is, unsigned *codepoint) {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2);
- typename InputStream::Ch c = is.Take();
- if (c < 0xD800 || c > 0xDFFF) {
- *codepoint = static_cast<unsigned>(c);
- return true;
- } else if (c <= 0xDBFF) {
- *codepoint = (static_cast<unsigned>(c) & 0x3FF) << 10;
- c = is.Take();
- *codepoint |= (static_cast<unsigned>(c) & 0x3FF);
- *codepoint += 0x10000;
- return c >= 0xDC00 && c <= 0xDFFF;
- }
- return false;
- }
- template <typename InputStream, typename OutputStream>
- static bool Validate(InputStream &is, OutputStream &os) {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2);
- RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2);
- typename InputStream::Ch c;
- os.Put(static_cast<typename OutputStream::Ch>(c = is.Take()));
- if (c < 0xD800 || c > 0xDFFF)
- return true;
- else if (c <= 0xDBFF) {
- os.Put(c = is.Take());
- return c >= 0xDC00 && c <= 0xDFFF;
- }
- return false;
- }
- };
- //! UTF-16 little endian encoding.
- template <typename CharType = wchar_t>
- struct UTF16LE : UTF16<CharType> {
- template <typename InputByteStream>
- static CharType TakeBOM(InputByteStream &is) {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
- CharType c = Take(is);
- return static_cast<uint16_t>(c) == 0xFEFFu ? Take(is) : c;
- }
- template <typename InputByteStream>
- static CharType Take(InputByteStream &is) {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
- unsigned c = static_cast<uint8_t>(is.Take());
- c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8;
- return static_cast<CharType>(c);
- }
- template <typename OutputByteStream>
- static void PutBOM(OutputByteStream &os) {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
- os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
- os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
- }
- template <typename OutputByteStream>
- static void Put(OutputByteStream &os, CharType c) {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
- os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) &
- 0xFFu));
- os.Put(static_cast<typename OutputByteStream::Ch>(
- (static_cast<unsigned>(c) >> 8) & 0xFFu));
- }
- };
- //! UTF-16 big endian encoding.
- template <typename CharType = wchar_t>
- struct UTF16BE : UTF16<CharType> {
- template <typename InputByteStream>
- static CharType TakeBOM(InputByteStream &is) {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
- CharType c = Take(is);
- return static_cast<uint16_t>(c) == 0xFEFFu ? Take(is) : c;
- }
- template <typename InputByteStream>
- static CharType Take(InputByteStream &is) {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
- unsigned c = static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8;
- c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take()));
- return static_cast<CharType>(c);
- }
- template <typename OutputByteStream>
- static void PutBOM(OutputByteStream &os) {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
- os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
- os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
- }
- template <typename OutputByteStream>
- static void Put(OutputByteStream &os, CharType c) {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
- os.Put(static_cast<typename OutputByteStream::Ch>(
- (static_cast<unsigned>(c) >> 8) & 0xFFu));
- os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) &
- 0xFFu));
- }
- };
- ///////////////////////////////////////////////////////////////////////////////
- // UTF32
- //! UTF-32 encoding.
- /*! http://en.wikipedia.org/wiki/UTF-32
- \tparam CharType Type for storing 32-bit UTF-32 data. Default is unsigned.
- C++11 may use char32_t instead.
- \note implements Encoding concept
- \note For in-memory access, no need to concern endianness. The code units
- and code points are represented by CPU's endianness. For streaming, use
- UTF32LE and UTF32BE, which handle endianness.
- */
- template <typename CharType = unsigned>
- struct UTF32 {
- typedef CharType Ch;
- RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 4);
- enum { supportUnicode = 1 };
- template <typename OutputStream>
- static void Encode(OutputStream &os, unsigned codepoint) {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4);
- RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
- os.Put(codepoint);
- }
- template <typename OutputStream>
- static void EncodeUnsafe(OutputStream &os, unsigned codepoint) {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4);
- RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
- PutUnsafe(os, codepoint);
- }
- template <typename InputStream>
- static bool Decode(InputStream &is, unsigned *codepoint) {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4);
- Ch c = is.Take();
- *codepoint = c;
- return c <= 0x10FFFF;
- }
- template <typename InputStream, typename OutputStream>
- static bool Validate(InputStream &is, OutputStream &os) {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4);
- Ch c;
- os.Put(c = is.Take());
- return c <= 0x10FFFF;
- }
- };
- //! UTF-32 little endian encoding.
- template <typename CharType = unsigned>
- struct UTF32LE : UTF32<CharType> {
- template <typename InputByteStream>
- static CharType TakeBOM(InputByteStream &is) {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
- CharType c = Take(is);
- return static_cast<uint32_t>(c) == 0x0000FEFFu ? Take(is) : c;
- }
- template <typename InputByteStream>
- static CharType Take(InputByteStream &is) {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
- unsigned c = static_cast<uint8_t>(is.Take());
- c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8;
- c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 16;
- c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 24;
- return static_cast<CharType>(c);
- }
- template <typename OutputByteStream>
- static void PutBOM(OutputByteStream &os) {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
- os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
- os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
- os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
- os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
- }
- template <typename OutputByteStream>
- static void Put(OutputByteStream &os, CharType c) {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
- os.Put(static_cast<typename OutputByteStream::Ch>(c & 0xFFu));
- os.Put(static_cast<typename OutputByteStream::Ch>((c >> 8) & 0xFFu));
- os.Put(static_cast<typename OutputByteStream::Ch>((c >> 16) & 0xFFu));
- os.Put(static_cast<typename OutputByteStream::Ch>((c >> 24) & 0xFFu));
- }
- };
- //! UTF-32 big endian encoding.
- template <typename CharType = unsigned>
- struct UTF32BE : UTF32<CharType> {
- template <typename InputByteStream>
- static CharType TakeBOM(InputByteStream &is) {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
- CharType c = Take(is);
- return static_cast<uint32_t>(c) == 0x0000FEFFu ? Take(is) : c;
- }
- template <typename InputByteStream>
- static CharType Take(InputByteStream &is) {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
- unsigned c = static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 24;
- c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 16;
- c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8;
- c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take()));
- return static_cast<CharType>(c);
- }
- template <typename OutputByteStream>
- static void PutBOM(OutputByteStream &os) {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
- os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
- os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
- os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
- os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
- }
- template <typename OutputByteStream>
- static void Put(OutputByteStream &os, CharType c) {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
- os.Put(static_cast<typename OutputByteStream::Ch>((c >> 24) & 0xFFu));
- os.Put(static_cast<typename OutputByteStream::Ch>((c >> 16) & 0xFFu));
- os.Put(static_cast<typename OutputByteStream::Ch>((c >> 8) & 0xFFu));
- os.Put(static_cast<typename OutputByteStream::Ch>(c & 0xFFu));
- }
- };
- ///////////////////////////////////////////////////////////////////////////////
- // ASCII
- //! ASCII encoding.
- /*! http://en.wikipedia.org/wiki/ASCII
- \tparam CharType Code unit for storing 7-bit ASCII data. Default is char.
- \note implements Encoding concept
- */
- template <typename CharType = char>
- struct ASCII {
- typedef CharType Ch;
- enum { supportUnicode = 0 };
- template <typename OutputStream>
- static void Encode(OutputStream &os, unsigned codepoint) {
- RAPIDJSON_ASSERT(codepoint <= 0x7F);
- os.Put(static_cast<Ch>(codepoint & 0xFF));
- }
- template <typename OutputStream>
- static void EncodeUnsafe(OutputStream &os, unsigned codepoint) {
- RAPIDJSON_ASSERT(codepoint <= 0x7F);
- PutUnsafe(os, static_cast<Ch>(codepoint & 0xFF));
- }
- template <typename InputStream>
- static bool Decode(InputStream &is, unsigned *codepoint) {
- uint8_t c = static_cast<uint8_t>(is.Take());
- *codepoint = c;
- return c <= 0X7F;
- }
- template <typename InputStream, typename OutputStream>
- static bool Validate(InputStream &is, OutputStream &os) {
- uint8_t c = static_cast<uint8_t>(is.Take());
- os.Put(static_cast<typename OutputStream::Ch>(c));
- return c <= 0x7F;
- }
- template <typename InputByteStream>
- static CharType TakeBOM(InputByteStream &is) {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
- uint8_t c = static_cast<uint8_t>(Take(is));
- return static_cast<Ch>(c);
- }
- template <typename InputByteStream>
- static Ch Take(InputByteStream &is) {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
- return static_cast<Ch>(is.Take());
- }
- template <typename OutputByteStream>
- static void PutBOM(OutputByteStream &os) {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
- (void)os;
- }
- template <typename OutputByteStream>
- static void Put(OutputByteStream &os, Ch c) {
- RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
- os.Put(static_cast<typename OutputByteStream::Ch>(c));
- }
- };
- ///////////////////////////////////////////////////////////////////////////////
- // AutoUTF
- //! Runtime-specified UTF encoding type of a stream.
enum UTFType {
    // The numeric values are spelled out explicitly; keep them stable.
    kUTF8 = 0,     //!< UTF-8.
    kUTF16LE = 1,  //!< UTF-16 little endian.
    kUTF16BE = 2,  //!< UTF-16 big endian.
    kUTF32LE = 3,  //!< UTF-32 little endian.
    kUTF32BE = 4   //!< UTF-32 big endian.
};
- //! Dynamically select encoding according to stream's runtime-specified UTF
- //! encoding type.
- /*! \note This class can be used with AutoUTFInputStream and
- * AutoUTFOutputStream, which provide GetType().
- */
- template <typename CharType>
- struct AutoUTF {
- typedef CharType Ch;
- enum { supportUnicode = 1 };
- #define RAPIDJSON_ENCODINGS_FUNC(x) \
- UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
- template <typename OutputStream>
- static RAPIDJSON_FORCEINLINE void Encode(OutputStream &os,
- unsigned codepoint) {
- typedef void (*EncodeFunc)(OutputStream &, unsigned);
- static const EncodeFunc f[] = {RAPIDJSON_ENCODINGS_FUNC(Encode)};
- (*f[os.GetType()])(os, codepoint);
- }
- template <typename OutputStream>
- static RAPIDJSON_FORCEINLINE void EncodeUnsafe(OutputStream &os,
- unsigned codepoint) {
- typedef void (*EncodeFunc)(OutputStream &, unsigned);
- static const EncodeFunc f[] = {RAPIDJSON_ENCODINGS_FUNC(EncodeUnsafe)};
- (*f[os.GetType()])(os, codepoint);
- }
- template <typename InputStream>
- static RAPIDJSON_FORCEINLINE bool Decode(InputStream &is,
- unsigned *codepoint) {
- typedef bool (*DecodeFunc)(InputStream &, unsigned *);
- static const DecodeFunc f[] = {RAPIDJSON_ENCODINGS_FUNC(Decode)};
- return (*f[is.GetType()])(is, codepoint);
- }
- template <typename InputStream, typename OutputStream>
- static RAPIDJSON_FORCEINLINE bool Validate(InputStream &is,
- OutputStream &os) {
- typedef bool (*ValidateFunc)(InputStream &, OutputStream &);
- static const ValidateFunc f[] = {RAPIDJSON_ENCODINGS_FUNC(Validate)};
- return (*f[is.GetType()])(is, os);
- }
- #undef RAPIDJSON_ENCODINGS_FUNC
- };
- ///////////////////////////////////////////////////////////////////////////////
- // Transcoder
- //! Encoding conversion.
- template <typename SourceEncoding, typename TargetEncoding>
- struct Transcoder {
- //! Take one Unicode codepoint from source encoding, convert it to target
- //! encoding and put it to the output stream.
- template <typename InputStream, typename OutputStream>
- static RAPIDJSON_FORCEINLINE bool Transcode(InputStream &is,
- OutputStream &os) {
- unsigned codepoint;
- if (!SourceEncoding::Decode(is, &codepoint)) return false;
- TargetEncoding::Encode(os, codepoint);
- return true;
- }
- template <typename InputStream, typename OutputStream>
- static RAPIDJSON_FORCEINLINE bool TranscodeUnsafe(InputStream &is,
- OutputStream &os) {
- unsigned codepoint;
- if (!SourceEncoding::Decode(is, &codepoint)) return false;
- TargetEncoding::EncodeUnsafe(os, codepoint);
- return true;
- }
- //! Validate one Unicode codepoint from an encoded stream.
- template <typename InputStream, typename OutputStream>
- static RAPIDJSON_FORCEINLINE bool Validate(InputStream &is,
- OutputStream &os) {
- return Transcode(
- is, os); // Since source/target encoding is different, must transcode.
- }
- };
- // Forward declaration.
- template <typename Stream>
- inline void PutUnsafe(Stream &stream, typename Stream::Ch c);
- //! Specialization of Transcoder with same source and target encoding.
- template <typename Encoding>
- struct Transcoder<Encoding, Encoding> {
- template <typename InputStream, typename OutputStream>
- static RAPIDJSON_FORCEINLINE bool Transcode(InputStream &is,
- OutputStream &os) {
- os.Put(is.Take()); // Just copy one code unit. This semantic is different
- // from primary template class.
- return true;
- }
- template <typename InputStream, typename OutputStream>
- static RAPIDJSON_FORCEINLINE bool TranscodeUnsafe(InputStream &is,
- OutputStream &os) {
- PutUnsafe(os, is.Take()); // Just copy one code unit. This semantic is
- // different from primary template class.
- return true;
- }
- template <typename InputStream, typename OutputStream>
- static RAPIDJSON_FORCEINLINE bool Validate(InputStream &is,
- OutputStream &os) {
- return Encoding::Validate(is, os); // source/target encoding are the same
- }
- };
- RAPIDJSON_NAMESPACE_END
- #if defined(__GNUC__) || (defined(_MSC_VER) && !defined(__clang__))
- RAPIDJSON_DIAG_POP
- #endif
- #endif // RAPIDJSON_ENCODINGS_H_
|