reader.h 87 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458
  1. // Tencent is pleased to support the open source community by making RapidJSON
  2. // available.
  3. //
  4. // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All
  5. // rights reserved.
  6. //
  7. // Licensed under the MIT License (the "License"); you may not use this file
  8. // except in compliance with the License. You may obtain a copy of the License
  9. // at
  10. //
  11. // http://opensource.org/licenses/MIT
  12. //
  13. // Unless required by applicable law or agreed to in writing, software
  14. // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  15. // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  16. // License for the specific language governing permissions and limitations under
  17. // the License.
  18. #ifndef RAPIDJSON_READER_H_
  19. #define RAPIDJSON_READER_H_
  20. /*! \file reader.h */
  21. #include <limits>
  22. #include "allocators.h"
  23. #include "encodedstream.h"
  24. #include "internal/clzll.h"
  25. #include "internal/meta.h"
  26. #include "internal/stack.h"
  27. #include "internal/strtod.h"
  28. #include "stream.h"
  29. #if defined(RAPIDJSON_SIMD) && defined(_MSC_VER)
  30. #include <intrin.h>
  31. #pragma intrinsic(_BitScanForward)
  32. #endif
  33. #ifdef RAPIDJSON_SSE42
  34. #include <nmmintrin.h>
  35. #elif defined(RAPIDJSON_SSE2)
  36. #include <emmintrin.h>
  37. #elif defined(RAPIDJSON_NEON)
  38. #include <arm_neon.h>
  39. #endif
  40. #ifdef __clang__
  41. RAPIDJSON_DIAG_PUSH
  42. RAPIDJSON_DIAG_OFF(old - style - cast)
  43. RAPIDJSON_DIAG_OFF(padded)
  44. RAPIDJSON_DIAG_OFF(switch - enum)
  45. #elif defined(_MSC_VER)
  46. RAPIDJSON_DIAG_PUSH
  47. RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant
  48. RAPIDJSON_DIAG_OFF(4702) // unreachable code
  49. #endif
  50. #ifdef __GNUC__
  51. RAPIDJSON_DIAG_PUSH
  52. RAPIDJSON_DIAG_OFF(effc++)
  53. #endif
  54. //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
  55. #define RAPIDJSON_NOTHING /* deliberately empty */
  56. #ifndef RAPIDJSON_PARSE_ERROR_EARLY_RETURN
  57. #define RAPIDJSON_PARSE_ERROR_EARLY_RETURN(value) \
  58. RAPIDJSON_MULTILINEMACRO_BEGIN \
  59. if (RAPIDJSON_UNLIKELY(HasParseError())) { \
  60. return value; \
  61. } \
  62. RAPIDJSON_MULTILINEMACRO_END
  63. #endif
  64. #define RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID \
  65. RAPIDJSON_PARSE_ERROR_EARLY_RETURN(RAPIDJSON_NOTHING)
  66. //!@endcond
  67. /*! \def RAPIDJSON_PARSE_ERROR_NORETURN
  68. \ingroup RAPIDJSON_ERRORS
  69. \brief Macro to indicate a parse error.
  70. \param parseErrorCode \ref rapidjson::ParseErrorCode of the error
  71. \param offset position of the error in JSON input (\c size_t)
This macro can be used as a customization point for the internal
  73. error handling mechanism of RapidJSON.
  74. A common usage model is to throw an exception instead of requiring the
  75. caller to explicitly check the \ref rapidjson::GenericReader::Parse's
  76. return value:
  77. \code
  78. #define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode,offset) \
  79. throw ParseException(parseErrorCode, #parseErrorCode, offset)
  80. #include <stdexcept> // std::runtime_error
  81. #include "rapidjson/error/error.h" // rapidjson::ParseResult
  82. struct ParseException : std::runtime_error, rapidjson::ParseResult {
  83. ParseException(rapidjson::ParseErrorCode code, const char* msg, size_t
  84. offset) : std::runtime_error(msg), ParseResult(code, offset) {}
  85. };
  86. #include "rapidjson/reader.h"
  87. \endcode
  88. \see RAPIDJSON_PARSE_ERROR, rapidjson::GenericReader::Parse
  89. */
  90. #ifndef RAPIDJSON_PARSE_ERROR_NORETURN
  91. #define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset) \
  92. RAPIDJSON_MULTILINEMACRO_BEGIN \
  93. RAPIDJSON_ASSERT(!HasParseError()); /* Error can only be assigned once */ \
  94. SetParseError(parseErrorCode, offset); \
  95. RAPIDJSON_MULTILINEMACRO_END
  96. #endif
  97. /*! \def RAPIDJSON_PARSE_ERROR
  98. \ingroup RAPIDJSON_ERRORS
  99. \brief (Internal) macro to indicate and handle a parse error.
  100. \param parseErrorCode \ref rapidjson::ParseErrorCode of the error
  101. \param offset position of the error in JSON input (\c size_t)
  102. Invokes RAPIDJSON_PARSE_ERROR_NORETURN and stops the parsing.
  103. \see RAPIDJSON_PARSE_ERROR_NORETURN
  104. \hideinitializer
  105. */
  106. #ifndef RAPIDJSON_PARSE_ERROR
  107. #define RAPIDJSON_PARSE_ERROR(parseErrorCode, offset) \
  108. RAPIDJSON_MULTILINEMACRO_BEGIN \
  109. RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset); \
  110. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; \
  111. RAPIDJSON_MULTILINEMACRO_END
  112. #endif
  113. #include "error/error.h" // ParseErrorCode, ParseResult
  114. RAPIDJSON_NAMESPACE_BEGIN
  115. ///////////////////////////////////////////////////////////////////////////////
  116. // ParseFlag
  117. /*! \def RAPIDJSON_PARSE_DEFAULT_FLAGS
  118. \ingroup RAPIDJSON_CONFIG
  119. \brief User-defined kParseDefaultFlags definition.
  120. User can define this as any \c ParseFlag combinations.
  121. */
  122. #ifndef RAPIDJSON_PARSE_DEFAULT_FLAGS
  123. #define RAPIDJSON_PARSE_DEFAULT_FLAGS kParseNoFlags
  124. #endif
  125. //! Combination of parseFlags
  126. /*! \see Reader::Parse, Document::Parse, Document::ParseInsitu,
  127. * Document::ParseStream
  128. */
  129. enum ParseFlag {
  130. kParseNoFlags = 0, //!< No flags are set.
  131. kParseInsituFlag = 1, //!< In-situ(destructive) parsing.
  132. kParseValidateEncodingFlag = 2, //!< Validate encoding of JSON strings.
  133. kParseIterativeFlag = 4, //!< Iterative(constant complexity in terms of
  134. //!< function call stack size) parsing.
  135. kParseStopWhenDoneFlag =
  136. 8, //!< After parsing a complete JSON root from stream, stop further
  137. //!< processing the rest of stream. When this flag is used, parser will
  138. //!< not generate kParseErrorDocumentRootNotSingular error.
  139. kParseFullPrecisionFlag =
  140. 16, //!< Parse number in full precision (but slower).
  141. kParseCommentsFlag =
  142. 32, //!< Allow one-line (//) and multi-line (/**/) comments.
  143. kParseNumbersAsStringsFlag =
  144. 64, //!< Parse all numbers (ints/doubles) as strings.
  145. kParseTrailingCommasFlag =
  146. 128, //!< Allow trailing commas at the end of objects and arrays.
  147. kParseNanAndInfFlag = 256, //!< Allow parsing NaN, Inf, Infinity, -Inf and
  148. //!-Infinity as doubles.
  149. kParseDefaultFlags =
  150. RAPIDJSON_PARSE_DEFAULT_FLAGS //!< Default parse flags. Can be customized
  151. //!< by defining
  152. //!< RAPIDJSON_PARSE_DEFAULT_FLAGS
  153. };
  154. ///////////////////////////////////////////////////////////////////////////////
  155. // Handler
  156. /*! \class rapidjson::Handler
  157. \brief Concept for receiving events from GenericReader upon parsing.
  158. The functions return true if no error occurs. If they return false,
  159. the event publisher should terminate the process.
  160. \code
  161. concept Handler {
  162. typename Ch;
  163. bool Null();
  164. bool Bool(bool b);
  165. bool Int(int i);
  166. bool Uint(unsigned i);
  167. bool Int64(int64_t i);
  168. bool Uint64(uint64_t i);
  169. bool Double(double d);
    /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated
    /// (use length)
    bool RawNumber(const Ch* str, SizeType length, bool copy);
    bool String(const Ch* str, SizeType length, bool copy);
    bool StartObject();
    bool Key(const Ch* str, SizeType length, bool copy);
    bool EndObject(SizeType memberCount);
    bool StartArray();
    bool EndArray(SizeType elementCount);
  175. };
  176. \endcode
  177. */
  178. ///////////////////////////////////////////////////////////////////////////////
  179. // BaseReaderHandler
  180. //! Default implementation of Handler.
  181. /*! This can be used as base class of any reader handler.
  182. \note implements Handler concept
  183. */
  184. template <typename Encoding = UTF8<>, typename Derived = void>
  185. struct BaseReaderHandler {
  186. typedef typename Encoding::Ch Ch;
  187. typedef
  188. typename internal::SelectIf<internal::IsSame<Derived, void>,
  189. BaseReaderHandler, Derived>::Type Override;
  190. bool Default() { return true; }
  191. bool Null() { return static_cast<Override &>(*this).Default(); }
  192. bool Bool(bool) { return static_cast<Override &>(*this).Default(); }
  193. bool Int(int) { return static_cast<Override &>(*this).Default(); }
  194. bool Uint(unsigned) { return static_cast<Override &>(*this).Default(); }
  195. bool Int64(int64_t) { return static_cast<Override &>(*this).Default(); }
  196. bool Uint64(uint64_t) { return static_cast<Override &>(*this).Default(); }
  197. bool Double(double) { return static_cast<Override &>(*this).Default(); }
  198. /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use
  199. /// length)
  200. bool RawNumber(const Ch *str, SizeType len, bool copy) {
  201. return static_cast<Override &>(*this).String(str, len, copy);
  202. }
  203. bool String(const Ch *, SizeType, bool) {
  204. return static_cast<Override &>(*this).Default();
  205. }
  206. bool StartObject() { return static_cast<Override &>(*this).Default(); }
  207. bool Key(const Ch *str, SizeType len, bool copy) {
  208. return static_cast<Override &>(*this).String(str, len, copy);
  209. }
  210. bool EndObject(SizeType) { return static_cast<Override &>(*this).Default(); }
  211. bool StartArray() { return static_cast<Override &>(*this).Default(); }
  212. bool EndArray(SizeType) { return static_cast<Override &>(*this).Default(); }
  213. };
  214. ///////////////////////////////////////////////////////////////////////////////
  215. // StreamLocalCopy
  216. namespace internal {
  217. template <typename Stream, int = StreamTraits<Stream>::copyOptimization>
  218. class StreamLocalCopy;
  219. //! Do copy optimization.
  220. template <typename Stream>
  221. class StreamLocalCopy<Stream, 1> {
  222. public:
  223. StreamLocalCopy(Stream &original) : s(original), original_(original) {}
  224. ~StreamLocalCopy() { original_ = s; }
  225. Stream s;
  226. private:
  227. StreamLocalCopy &operator=(const StreamLocalCopy &) /* = delete */;
  228. Stream &original_;
  229. };
  230. //! Keep reference.
  231. template <typename Stream>
  232. class StreamLocalCopy<Stream, 0> {
  233. public:
  234. StreamLocalCopy(Stream &original) : s(original) {}
  235. Stream &s;
  236. private:
  237. StreamLocalCopy &operator=(const StreamLocalCopy &) /* = delete */;
  238. };
  239. } // namespace internal
  240. ///////////////////////////////////////////////////////////////////////////////
  241. // SkipWhitespace
  242. //! Skip the JSON white spaces in a stream.
  243. /*! \param is A input stream for skipping white spaces.
  244. \note This function has SSE2/SSE4.2 specialization.
  245. */
  246. template <typename InputStream>
  247. void SkipWhitespace(InputStream &is) {
  248. internal::StreamLocalCopy<InputStream> copy(is);
  249. InputStream &s(copy.s);
  250. typename InputStream::Ch c;
  251. while ((c = s.Peek()) == ' ' || c == '\n' || c == '\r' || c == '\t') s.Take();
  252. }
  253. inline const char *SkipWhitespace(const char *p, const char *end) {
  254. while (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')) ++p;
  255. return p;
  256. }
  257. #ifdef RAPIDJSON_SSE42
  258. //! Skip whitespace with SSE 4.2 pcmpistrm instruction, testing 16 8-byte
  259. //! characters at once.
  260. inline const char *SkipWhitespace_SIMD(const char *p) {
  261. // Fast return for single non-whitespace
  262. if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
  263. ++p;
  264. else
  265. return p;
  266. // 16-byte align to the next boundary
  267. const char *nextAligned = reinterpret_cast<const char *>(
  268. (reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
  269. while (p != nextAligned)
  270. if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
  271. ++p;
  272. else
  273. return p;
  274. // The rest of string using SIMD
  275. static const char whitespace[16] = " \n\r\t";
  276. const __m128i w =
  277. _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0]));
  278. for (;; p += 16) {
  279. const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
  280. const int r = _mm_cmpistri(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY |
  281. _SIDD_LEAST_SIGNIFICANT |
  282. _SIDD_NEGATIVE_POLARITY);
  283. if (r != 16) // some of characters is non-whitespace
  284. return p + r;
  285. }
  286. }
  287. inline const char *SkipWhitespace_SIMD(const char *p, const char *end) {
  288. // Fast return for single non-whitespace
  289. if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
  290. ++p;
  291. else
  292. return p;
  293. // The middle of string using SIMD
  294. static const char whitespace[16] = " \n\r\t";
  295. const __m128i w =
  296. _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0]));
  297. for (; p <= end - 16; p += 16) {
  298. const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
  299. const int r = _mm_cmpistri(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY |
  300. _SIDD_LEAST_SIGNIFICANT |
  301. _SIDD_NEGATIVE_POLARITY);
  302. if (r != 16) // some of characters is non-whitespace
  303. return p + r;
  304. }
  305. return SkipWhitespace(p, end);
  306. }
  307. #elif defined(RAPIDJSON_SSE2)
  308. //! Skip whitespace with SSE2 instructions, testing 16 8-byte characters at
  309. //! once.
  310. inline const char *SkipWhitespace_SIMD(const char *p) {
  311. // Fast return for single non-whitespace
  312. if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
  313. ++p;
  314. else
  315. return p;
  316. // 16-byte align to the next boundary
  317. const char *nextAligned = reinterpret_cast<const char *>(
  318. (reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
  319. while (p != nextAligned)
  320. if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
  321. ++p;
  322. else
  323. return p;
  324. // The rest of string
  325. #define C16(c) \
  326. { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
  327. static const char whitespaces[4][16] = {C16(' '), C16('\n'), C16('\r'),
  328. C16('\t')};
  329. #undef C16
  330. const __m128i w0 =
  331. _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0]));
  332. const __m128i w1 =
  333. _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0]));
  334. const __m128i w2 =
  335. _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0]));
  336. const __m128i w3 =
  337. _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0]));
  338. for (;; p += 16) {
  339. const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
  340. __m128i x = _mm_cmpeq_epi8(s, w0);
  341. x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
  342. x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
  343. x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
  344. unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x));
  345. if (r != 0) { // some of characters may be non-whitespace
  346. #ifdef _MSC_VER // Find the index of first non-whitespace
  347. unsigned long offset;
  348. _BitScanForward(&offset, r);
  349. return p + offset;
  350. #else
  351. return p + __builtin_ffs(r) - 1;
  352. #endif
  353. }
  354. }
  355. }
  356. inline const char *SkipWhitespace_SIMD(const char *p, const char *end) {
  357. // Fast return for single non-whitespace
  358. if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
  359. ++p;
  360. else
  361. return p;
  362. // The rest of string
  363. #define C16(c) \
  364. { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
  365. static const char whitespaces[4][16] = {C16(' '), C16('\n'), C16('\r'),
  366. C16('\t')};
  367. #undef C16
  368. const __m128i w0 =
  369. _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0]));
  370. const __m128i w1 =
  371. _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0]));
  372. const __m128i w2 =
  373. _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0]));
  374. const __m128i w3 =
  375. _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0]));
  376. for (; p <= end - 16; p += 16) {
  377. const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
  378. __m128i x = _mm_cmpeq_epi8(s, w0);
  379. x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
  380. x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
  381. x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
  382. unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x));
  383. if (r != 0) { // some of characters may be non-whitespace
  384. #ifdef _MSC_VER // Find the index of first non-whitespace
  385. unsigned long offset;
  386. _BitScanForward(&offset, r);
  387. return p + offset;
  388. #else
  389. return p + __builtin_ffs(r) - 1;
  390. #endif
  391. }
  392. }
  393. return SkipWhitespace(p, end);
  394. }
  395. #elif defined(RAPIDJSON_NEON)
  396. //! Skip whitespace with ARM Neon instructions, testing 16 8-byte characters at
  397. //! once.
  398. inline const char *SkipWhitespace_SIMD(const char *p) {
  399. // Fast return for single non-whitespace
  400. if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
  401. ++p;
  402. else
  403. return p;
  404. // 16-byte align to the next boundary
  405. const char *nextAligned = reinterpret_cast<const char *>(
  406. (reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
  407. while (p != nextAligned)
  408. if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
  409. ++p;
  410. else
  411. return p;
  412. const uint8x16_t w0 = vmovq_n_u8(' ');
  413. const uint8x16_t w1 = vmovq_n_u8('\n');
  414. const uint8x16_t w2 = vmovq_n_u8('\r');
  415. const uint8x16_t w3 = vmovq_n_u8('\t');
  416. for (;; p += 16) {
  417. const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
  418. uint8x16_t x = vceqq_u8(s, w0);
  419. x = vorrq_u8(x, vceqq_u8(s, w1));
  420. x = vorrq_u8(x, vceqq_u8(s, w2));
  421. x = vorrq_u8(x, vceqq_u8(s, w3));
  422. x = vmvnq_u8(x); // Negate
  423. x = vrev64q_u8(x); // Rev in 64
  424. uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract
  425. uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract
  426. if (low == 0) {
  427. if (high != 0) {
  428. uint32_t lz = RAPIDJSON_CLZLL(high);
  429. return p + 8 + (lz >> 3);
  430. }
  431. } else {
  432. uint32_t lz = RAPIDJSON_CLZLL(low);
  433. return p + (lz >> 3);
  434. }
  435. }
  436. }
  437. inline const char *SkipWhitespace_SIMD(const char *p, const char *end) {
  438. // Fast return for single non-whitespace
  439. if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
  440. ++p;
  441. else
  442. return p;
  443. const uint8x16_t w0 = vmovq_n_u8(' ');
  444. const uint8x16_t w1 = vmovq_n_u8('\n');
  445. const uint8x16_t w2 = vmovq_n_u8('\r');
  446. const uint8x16_t w3 = vmovq_n_u8('\t');
  447. for (; p <= end - 16; p += 16) {
  448. const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
  449. uint8x16_t x = vceqq_u8(s, w0);
  450. x = vorrq_u8(x, vceqq_u8(s, w1));
  451. x = vorrq_u8(x, vceqq_u8(s, w2));
  452. x = vorrq_u8(x, vceqq_u8(s, w3));
  453. x = vmvnq_u8(x); // Negate
  454. x = vrev64q_u8(x); // Rev in 64
  455. uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract
  456. uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract
  457. if (low == 0) {
  458. if (high != 0) {
  459. uint32_t lz = RAPIDJSON_CLZLL(high);
  460. return p + 8 + (lz >> 3);
  461. }
  462. } else {
  463. uint32_t lz = RAPIDJSON_CLZLL(low);
  464. return p + (lz >> 3);
  465. }
  466. }
  467. return SkipWhitespace(p, end);
  468. }
  469. #endif // RAPIDJSON_NEON
  470. #ifdef RAPIDJSON_SIMD
  471. //! Template function specialization for InsituStringStream
  472. template <>
  473. inline void SkipWhitespace(InsituStringStream &is) {
  474. is.src_ = const_cast<char *>(SkipWhitespace_SIMD(is.src_));
  475. }
  476. //! Template function specialization for StringStream
  477. template <>
  478. inline void SkipWhitespace(StringStream &is) {
  479. is.src_ = SkipWhitespace_SIMD(is.src_);
  480. }
  481. template <>
  482. inline void SkipWhitespace(EncodedInputStream<UTF8<>, MemoryStream> &is) {
  483. is.is_.src_ = SkipWhitespace_SIMD(is.is_.src_, is.is_.end_);
  484. }
  485. #endif // RAPIDJSON_SIMD
  486. ///////////////////////////////////////////////////////////////////////////////
  487. // GenericReader
  488. //! SAX-style JSON parser. Use \ref Reader for UTF8 encoding and default
  489. //! allocator.
/*! GenericReader parses JSON text from a stream, and sends events synchronously
to an object implementing the Handler concept.
It needs to allocate a stack for storing a single decoded string during
non-destructive parsing.
For in-situ parsing, the decoded string is directly written to the source
text string, no temporary buffer is required.
A GenericReader object can be reused for parsing multiple JSON texts.
  497. \tparam SourceEncoding Encoding of the input stream.
  498. \tparam TargetEncoding Encoding of the parse output.
  499. \tparam StackAllocator Allocator type for stack.
  500. */
  501. template <typename SourceEncoding, typename TargetEncoding,
  502. typename StackAllocator = CrtAllocator>
  503. class GenericReader {
  504. public:
  505. typedef typename SourceEncoding::Ch Ch; //!< SourceEncoding character type
  506. //! Constructor.
  507. /*! \param stackAllocator Optional allocator for allocating stack memory.
  508. (Only use for non-destructive parsing) \param stackCapacity stack capacity
  509. in bytes for storing a single decoded string. (Only use for
  510. non-destructive parsing)
  511. */
  512. GenericReader(StackAllocator *stackAllocator = 0,
  513. size_t stackCapacity = kDefaultStackCapacity)
  514. : stack_(stackAllocator, stackCapacity),
  515. parseResult_(),
  516. state_(IterativeParsingStartState) {}
  517. //! Parse JSON text.
  518. /*! \tparam parseFlags Combination of \ref ParseFlag.
  519. \tparam InputStream Type of input stream, implementing Stream concept.
  520. \tparam Handler Type of handler, implementing Handler concept.
  521. \param is Input stream to be parsed.
  522. \param handler The handler to receive events.
  523. \return Whether the parsing is successful.
  524. */
  525. template <unsigned parseFlags, typename InputStream, typename Handler>
  526. ParseResult Parse(InputStream &is, Handler &handler) {
  527. if (parseFlags & kParseIterativeFlag)
  528. return IterativeParse<parseFlags>(is, handler);
  529. parseResult_.Clear();
  530. ClearStackOnExit scope(*this);
  531. SkipWhitespaceAndComments<parseFlags>(is);
  532. RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
  533. if (RAPIDJSON_UNLIKELY(is.Peek() == '\0')) {
  534. RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentEmpty, is.Tell());
  535. RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
  536. } else {
  537. ParseValue<parseFlags>(is, handler);
  538. RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
  539. if (!(parseFlags & kParseStopWhenDoneFlag)) {
  540. SkipWhitespaceAndComments<parseFlags>(is);
  541. RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
  542. if (RAPIDJSON_UNLIKELY(is.Peek() != '\0')) {
  543. RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentRootNotSingular,
  544. is.Tell());
  545. RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
  546. }
  547. }
  548. }
  549. return parseResult_;
  550. }
  551. //! Parse JSON text (with \ref kParseDefaultFlags)
  552. /*! \tparam InputStream Type of input stream, implementing Stream concept
  553. \tparam Handler Type of handler, implementing Handler concept.
  554. \param is Input stream to be parsed.
  555. \param handler The handler to receive events.
  556. \return Whether the parsing is successful.
  557. */
  558. template <typename InputStream, typename Handler>
  559. ParseResult Parse(InputStream &is, Handler &handler) {
  560. return Parse<kParseDefaultFlags>(is, handler);
  561. }
  562. //! Initialize JSON text token-by-token parsing
  563. /*!
  564. */
  565. void IterativeParseInit() {
  566. parseResult_.Clear();
  567. state_ = IterativeParsingStartState;
  568. }
  569. //! Parse one token from JSON text
  570. /*! \tparam InputStream Type of input stream, implementing Stream concept
  571. \tparam Handler Type of handler, implementing Handler concept.
  572. \param is Input stream to be parsed.
  573. \param handler The handler to receive events.
  574. \return Whether the parsing is successful.
  575. */
  576. template <unsigned parseFlags, typename InputStream, typename Handler>
  577. bool IterativeParseNext(InputStream &is, Handler &handler) {
  578. while (RAPIDJSON_LIKELY(is.Peek() != '\0')) {
  579. SkipWhitespaceAndComments<parseFlags>(is);
  580. Token t = Tokenize(is.Peek());
  581. IterativeParsingState n = Predict(state_, t);
  582. IterativeParsingState d = Transit<parseFlags>(state_, t, n, is, handler);
  583. // If we've finished or hit an error...
  584. if (RAPIDJSON_UNLIKELY(IsIterativeParsingCompleteState(d))) {
  585. // Report errors.
  586. if (d == IterativeParsingErrorState) {
  587. HandleError(state_, is);
  588. return false;
  589. }
  590. // Transition to the finish state.
  591. RAPIDJSON_ASSERT(d == IterativeParsingFinishState);
  592. state_ = d;
  593. // If StopWhenDone is not set...
  594. if (!(parseFlags & kParseStopWhenDoneFlag)) {
  595. // ... and extra non-whitespace data is found...
  596. SkipWhitespaceAndComments<parseFlags>(is);
  597. if (is.Peek() != '\0') {
  598. // ... this is considered an error.
  599. HandleError(state_, is);
  600. return false;
  601. }
  602. }
  603. // Success! We are done!
  604. return true;
  605. }
  606. // Transition to the new state.
  607. state_ = d;
  608. // If we parsed anything other than a delimiter, we invoked the handler,
  609. // so we can return true now.
  610. if (!IsIterativeParsingDelimiterState(n)) return true;
  611. }
  612. // We reached the end of file.
  613. stack_.Clear();
  614. if (state_ != IterativeParsingFinishState) {
  615. HandleError(state_, is);
  616. return false;
  617. }
  618. return true;
  619. }
  620. //! Check if token-by-token parsing JSON text is complete
  621. /*! \return Whether the JSON has been fully decoded.
  622. */
  623. RAPIDJSON_FORCEINLINE bool IterativeParseComplete() const {
  624. return IsIterativeParsingCompleteState(state_);
  625. }
  626. //! Whether a parse error has occurred in the last parsing.
  627. bool HasParseError() const { return parseResult_.IsError(); }
  628. //! Get the \ref ParseErrorCode of last parsing.
  629. ParseErrorCode GetParseErrorCode() const { return parseResult_.Code(); }
  630. //! Get the position of last parsing error in input, 0 otherwise.
  631. size_t GetErrorOffset() const { return parseResult_.Offset(); }
  632. protected:
  633. void SetParseError(ParseErrorCode code, size_t offset) {
  634. parseResult_.Set(code, offset);
  635. }
  636. private:
  // Prohibit copy constructor & assignment operator: both are declared in the
  // private section and not defined here.
  GenericReader(const GenericReader &);
  GenericReader &operator=(const GenericReader &);
  640. void ClearStack() { stack_.Clear(); }
  641. // clear stack on any exit from ParseStream, e.g. due to exception
  642. struct ClearStackOnExit {
  643. explicit ClearStackOnExit(GenericReader &r) : r_(r) {}
  644. ~ClearStackOnExit() { r_.ClearStack(); }
  645. private:
  646. GenericReader &r_;
  647. ClearStackOnExit(const ClearStackOnExit &);
  648. ClearStackOnExit &operator=(const ClearStackOnExit &);
  649. };
  650. template <unsigned parseFlags, typename InputStream>
  651. void SkipWhitespaceAndComments(InputStream &is) {
  652. SkipWhitespace(is);
  653. if (parseFlags & kParseCommentsFlag) {
  654. while (RAPIDJSON_UNLIKELY(Consume(is, '/'))) {
  655. if (Consume(is, '*')) {
  656. while (true) {
  657. if (RAPIDJSON_UNLIKELY(is.Peek() == '\0'))
  658. RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError,
  659. is.Tell());
  660. else if (Consume(is, '*')) {
  661. if (Consume(is, '/')) break;
  662. } else
  663. is.Take();
  664. }
  665. } else if (RAPIDJSON_LIKELY(Consume(is, '/')))
  666. while (is.Peek() != '\0' && is.Take() != '\n') {
  667. }
  668. else
  669. RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell());
  670. SkipWhitespace(is);
  671. }
  672. }
  673. }
  674. // Parse object: { string : value, ... }
  675. template <unsigned parseFlags, typename InputStream, typename Handler>
  676. void ParseObject(InputStream &is, Handler &handler) {
  677. RAPIDJSON_ASSERT(is.Peek() == '{');
  678. is.Take(); // Skip '{'
  679. if (RAPIDJSON_UNLIKELY(!handler.StartObject()))
  680. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
  681. SkipWhitespaceAndComments<parseFlags>(is);
  682. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  683. if (Consume(is, '}')) {
  684. if (RAPIDJSON_UNLIKELY(!handler.EndObject(0))) // empty object
  685. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
  686. return;
  687. }
  688. for (SizeType memberCount = 0;;) {
  689. if (RAPIDJSON_UNLIKELY(is.Peek() != '"'))
  690. RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell());
  691. ParseString<parseFlags>(is, handler, true);
  692. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  693. SkipWhitespaceAndComments<parseFlags>(is);
  694. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  695. if (RAPIDJSON_UNLIKELY(!Consume(is, ':')))
  696. RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell());
  697. SkipWhitespaceAndComments<parseFlags>(is);
  698. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  699. ParseValue<parseFlags>(is, handler);
  700. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  701. SkipWhitespaceAndComments<parseFlags>(is);
  702. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  703. ++memberCount;
  704. switch (is.Peek()) {
  705. case ',':
  706. is.Take();
  707. SkipWhitespaceAndComments<parseFlags>(is);
  708. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  709. break;
  710. case '}':
  711. is.Take();
  712. if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount)))
  713. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
  714. return;
  715. default:
  716. RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket,
  717. is.Tell());
  718. break; // This useless break is only for making warning and coverage
  719. // happy
  720. }
  721. if (parseFlags & kParseTrailingCommasFlag) {
  722. if (is.Peek() == '}') {
  723. if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount)))
  724. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
  725. is.Take();
  726. return;
  727. }
  728. }
  729. }
  730. }
  731. // Parse array: [ value, ... ]
  732. template <unsigned parseFlags, typename InputStream, typename Handler>
  733. void ParseArray(InputStream &is, Handler &handler) {
  734. RAPIDJSON_ASSERT(is.Peek() == '[');
  735. is.Take(); // Skip '['
  736. if (RAPIDJSON_UNLIKELY(!handler.StartArray()))
  737. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
  738. SkipWhitespaceAndComments<parseFlags>(is);
  739. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  740. if (Consume(is, ']')) {
  741. if (RAPIDJSON_UNLIKELY(!handler.EndArray(0))) // empty array
  742. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
  743. return;
  744. }
  745. for (SizeType elementCount = 0;;) {
  746. ParseValue<parseFlags>(is, handler);
  747. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  748. ++elementCount;
  749. SkipWhitespaceAndComments<parseFlags>(is);
  750. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  751. if (Consume(is, ',')) {
  752. SkipWhitespaceAndComments<parseFlags>(is);
  753. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  754. } else if (Consume(is, ']')) {
  755. if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount)))
  756. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
  757. return;
  758. } else
  759. RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket,
  760. is.Tell());
  761. if (parseFlags & kParseTrailingCommasFlag) {
  762. if (is.Peek() == ']') {
  763. if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount)))
  764. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
  765. is.Take();
  766. return;
  767. }
  768. }
  769. }
  770. }
  771. template <unsigned parseFlags, typename InputStream, typename Handler>
  772. void ParseNull(InputStream &is, Handler &handler) {
  773. RAPIDJSON_ASSERT(is.Peek() == 'n');
  774. is.Take();
  775. if (RAPIDJSON_LIKELY(Consume(is, 'u') && Consume(is, 'l') &&
  776. Consume(is, 'l'))) {
  777. if (RAPIDJSON_UNLIKELY(!handler.Null()))
  778. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
  779. } else
  780. RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
  781. }
  782. template <unsigned parseFlags, typename InputStream, typename Handler>
  783. void ParseTrue(InputStream &is, Handler &handler) {
  784. RAPIDJSON_ASSERT(is.Peek() == 't');
  785. is.Take();
  786. if (RAPIDJSON_LIKELY(Consume(is, 'r') && Consume(is, 'u') &&
  787. Consume(is, 'e'))) {
  788. if (RAPIDJSON_UNLIKELY(!handler.Bool(true)))
  789. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
  790. } else
  791. RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
  792. }
  793. template <unsigned parseFlags, typename InputStream, typename Handler>
  794. void ParseFalse(InputStream &is, Handler &handler) {
  795. RAPIDJSON_ASSERT(is.Peek() == 'f');
  796. is.Take();
  797. if (RAPIDJSON_LIKELY(Consume(is, 'a') && Consume(is, 'l') &&
  798. Consume(is, 's') && Consume(is, 'e'))) {
  799. if (RAPIDJSON_UNLIKELY(!handler.Bool(false)))
  800. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
  801. } else
  802. RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
  803. }
  804. template <typename InputStream>
  805. RAPIDJSON_FORCEINLINE static bool Consume(InputStream &is,
  806. typename InputStream::Ch expect) {
  807. if (RAPIDJSON_LIKELY(is.Peek() == expect)) {
  808. is.Take();
  809. return true;
  810. } else
  811. return false;
  812. }
  813. // Helper function to parse four hexadecimal digits in \uXXXX in
  814. // ParseString().
  815. template <typename InputStream>
  816. unsigned ParseHex4(InputStream &is, size_t escapeOffset) {
  817. unsigned codepoint = 0;
  818. for (int i = 0; i < 4; i++) {
  819. Ch c = is.Peek();
  820. codepoint <<= 4;
  821. codepoint += static_cast<unsigned>(c);
  822. if (c >= '0' && c <= '9')
  823. codepoint -= '0';
  824. else if (c >= 'A' && c <= 'F')
  825. codepoint -= 'A' - 10;
  826. else if (c >= 'a' && c <= 'f')
  827. codepoint -= 'a' - 10;
  828. else {
  829. RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorStringUnicodeEscapeInvalidHex,
  830. escapeOffset);
  831. RAPIDJSON_PARSE_ERROR_EARLY_RETURN(0);
  832. }
  833. is.Take();
  834. }
  835. return codepoint;
  836. }
  837. template <typename CharType>
  838. class StackStream {
  839. public:
  840. typedef CharType Ch;
  841. StackStream(internal::Stack<StackAllocator> &stack)
  842. : stack_(stack), length_(0) {}
  843. RAPIDJSON_FORCEINLINE void Put(Ch c) {
  844. *stack_.template Push<Ch>() = c;
  845. ++length_;
  846. }
  847. RAPIDJSON_FORCEINLINE void *Push(SizeType count) {
  848. length_ += count;
  849. return stack_.template Push<Ch>(count);
  850. }
  851. size_t Length() const { return length_; }
  852. Ch *Pop() { return stack_.template Pop<Ch>(length_); }
  853. private:
  854. StackStream(const StackStream &);
  855. StackStream &operator=(const StackStream &);
  856. internal::Stack<StackAllocator> &stack_;
  857. SizeType length_;
  858. };
  859. // Parse string and generate String event. Different code paths for
  860. // kParseInsituFlag.
  861. template <unsigned parseFlags, typename InputStream, typename Handler>
  862. void ParseString(InputStream &is, Handler &handler, bool isKey = false) {
  863. internal::StreamLocalCopy<InputStream> copy(is);
  864. InputStream &s(copy.s);
  865. RAPIDJSON_ASSERT(s.Peek() == '\"');
  866. s.Take(); // Skip '\"'
  867. bool success = false;
  868. if (parseFlags & kParseInsituFlag) {
  869. typename InputStream::Ch *head = s.PutBegin();
  870. ParseStringToStream<parseFlags, SourceEncoding, SourceEncoding>(s, s);
  871. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  872. size_t length = s.PutEnd(head) - 1;
  873. RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
  874. const typename TargetEncoding::Ch *const str =
  875. reinterpret_cast<typename TargetEncoding::Ch *>(head);
  876. success = (isKey ? handler.Key(str, SizeType(length), false)
  877. : handler.String(str, SizeType(length), false));
  878. } else {
  879. StackStream<typename TargetEncoding::Ch> stackStream(stack_);
  880. ParseStringToStream<parseFlags, SourceEncoding, TargetEncoding>(
  881. s, stackStream);
  882. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  883. SizeType length = static_cast<SizeType>(stackStream.Length()) - 1;
  884. const typename TargetEncoding::Ch *const str = stackStream.Pop();
  885. success = (isKey ? handler.Key(str, length, true)
  886. : handler.String(str, length, true));
  887. }
  888. if (RAPIDJSON_UNLIKELY(!success))
  889. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, s.Tell());
  890. }
  891. // Parse string to an output is
  892. // This function handles the prefix/suffix double quotes, escaping, and
  893. // optional encoding validation.
  894. template <unsigned parseFlags, typename SEncoding, typename TEncoding,
  895. typename InputStream, typename OutputStream>
  896. RAPIDJSON_FORCEINLINE void ParseStringToStream(InputStream &is,
  897. OutputStream &os) {
  898. //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
  899. #define Z16 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
  900. static const char escape[256] = {
  901. Z16, Z16, 0, 0, '\"', 0, 0, 0, 0, 0, 0, 0, 0,
  902. 0, 0, 0, 0, '/', Z16, Z16, 0, 0, 0, 0, 0, 0,
  903. 0, 0, 0, 0, 0, 0, '\\', 0, 0, 0, 0, 0, '\b',
  904. 0, 0, 0, '\f', 0, 0, 0, 0, 0, 0, 0, '\n', 0,
  905. 0, 0, '\r', 0, '\t', 0, 0, 0, 0, 0, 0, 0, 0,
  906. 0, 0, 0, Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16};
  907. #undef Z16
  908. //!@endcond
  909. for (;;) {
  910. // Scan and copy string before "\\\"" or < 0x20. This is an optional
  911. // optimzation.
  912. if (!(parseFlags & kParseValidateEncodingFlag))
  913. ScanCopyUnescapedString(is, os);
  914. Ch c = is.Peek();
  915. if (RAPIDJSON_UNLIKELY(c == '\\')) { // Escape
  916. size_t escapeOffset = is.Tell(); // For invalid escaping, report the
  917. // initial '\\' as error offset
  918. is.Take();
  919. Ch e = is.Peek();
  920. if ((sizeof(Ch) == 1 || unsigned(e) < 256) &&
  921. RAPIDJSON_LIKELY(escape[static_cast<unsigned char>(e)])) {
  922. is.Take();
  923. os.Put(static_cast<typename TEncoding::Ch>(
  924. escape[static_cast<unsigned char>(e)]));
  925. } else if (RAPIDJSON_LIKELY(e == 'u')) { // Unicode
  926. is.Take();
  927. unsigned codepoint = ParseHex4(is, escapeOffset);
  928. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  929. if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDBFF)) {
  930. // Handle UTF-16 surrogate pair
  931. if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u')))
  932. RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid,
  933. escapeOffset);
  934. unsigned codepoint2 = ParseHex4(is, escapeOffset);
  935. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  936. if (RAPIDJSON_UNLIKELY(codepoint2 < 0xDC00 || codepoint2 > 0xDFFF))
  937. RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid,
  938. escapeOffset);
  939. codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) +
  940. 0x10000;
  941. }
  942. TEncoding::Encode(os, codepoint);
  943. } else
  944. RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, escapeOffset);
  945. } else if (RAPIDJSON_UNLIKELY(c == '"')) { // Closing double quote
  946. is.Take();
  947. os.Put('\0'); // null-terminate the string
  948. return;
  949. } else if (RAPIDJSON_UNLIKELY(static_cast<unsigned>(c) <
  950. 0x20)) { // RFC 4627: unescaped = %x20-21 /
  951. // %x23-5B / %x5D-10FFFF
  952. if (c == '\0')
  953. RAPIDJSON_PARSE_ERROR(kParseErrorStringMissQuotationMark, is.Tell());
  954. else
  955. RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, is.Tell());
  956. } else {
  957. size_t offset = is.Tell();
  958. if (RAPIDJSON_UNLIKELY(
  959. (parseFlags & kParseValidateEncodingFlag
  960. ? !Transcoder<SEncoding, TEncoding>::Validate(is, os)
  961. : !Transcoder<SEncoding, TEncoding>::Transcode(is, os))))
  962. RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, offset);
  963. }
  964. }
  965. }
  // Generic fallback of the "scan and copy unescaped characters" fast path
  // used by ParseStringToStream(): it copies nothing and leaves both streams
  // untouched, so the caller handles every character itself.
  template <typename InputStream, typename OutputStream>
  static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InputStream &,
                                                            OutputStream &) {
    // Do nothing for generic version
  }
  971. #if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42)
  // StringStream -> StackStream<char>
  // SSE2/SSE4.2 fast path: copies characters from `is` to `os` up to (not
  // including) the first '\"', '\\' or byte < 0x20, and leaves is.src_
  // pointing at that character. A '\0' byte also stops the scan because it is
  // < 0x20.
  static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(
      StringStream &is, StackStream<char> &os) {
    const char *p = is.src_;
    // Scan one by one until alignment (unaligned load may cross page boundary
    // and cause crash)
    const char *nextAligned = reinterpret_cast<const char *>(
        (reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
    while (p != nextAligned)
      if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') ||
          RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
        is.src_ = p;
        return;
      } else
        os.Put(*p++);
    // The rest of string using SIMD
    // 16-byte patterns: all '\"', all '\\', and all 0x1F (upper bound of the
    // control-character range).
    static const char dquote[16] = {'\"', '\"', '\"', '\"', '\"', '\"',
                                    '\"', '\"', '\"', '\"', '\"', '\"',
                                    '\"', '\"', '\"', '\"'};
    static const char bslash[16] = {'\\', '\\', '\\', '\\', '\\', '\\',
                                    '\\', '\\', '\\', '\\', '\\', '\\',
                                    '\\', '\\', '\\', '\\'};
    static const char space[16] = {0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F,
                                   0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F,
                                   0x1F, 0x1F, 0x1F, 0x1F};
    const __m128i dq =
        _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
    const __m128i bs =
        _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
    const __m128i sp =
        _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
    for (;; p += 16) {
      // p is 16-byte aligned here, so the aligned load is valid.
      const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
      const __m128i t1 = _mm_cmpeq_epi8(s, dq);
      const __m128i t2 = _mm_cmpeq_epi8(s, bs);
      const __m128i t3 = _mm_cmpeq_epi8(
          _mm_max_epu8(s, sp), sp);  // s < 0x20 <=> max(s, 0x1F) == 0x1F
      const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
      // One bit per byte: bit i is set when byte i needs special handling.
      unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
      if (RAPIDJSON_UNLIKELY(r != 0)) {  // some of characters is escaped
        SizeType length;
#ifdef _MSC_VER  // Find the index of first escaped
        unsigned long offset;
        _BitScanForward(&offset, r);
        length = offset;
#else
        length = static_cast<SizeType>(__builtin_ffs(r) - 1);
#endif
        // Copy only the plain characters that precede the special one.
        if (length != 0) {
          char *q = reinterpret_cast<char *>(os.Push(length));
          for (size_t i = 0; i < length; i++) q[i] = p[i];
          p += length;
        }
        break;
      }
      // No special character in this block: copy all 16 bytes at once.
      _mm_storeu_si128(reinterpret_cast<__m128i *>(os.Push(16)), s);
    }
    is.src_ = p;
  }
  // InsituStringStream -> InsituStringStream
  // SSE2/SSE4.2 fast path for in-situ parsing: moves characters from the read
  // position (src_) to the write position (dst_) of the same stream up to
  // (not including) the first '\"', '\\' or byte < 0x20, then stores the
  // advanced pointers back. When src_ == dst_ nothing needs to be moved and
  // the work is delegated to SkipUnescapedString().
  static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(
      InsituStringStream &is, InsituStringStream &os) {
    RAPIDJSON_ASSERT(&is == &os);
    (void)os;
    if (is.src_ == is.dst_) {
      SkipUnescapedString(is);
      return;
    }
    char *p = is.src_;
    char *q = is.dst_;
    // Scan one by one until alignment (unaligned load may cross page boundary
    // and cause crash)
    const char *nextAligned = reinterpret_cast<const char *>(
        (reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
    while (p != nextAligned)
      if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') ||
          RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
        is.src_ = p;
        is.dst_ = q;
        return;
      } else
        *q++ = *p++;
    // The rest of string using SIMD
    // 16-byte patterns: all '\"', all '\\', and all 0x1F (upper bound of the
    // control-character range).
    static const char dquote[16] = {'\"', '\"', '\"', '\"', '\"', '\"',
                                    '\"', '\"', '\"', '\"', '\"', '\"',
                                    '\"', '\"', '\"', '\"'};
    static const char bslash[16] = {'\\', '\\', '\\', '\\', '\\', '\\',
                                    '\\', '\\', '\\', '\\', '\\', '\\',
                                    '\\', '\\', '\\', '\\'};
    static const char space[16] = {0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F,
                                   0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F,
                                   0x1F, 0x1F, 0x1F, 0x1F};
    const __m128i dq =
        _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
    const __m128i bs =
        _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
    const __m128i sp =
        _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
    for (;; p += 16, q += 16) {
      // p is 16-byte aligned here; q may not be, hence the unaligned store
      // at the bottom of the loop.
      const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
      const __m128i t1 = _mm_cmpeq_epi8(s, dq);
      const __m128i t2 = _mm_cmpeq_epi8(s, bs);
      const __m128i t3 = _mm_cmpeq_epi8(
          _mm_max_epu8(s, sp), sp);  // s < 0x20 <=> max(s, 0x1F) == 0x1F
      const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
      // One bit per byte: bit i is set when byte i needs special handling.
      unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
      if (RAPIDJSON_UNLIKELY(r != 0)) {  // some of characters is escaped
        size_t length;
#ifdef _MSC_VER  // Find the index of first escaped
        unsigned long offset;
        _BitScanForward(&offset, r);
        length = offset;
#else
        length = static_cast<size_t>(__builtin_ffs(r) - 1);
#endif
        // Move only the plain characters that precede the special one.
        for (const char *pend = p + length; p != pend;) *q++ = *p++;
        break;
      }
      // No special character in this block: move all 16 bytes at once.
      _mm_storeu_si128(reinterpret_cast<__m128i *>(q), s);
    }
    is.src_ = p;
    is.dst_ = q;
  }
  // When read/write pointers are the same for insitu stream, just skip
  // unescaped characters
  // SSE2/SSE4.2 version: advances both src_ and dst_ to the first '\"', '\\'
  // or byte < 0x20 without copying anything.
  static RAPIDJSON_FORCEINLINE void SkipUnescapedString(
      InsituStringStream &is) {
    RAPIDJSON_ASSERT(is.src_ == is.dst_);
    char *p = is.src_;
    // Scan one by one until alignment (unaligned load may cross page boundary
    // and cause crash)
    const char *nextAligned = reinterpret_cast<const char *>(
        (reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
    for (; p != nextAligned; p++)
      if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') ||
          RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
        is.src_ = is.dst_ = p;
        return;
      }
    // The rest of string using SIMD
    // 16-byte patterns: all '\"', all '\\', and all 0x1F (upper bound of the
    // control-character range).
    static const char dquote[16] = {'\"', '\"', '\"', '\"', '\"', '\"',
                                    '\"', '\"', '\"', '\"', '\"', '\"',
                                    '\"', '\"', '\"', '\"'};
    static const char bslash[16] = {'\\', '\\', '\\', '\\', '\\', '\\',
                                    '\\', '\\', '\\', '\\', '\\', '\\',
                                    '\\', '\\', '\\', '\\'};
    static const char space[16] = {0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F,
                                   0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F,
                                   0x1F, 0x1F, 0x1F, 0x1F};
    const __m128i dq =
        _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
    const __m128i bs =
        _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
    const __m128i sp =
        _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
    for (;; p += 16) {
      // p is 16-byte aligned here, so the aligned load is valid.
      const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
      const __m128i t1 = _mm_cmpeq_epi8(s, dq);
      const __m128i t2 = _mm_cmpeq_epi8(s, bs);
      const __m128i t3 = _mm_cmpeq_epi8(
          _mm_max_epu8(s, sp), sp);  // s < 0x20 <=> max(s, 0x1F) == 0x1F
      const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
      // One bit per byte: bit i is set when byte i needs special handling.
      unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
      if (RAPIDJSON_UNLIKELY(r != 0)) {  // some of characters is escaped
        size_t length;
#ifdef _MSC_VER  // Find the index of first escaped
        unsigned long offset;
        _BitScanForward(&offset, r);
        length = offset;
#else
        length = static_cast<size_t>(__builtin_ffs(r) - 1);
#endif
        // Stop exactly on the special character.
        p += length;
        break;
      }
    }
    is.src_ = is.dst_ = p;
  }
  1150. #elif defined(RAPIDJSON_NEON)
  // StringStream -> StackStream<char>
  // NEON fast path: copies characters from `is` to `os` up to (not including)
  // the first '"', '\\' or byte < 32, and leaves is.src_ pointing at that
  // character. A '\0' byte also stops the scan because it is < 32.
  static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(
      StringStream &is, StackStream<char> &os) {
    const char *p = is.src_;
    // Scan one by one until alignment (unaligned load may cross page boundary
    // and cause crash)
    const char *nextAligned = reinterpret_cast<const char *>(
        (reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
    while (p != nextAligned)
      if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') ||
          RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
        is.src_ = p;
        return;
      } else
        os.Put(*p++);
    // The rest of string using SIMD
    const uint8x16_t s0 = vmovq_n_u8('"');
    const uint8x16_t s1 = vmovq_n_u8('\\');
    // NOTE(review): '\b' (0x08) is already covered by the "< 32" test below,
    // so this pattern looks redundant.
    const uint8x16_t s2 = vmovq_n_u8('\b');
    const uint8x16_t s3 = vmovq_n_u8(32);
    for (;; p += 16) {
      const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
      // x: 0xFF in every byte lane that holds a special character.
      uint8x16_t x = vceqq_u8(s, s0);
      x = vorrq_u8(x, vceqq_u8(s, s1));
      x = vorrq_u8(x, vceqq_u8(s, s2));
      x = vorrq_u8(x, vcltq_u8(s, s3));
      // Reverse the bytes inside each 64-bit half so that the first byte in
      // memory becomes the most significant one; a leading-zero count then
      // gives the position of the first special character.
      x = vrev64q_u8(x);  // Rev in 64
      uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0);   // extract
      uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1);  // extract
      SizeType length = 0;
      bool escaped = false;
      if (low == 0) {
        if (high != 0) {
          // First special character is in bytes 8..15; lz >> 3 converts the
          // bit count into a byte count.
          uint32_t lz = RAPIDJSON_CLZLL(high);
          length = 8 + (lz >> 3);
          escaped = true;
        }
      } else {
        // First special character is in bytes 0..7.
        uint32_t lz = RAPIDJSON_CLZLL(low);
        length = lz >> 3;
        escaped = true;
      }
      if (RAPIDJSON_UNLIKELY(escaped)) {  // some of characters is escaped
        // Copy only the plain characters that precede the special one.
        if (length != 0) {
          char *q = reinterpret_cast<char *>(os.Push(length));
          for (size_t i = 0; i < length; i++) q[i] = p[i];
          p += length;
        }
        break;
      }
      // No special character in this block: copy all 16 bytes at once.
      vst1q_u8(reinterpret_cast<uint8_t *>(os.Push(16)), s);
    }
    is.src_ = p;
  }
  // InsituStringStream -> InsituStringStream
  // NEON fast path for in-situ parsing: moves characters from the read
  // position (src_) to the write position (dst_) of the same stream up to
  // (not including) the first '"', '\\' or byte < 32, then stores the
  // advanced pointers back. When src_ == dst_ nothing needs to be moved and
  // the work is delegated to SkipUnescapedString().
  static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(
      InsituStringStream &is, InsituStringStream &os) {
    RAPIDJSON_ASSERT(&is == &os);
    (void)os;
    if (is.src_ == is.dst_) {
      SkipUnescapedString(is);
      return;
    }
    char *p = is.src_;
    char *q = is.dst_;
    // Scan one by one until alignment (unaligned load may cross page boundary
    // and cause crash)
    const char *nextAligned = reinterpret_cast<const char *>(
        (reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
    while (p != nextAligned)
      if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') ||
          RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
        is.src_ = p;
        is.dst_ = q;
        return;
      } else
        *q++ = *p++;
    // The rest of string using SIMD
    const uint8x16_t s0 = vmovq_n_u8('"');
    const uint8x16_t s1 = vmovq_n_u8('\\');
    // NOTE(review): '\b' (0x08) is already covered by the "< 32" test below,
    // so this pattern looks redundant.
    const uint8x16_t s2 = vmovq_n_u8('\b');
    const uint8x16_t s3 = vmovq_n_u8(32);
    for (;; p += 16, q += 16) {
      const uint8x16_t s = vld1q_u8(reinterpret_cast<uint8_t *>(p));
      // x: 0xFF in every byte lane that holds a special character.
      uint8x16_t x = vceqq_u8(s, s0);
      x = vorrq_u8(x, vceqq_u8(s, s1));
      x = vorrq_u8(x, vceqq_u8(s, s2));
      x = vorrq_u8(x, vcltq_u8(s, s3));
      // Reverse the bytes inside each 64-bit half so that the first byte in
      // memory becomes the most significant one; a leading-zero count then
      // gives the position of the first special character.
      x = vrev64q_u8(x);  // Rev in 64
      uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0);   // extract
      uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1);  // extract
      SizeType length = 0;
      bool escaped = false;
      if (low == 0) {
        if (high != 0) {
          // First special character is in bytes 8..15; lz >> 3 converts the
          // bit count into a byte count.
          uint32_t lz = RAPIDJSON_CLZLL(high);
          length = 8 + (lz >> 3);
          escaped = true;
        }
      } else {
        // First special character is in bytes 0..7.
        uint32_t lz = RAPIDJSON_CLZLL(low);
        length = lz >> 3;
        escaped = true;
      }
      if (RAPIDJSON_UNLIKELY(escaped)) {  // some of characters is escaped
        // Move only the plain characters that precede the special one.
        for (const char *pend = p + length; p != pend;) {
          *q++ = *p++;
        }
        break;
      }
      // No special character in this block: move all 16 bytes at once.
      vst1q_u8(reinterpret_cast<uint8_t *>(q), s);
    }
    is.src_ = p;
    is.dst_ = q;
  }
  // When read/write pointers are the same for insitu stream, just skip
  // unescaped characters
  // NEON version: advances both src_ and dst_ to the first '"', '\\' or
  // byte < 32 without copying anything.
  static RAPIDJSON_FORCEINLINE void SkipUnescapedString(
      InsituStringStream &is) {
    RAPIDJSON_ASSERT(is.src_ == is.dst_);
    char *p = is.src_;
    // Scan one by one until alignment (unaligned load may cross page boundary
    // and cause crash)
    const char *nextAligned = reinterpret_cast<const char *>(
        (reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
    for (; p != nextAligned; p++)
      if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') ||
          RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
        is.src_ = is.dst_ = p;
        return;
      }
    // The rest of string using SIMD
    const uint8x16_t s0 = vmovq_n_u8('"');
    const uint8x16_t s1 = vmovq_n_u8('\\');
    // NOTE(review): '\b' (0x08) is already covered by the "< 32" test below,
    // so this pattern looks redundant.
    const uint8x16_t s2 = vmovq_n_u8('\b');
    const uint8x16_t s3 = vmovq_n_u8(32);
    for (;; p += 16) {
      const uint8x16_t s = vld1q_u8(reinterpret_cast<uint8_t *>(p));
      // x: 0xFF in every byte lane that holds a special character.
      uint8x16_t x = vceqq_u8(s, s0);
      x = vorrq_u8(x, vceqq_u8(s, s1));
      x = vorrq_u8(x, vceqq_u8(s, s2));
      x = vorrq_u8(x, vcltq_u8(s, s3));
      // Reverse the bytes inside each 64-bit half so that the first byte in
      // memory becomes the most significant one; a leading-zero count then
      // gives the position of the first special character.
      x = vrev64q_u8(x);  // Rev in 64
      uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0);   // extract
      uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1);  // extract
      if (low == 0) {
        if (high != 0) {
          // First special character is in bytes 8..15.
          uint32_t lz = RAPIDJSON_CLZLL(high);
          p += 8 + (lz >> 3);
          break;
        }
      } else {
        // First special character is in bytes 0..7.
        uint32_t lz = RAPIDJSON_CLZLL(low);
        p += lz >> 3;
        break;
      }
    }
    is.src_ = is.dst_ = p;
  }
  1310. #endif // RAPIDJSON_NEON
  1311. template <typename InputStream, bool backup, bool pushOnTake>
  1312. class NumberStream;
  1313. template <typename InputStream>
  1314. class NumberStream<InputStream, false, false> {
  1315. public:
  1316. typedef typename InputStream::Ch Ch;
  1317. NumberStream(GenericReader &reader, InputStream &s) : is(s) {
  1318. (void)reader;
  1319. }
  1320. RAPIDJSON_FORCEINLINE Ch Peek() const { return is.Peek(); }
  1321. RAPIDJSON_FORCEINLINE Ch TakePush() { return is.Take(); }
  1322. RAPIDJSON_FORCEINLINE Ch Take() { return is.Take(); }
  1323. RAPIDJSON_FORCEINLINE void Push(char) {}
  1324. size_t Tell() { return is.Tell(); }
  1325. size_t Length() { return 0; }
  1326. const char *Pop() { return 0; }
  1327. protected:
  1328. NumberStream &operator=(const NumberStream &);
  1329. InputStream &is;
  1330. };
  1331. template <typename InputStream>
  1332. class NumberStream<InputStream, true, false>
  1333. : public NumberStream<InputStream, false, false> {
  1334. typedef NumberStream<InputStream, false, false> Base;
  1335. public:
  1336. NumberStream(GenericReader &reader, InputStream &is)
  1337. : Base(reader, is), stackStream(reader.stack_) {}
  1338. RAPIDJSON_FORCEINLINE Ch TakePush() {
  1339. stackStream.Put(static_cast<char>(Base::is.Peek()));
  1340. return Base::is.Take();
  1341. }
  1342. RAPIDJSON_FORCEINLINE void Push(char c) { stackStream.Put(c); }
  1343. size_t Length() { return stackStream.Length(); }
  1344. const char *Pop() {
  1345. stackStream.Put('\0');
  1346. return stackStream.Pop();
  1347. }
  1348. private:
  1349. StackStream<char> stackStream;
  1350. };
  1351. template <typename InputStream>
  1352. class NumberStream<InputStream, true, true>
  1353. : public NumberStream<InputStream, true, false> {
  1354. typedef NumberStream<InputStream, true, false> Base;
  1355. public:
  1356. NumberStream(GenericReader &reader, InputStream &is) : Base(reader, is) {}
  1357. RAPIDJSON_FORCEINLINE Ch Take() { return Base::TakePush(); }
  1358. };
  1359. template <unsigned parseFlags, typename InputStream, typename Handler>
  1360. void ParseNumber(InputStream &is, Handler &handler) {
  1361. internal::StreamLocalCopy<InputStream> copy(is);
  1362. NumberStream<InputStream,
  1363. ((parseFlags & kParseNumbersAsStringsFlag) != 0)
  1364. ? ((parseFlags & kParseInsituFlag) == 0)
  1365. : ((parseFlags & kParseFullPrecisionFlag) != 0),
  1366. (parseFlags & kParseNumbersAsStringsFlag) != 0 &&
  1367. (parseFlags & kParseInsituFlag) == 0>
  1368. s(*this, copy.s);
  1369. size_t startOffset = s.Tell();
  1370. double d = 0.0;
  1371. bool useNanOrInf = false;
  1372. // Parse minus
  1373. bool minus = Consume(s, '-');
  1374. // Parse int: zero / ( digit1-9 *DIGIT )
  1375. unsigned i = 0;
  1376. uint64_t i64 = 0;
  1377. bool use64bit = false;
  1378. int significandDigit = 0;
  1379. if (RAPIDJSON_UNLIKELY(s.Peek() == '0')) {
  1380. i = 0;
  1381. s.TakePush();
  1382. } else if (RAPIDJSON_LIKELY(s.Peek() >= '1' && s.Peek() <= '9')) {
  1383. i = static_cast<unsigned>(s.TakePush() - '0');
  1384. if (minus)
  1385. while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
  1386. if (RAPIDJSON_UNLIKELY(i >= 214748364)) { // 2^31 = 2147483648
  1387. if (RAPIDJSON_LIKELY(i != 214748364 || s.Peek() > '8')) {
  1388. i64 = i;
  1389. use64bit = true;
  1390. break;
  1391. }
  1392. }
  1393. i = i * 10 + static_cast<unsigned>(s.TakePush() - '0');
  1394. significandDigit++;
  1395. }
  1396. else
  1397. while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
  1398. if (RAPIDJSON_UNLIKELY(i >= 429496729)) { // 2^32 - 1 = 4294967295
  1399. if (RAPIDJSON_LIKELY(i != 429496729 || s.Peek() > '5')) {
  1400. i64 = i;
  1401. use64bit = true;
  1402. break;
  1403. }
  1404. }
  1405. i = i * 10 + static_cast<unsigned>(s.TakePush() - '0');
  1406. significandDigit++;
  1407. }
  1408. }
  1409. // Parse NaN or Infinity here
  1410. else if ((parseFlags & kParseNanAndInfFlag) &&
  1411. RAPIDJSON_LIKELY((s.Peek() == 'I' || s.Peek() == 'N'))) {
  1412. if (Consume(s, 'N')) {
  1413. if (Consume(s, 'a') && Consume(s, 'N')) {
  1414. d = std::numeric_limits<double>::quiet_NaN();
  1415. useNanOrInf = true;
  1416. }
  1417. } else if (RAPIDJSON_LIKELY(Consume(s, 'I'))) {
  1418. if (Consume(s, 'n') && Consume(s, 'f')) {
  1419. d = (minus ? -std::numeric_limits<double>::infinity()
  1420. : std::numeric_limits<double>::infinity());
  1421. useNanOrInf = true;
  1422. if (RAPIDJSON_UNLIKELY(s.Peek() == 'i' &&
  1423. !(Consume(s, 'i') && Consume(s, 'n') &&
  1424. Consume(s, 'i') && Consume(s, 't') &&
  1425. Consume(s, 'y')))) {
  1426. RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
  1427. }
  1428. }
  1429. }
  1430. if (RAPIDJSON_UNLIKELY(!useNanOrInf)) {
  1431. RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
  1432. }
  1433. } else
  1434. RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
  1435. // Parse 64bit int
  1436. bool useDouble = false;
  1437. if (use64bit) {
  1438. if (minus)
  1439. while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
  1440. if (RAPIDJSON_UNLIKELY(
  1441. i64 >=
  1442. RAPIDJSON_UINT64_C2(
  1443. 0x0CCCCCCC, 0xCCCCCCCC))) // 2^63 = 9223372036854775808
  1444. if (RAPIDJSON_LIKELY(
  1445. i64 != RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC) ||
  1446. s.Peek() > '8')) {
  1447. d = static_cast<double>(i64);
  1448. useDouble = true;
  1449. break;
  1450. }
  1451. i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
  1452. significandDigit++;
  1453. }
  1454. else
  1455. while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
  1456. if (RAPIDJSON_UNLIKELY(
  1457. i64 >= RAPIDJSON_UINT64_C2(
  1458. 0x19999999,
  1459. 0x99999999))) // 2^64 - 1 = 18446744073709551615
  1460. if (RAPIDJSON_LIKELY(
  1461. i64 != RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) ||
  1462. s.Peek() > '5')) {
  1463. d = static_cast<double>(i64);
  1464. useDouble = true;
  1465. break;
  1466. }
  1467. i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
  1468. significandDigit++;
  1469. }
  1470. }
  1471. // Force double for big integer
  1472. if (useDouble) {
  1473. while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
  1474. d = d * 10 + (s.TakePush() - '0');
  1475. }
  1476. }
  1477. // Parse frac = decimal-point 1*DIGIT
  1478. int expFrac = 0;
  1479. size_t decimalPosition;
  1480. if (Consume(s, '.')) {
  1481. decimalPosition = s.Length();
  1482. if (RAPIDJSON_UNLIKELY(!(s.Peek() >= '0' && s.Peek() <= '9')))
  1483. RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissFraction, s.Tell());
  1484. if (!useDouble) {
  1485. #if RAPIDJSON_64BIT
  1486. // Use i64 to store significand in 64-bit architecture
  1487. if (!use64bit) i64 = i;
  1488. while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
  1489. if (i64 > RAPIDJSON_UINT64_C2(0x1FFFFF,
  1490. 0xFFFFFFFF)) // 2^53 - 1 for fast path
  1491. break;
  1492. else {
  1493. i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
  1494. --expFrac;
  1495. if (i64 != 0) significandDigit++;
  1496. }
  1497. }
  1498. d = static_cast<double>(i64);
  1499. #else
  1500. // Use double to store significand in 32-bit architecture
  1501. d = static_cast<double>(use64bit ? i64 : i);
  1502. #endif
  1503. useDouble = true;
  1504. }
  1505. while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
  1506. if (significandDigit < 17) {
  1507. d = d * 10.0 + (s.TakePush() - '0');
  1508. --expFrac;
  1509. if (RAPIDJSON_LIKELY(d > 0.0)) significandDigit++;
  1510. } else
  1511. s.TakePush();
  1512. }
  1513. } else
  1514. decimalPosition = s.Length(); // decimal position at the end of integer.
  1515. // Parse exp = e [ minus / plus ] 1*DIGIT
  1516. int exp = 0;
  1517. if (Consume(s, 'e') || Consume(s, 'E')) {
  1518. if (!useDouble) {
  1519. d = static_cast<double>(use64bit ? i64 : i);
  1520. useDouble = true;
  1521. }
  1522. bool expMinus = false;
  1523. if (Consume(s, '+'))
  1524. ;
  1525. else if (Consume(s, '-'))
  1526. expMinus = true;
  1527. if (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
  1528. exp = static_cast<int>(s.Take() - '0');
  1529. if (expMinus) {
  1530. // (exp + expFrac) must not underflow int => we're detecting when -exp
  1531. // gets dangerously close to INT_MIN (a pessimistic next digit 9 would
  1532. // push it into underflow territory):
  1533. //
  1534. // -(exp * 10 + 9) + expFrac >= INT_MIN
  1535. // <=> exp <= (expFrac - INT_MIN - 9) / 10
  1536. RAPIDJSON_ASSERT(expFrac <= 0);
  1537. int maxExp = (expFrac + 2147483639) / 10;
  1538. while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
  1539. exp = exp * 10 + static_cast<int>(s.Take() - '0');
  1540. if (RAPIDJSON_UNLIKELY(exp > maxExp)) {
  1541. while (RAPIDJSON_UNLIKELY(
  1542. s.Peek() >= '0' &&
  1543. s.Peek() <= '9')) // Consume the rest of exponent
  1544. s.Take();
  1545. }
  1546. }
  1547. } else { // positive exp
  1548. int maxExp = 308 - expFrac;
  1549. while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
  1550. exp = exp * 10 + static_cast<int>(s.Take() - '0');
  1551. if (RAPIDJSON_UNLIKELY(exp > maxExp))
  1552. RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset);
  1553. }
  1554. }
  1555. } else
  1556. RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissExponent, s.Tell());
  1557. if (expMinus) exp = -exp;
  1558. }
  1559. // Finish parsing, call event according to the type of number.
  1560. bool cont = true;
  1561. if (parseFlags & kParseNumbersAsStringsFlag) {
  1562. if (parseFlags & kParseInsituFlag) {
  1563. s.Pop(); // Pop stack no matter if it will be used or not.
  1564. typename InputStream::Ch *head = is.PutBegin();
  1565. const size_t length = s.Tell() - startOffset;
  1566. RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
  1567. // unable to insert the \0 character here, it will erase the comma after
  1568. // this number
  1569. const typename TargetEncoding::Ch *const str =
  1570. reinterpret_cast<typename TargetEncoding::Ch *>(head);
  1571. cont = handler.RawNumber(str, SizeType(length), false);
  1572. } else {
  1573. SizeType numCharsToCopy = static_cast<SizeType>(s.Length());
  1574. StringStream srcStream(s.Pop());
  1575. StackStream<typename TargetEncoding::Ch> dstStream(stack_);
  1576. while (numCharsToCopy--) {
  1577. Transcoder<UTF8<>, TargetEncoding>::Transcode(srcStream, dstStream);
  1578. }
  1579. dstStream.Put('\0');
  1580. const typename TargetEncoding::Ch *str = dstStream.Pop();
  1581. const SizeType length = static_cast<SizeType>(dstStream.Length()) - 1;
  1582. cont = handler.RawNumber(str, SizeType(length), true);
  1583. }
  1584. } else {
  1585. size_t length = s.Length();
  1586. const char *decimal =
  1587. s.Pop(); // Pop stack no matter if it will be used or not.
  1588. if (useDouble) {
  1589. int p = exp + expFrac;
  1590. if (parseFlags & kParseFullPrecisionFlag)
  1591. d = internal::StrtodFullPrecision(d, p, decimal, length,
  1592. decimalPosition, exp);
  1593. else
  1594. d = internal::StrtodNormalPrecision(d, p);
  1595. // Use > max, instead of == inf, to fix bogus warning -Wfloat-equal
  1596. if (d > (std::numeric_limits<double>::max)()) {
  1597. // Overflow
  1598. // TODO: internal::StrtodX should report overflow (or underflow)
  1599. RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset);
  1600. }
  1601. cont = handler.Double(minus ? -d : d);
  1602. } else if (useNanOrInf) {
  1603. cont = handler.Double(d);
  1604. } else {
  1605. if (use64bit) {
  1606. if (minus)
  1607. cont = handler.Int64(static_cast<int64_t>(~i64 + 1));
  1608. else
  1609. cont = handler.Uint64(i64);
  1610. } else {
  1611. if (minus)
  1612. cont = handler.Int(static_cast<int32_t>(~i + 1));
  1613. else
  1614. cont = handler.Uint(i);
  1615. }
  1616. }
  1617. }
  1618. if (RAPIDJSON_UNLIKELY(!cont))
  1619. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, startOffset);
  1620. }
  1621. // Parse any JSON value
  1622. template <unsigned parseFlags, typename InputStream, typename Handler>
  1623. void ParseValue(InputStream &is, Handler &handler) {
  1624. switch (is.Peek()) {
  1625. case 'n':
  1626. ParseNull<parseFlags>(is, handler);
  1627. break;
  1628. case 't':
  1629. ParseTrue<parseFlags>(is, handler);
  1630. break;
  1631. case 'f':
  1632. ParseFalse<parseFlags>(is, handler);
  1633. break;
  1634. case '"':
  1635. ParseString<parseFlags>(is, handler);
  1636. break;
  1637. case '{':
  1638. ParseObject<parseFlags>(is, handler);
  1639. break;
  1640. case '[':
  1641. ParseArray<parseFlags>(is, handler);
  1642. break;
  1643. default:
  1644. ParseNumber<parseFlags>(is, handler);
  1645. break;
  1646. }
  1647. }
  // Iterative Parsing

  // States of the iterative parser.
  //
  // The numeric order is significant:
  //   - the enumerators index the rows of the table in Predict();
  //   - the two sink states come first, so IsIterativeParsingCompleteState()
  //     can test `s <= IterativeParsingErrorState`;
  //   - the delimiter states come last, so IsIterativeParsingDelimiterState()
  //     can test `s >= IterativeParsingElementDelimiterState`.
  enum IterativeParsingState {
    // Sink states (must stay at the top).
    IterativeParsingFinishState = 0,
    IterativeParsingErrorState,

    IterativeParsingStartState,

    // Object states.
    IterativeParsingObjectInitialState,
    IterativeParsingMemberKeyState,
    IterativeParsingMemberValueState,
    IterativeParsingObjectFinishState,

    // Array states.
    IterativeParsingArrayInitialState,
    IterativeParsingElementState,
    IterativeParsingArrayFinishState,

    // Single value state.
    IterativeParsingValueState,

    // Delimiter states (must stay at the bottom).
    IterativeParsingElementDelimiterState,
    IterativeParsingMemberDelimiterState,
    IterativeParsingKeyValueDelimiterState,

    // Number of states; not a state itself.
    cIterativeParsingStateCount
  };
  // Lookahead token classes produced by Tokenize(). The numeric order is
  // significant: the enumerators index the columns of the table in Predict().
  enum Token {
    LeftBracketToken = 0,    // '['
    RightBracketToken,       // ']'
    LeftCurlyBracketToken,   // '{'
    RightCurlyBracketToken,  // '}'
    CommaToken,              // ','
    ColonToken,              // ':'
    StringToken,             // '"'
    FalseToken,              // 'f'
    TrueToken,               // 't'
    NullToken,               // 'n'
    NumberToken,             // every other character
    kTokenCount              // number of tokens; not a token itself
  };
  1686. RAPIDJSON_FORCEINLINE Token Tokenize(Ch c) const {
  1687. //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
  1688. #define N NumberToken
  1689. #define N16 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
  1690. // Maps from ASCII to Token
  1691. static const unsigned char tokenMap[256] = {
  1692. N16, // 00~0F
  1693. N16, // 10~1F
  1694. N, N,
  1695. StringToken, N,
  1696. N, N,
  1697. N, N,
  1698. N, N,
  1699. N, N,
  1700. CommaToken, N,
  1701. N, N, // 20~2F
  1702. N, N,
  1703. N, N,
  1704. N, N,
  1705. N, N,
  1706. N, N,
  1707. ColonToken, N,
  1708. N, N,
  1709. N, N, // 30~3F
  1710. N16, // 40~4F
  1711. N, N,
  1712. N, N,
  1713. N, N,
  1714. N, N,
  1715. N, N,
  1716. N, LeftBracketToken,
  1717. N, RightBracketToken,
  1718. N, N, // 50~5F
  1719. N, N,
  1720. N, N,
  1721. N, N,
  1722. FalseToken, N,
  1723. N, N,
  1724. N, N,
  1725. N, N,
  1726. NullToken, N, // 60~6F
  1727. N, N,
  1728. N, N,
  1729. TrueToken, N,
  1730. N, N,
  1731. N, N,
  1732. N, LeftCurlyBracketToken,
  1733. N, RightCurlyBracketToken,
  1734. N, N, // 70~7F
  1735. N16, N16,
  1736. N16, N16,
  1737. N16, N16,
  1738. N16, N16 // 80~FF
  1739. };
  1740. #undef N
  1741. #undef N16
  1742. //!@endcond
  1743. if (sizeof(Ch) == 1 || static_cast<unsigned>(c) < 256)
  1744. return static_cast<Token>(tokenMap[static_cast<unsigned char>(c)]);
  1745. else
  1746. return NumberToken;
  1747. }
  1748. RAPIDJSON_FORCEINLINE IterativeParsingState
  1749. Predict(IterativeParsingState state, Token token) const {
  1750. // current state x one lookahead token -> new state
  1751. static const char G[cIterativeParsingStateCount][kTokenCount] = {
  1752. // Finish(sink state)
  1753. {IterativeParsingErrorState, IterativeParsingErrorState,
  1754. IterativeParsingErrorState, IterativeParsingErrorState,
  1755. IterativeParsingErrorState, IterativeParsingErrorState,
  1756. IterativeParsingErrorState, IterativeParsingErrorState,
  1757. IterativeParsingErrorState, IterativeParsingErrorState,
  1758. IterativeParsingErrorState},
  1759. // Error(sink state)
  1760. {IterativeParsingErrorState, IterativeParsingErrorState,
  1761. IterativeParsingErrorState, IterativeParsingErrorState,
  1762. IterativeParsingErrorState, IterativeParsingErrorState,
  1763. IterativeParsingErrorState, IterativeParsingErrorState,
  1764. IterativeParsingErrorState, IterativeParsingErrorState,
  1765. IterativeParsingErrorState},
  1766. // Start
  1767. {
  1768. IterativeParsingArrayInitialState, // Left bracket
  1769. IterativeParsingErrorState, // Right bracket
  1770. IterativeParsingObjectInitialState, // Left curly bracket
  1771. IterativeParsingErrorState, // Right curly bracket
  1772. IterativeParsingErrorState, // Comma
  1773. IterativeParsingErrorState, // Colon
  1774. IterativeParsingValueState, // String
  1775. IterativeParsingValueState, // False
  1776. IterativeParsingValueState, // True
  1777. IterativeParsingValueState, // Null
  1778. IterativeParsingValueState // Number
  1779. },
  1780. // ObjectInitial
  1781. {
  1782. IterativeParsingErrorState, // Left bracket
  1783. IterativeParsingErrorState, // Right bracket
  1784. IterativeParsingErrorState, // Left curly bracket
  1785. IterativeParsingObjectFinishState, // Right curly bracket
  1786. IterativeParsingErrorState, // Comma
  1787. IterativeParsingErrorState, // Colon
  1788. IterativeParsingMemberKeyState, // String
  1789. IterativeParsingErrorState, // False
  1790. IterativeParsingErrorState, // True
  1791. IterativeParsingErrorState, // Null
  1792. IterativeParsingErrorState // Number
  1793. },
  1794. // MemberKey
  1795. {
  1796. IterativeParsingErrorState, // Left bracket
  1797. IterativeParsingErrorState, // Right bracket
  1798. IterativeParsingErrorState, // Left curly bracket
  1799. IterativeParsingErrorState, // Right curly bracket
  1800. IterativeParsingErrorState, // Comma
  1801. IterativeParsingKeyValueDelimiterState, // Colon
  1802. IterativeParsingErrorState, // String
  1803. IterativeParsingErrorState, // False
  1804. IterativeParsingErrorState, // True
  1805. IterativeParsingErrorState, // Null
  1806. IterativeParsingErrorState // Number
  1807. },
  1808. // MemberValue
  1809. {
  1810. IterativeParsingErrorState, // Left bracket
  1811. IterativeParsingErrorState, // Right bracket
  1812. IterativeParsingErrorState, // Left curly bracket
  1813. IterativeParsingObjectFinishState, // Right curly bracket
  1814. IterativeParsingMemberDelimiterState, // Comma
  1815. IterativeParsingErrorState, // Colon
  1816. IterativeParsingErrorState, // String
  1817. IterativeParsingErrorState, // False
  1818. IterativeParsingErrorState, // True
  1819. IterativeParsingErrorState, // Null
  1820. IterativeParsingErrorState // Number
  1821. },
  1822. // ObjectFinish(sink state)
  1823. {IterativeParsingErrorState, IterativeParsingErrorState,
  1824. IterativeParsingErrorState, IterativeParsingErrorState,
  1825. IterativeParsingErrorState, IterativeParsingErrorState,
  1826. IterativeParsingErrorState, IterativeParsingErrorState,
  1827. IterativeParsingErrorState, IterativeParsingErrorState,
  1828. IterativeParsingErrorState},
  1829. // ArrayInitial
  1830. {
  1831. IterativeParsingArrayInitialState, // Left bracket(push Element
  1832. // state)
  1833. IterativeParsingArrayFinishState, // Right bracket
  1834. IterativeParsingObjectInitialState, // Left curly bracket(push
  1835. // Element state)
  1836. IterativeParsingErrorState, // Right curly bracket
  1837. IterativeParsingErrorState, // Comma
  1838. IterativeParsingErrorState, // Colon
  1839. IterativeParsingElementState, // String
  1840. IterativeParsingElementState, // False
  1841. IterativeParsingElementState, // True
  1842. IterativeParsingElementState, // Null
  1843. IterativeParsingElementState // Number
  1844. },
  1845. // Element
  1846. {
  1847. IterativeParsingErrorState, // Left bracket
  1848. IterativeParsingArrayFinishState, // Right bracket
  1849. IterativeParsingErrorState, // Left curly bracket
  1850. IterativeParsingErrorState, // Right curly bracket
  1851. IterativeParsingElementDelimiterState, // Comma
  1852. IterativeParsingErrorState, // Colon
  1853. IterativeParsingErrorState, // String
  1854. IterativeParsingErrorState, // False
  1855. IterativeParsingErrorState, // True
  1856. IterativeParsingErrorState, // Null
  1857. IterativeParsingErrorState // Number
  1858. },
  1859. // ArrayFinish(sink state)
  1860. {IterativeParsingErrorState, IterativeParsingErrorState,
  1861. IterativeParsingErrorState, IterativeParsingErrorState,
  1862. IterativeParsingErrorState, IterativeParsingErrorState,
  1863. IterativeParsingErrorState, IterativeParsingErrorState,
  1864. IterativeParsingErrorState, IterativeParsingErrorState,
  1865. IterativeParsingErrorState},
  1866. // Single Value (sink state)
  1867. {IterativeParsingErrorState, IterativeParsingErrorState,
  1868. IterativeParsingErrorState, IterativeParsingErrorState,
  1869. IterativeParsingErrorState, IterativeParsingErrorState,
  1870. IterativeParsingErrorState, IterativeParsingErrorState,
  1871. IterativeParsingErrorState, IterativeParsingErrorState,
  1872. IterativeParsingErrorState},
  1873. // ElementDelimiter
  1874. {
  1875. IterativeParsingArrayInitialState, // Left bracket(push Element
  1876. // state)
  1877. IterativeParsingArrayFinishState, // Right bracket
  1878. IterativeParsingObjectInitialState, // Left curly bracket(push
  1879. // Element state)
  1880. IterativeParsingErrorState, // Right curly bracket
  1881. IterativeParsingErrorState, // Comma
  1882. IterativeParsingErrorState, // Colon
  1883. IterativeParsingElementState, // String
  1884. IterativeParsingElementState, // False
  1885. IterativeParsingElementState, // True
  1886. IterativeParsingElementState, // Null
  1887. IterativeParsingElementState // Number
  1888. },
  1889. // MemberDelimiter
  1890. {
  1891. IterativeParsingErrorState, // Left bracket
  1892. IterativeParsingErrorState, // Right bracket
  1893. IterativeParsingErrorState, // Left curly bracket
  1894. IterativeParsingObjectFinishState, // Right curly bracket
  1895. IterativeParsingErrorState, // Comma
  1896. IterativeParsingErrorState, // Colon
  1897. IterativeParsingMemberKeyState, // String
  1898. IterativeParsingErrorState, // False
  1899. IterativeParsingErrorState, // True
  1900. IterativeParsingErrorState, // Null
  1901. IterativeParsingErrorState // Number
  1902. },
  1903. // KeyValueDelimiter
  1904. {
  1905. IterativeParsingArrayInitialState, // Left bracket(push MemberValue
  1906. // state)
  1907. IterativeParsingErrorState, // Right bracket
  1908. IterativeParsingObjectInitialState, // Left curly bracket(push
  1909. // MemberValue state)
  1910. IterativeParsingErrorState, // Right curly bracket
  1911. IterativeParsingErrorState, // Comma
  1912. IterativeParsingErrorState, // Colon
  1913. IterativeParsingMemberValueState, // String
  1914. IterativeParsingMemberValueState, // False
  1915. IterativeParsingMemberValueState, // True
  1916. IterativeParsingMemberValueState, // Null
  1917. IterativeParsingMemberValueState // Number
  1918. },
  1919. }; // End of G
  1920. return static_cast<IterativeParsingState>(G[state][token]);
  1921. }
  1922. // Make an advance in the token stream and state based on the candidate
  1923. // destination state which was returned by Transit(). May return a new state
  1924. // on state pop.
  1925. template <unsigned parseFlags, typename InputStream, typename Handler>
  1926. RAPIDJSON_FORCEINLINE IterativeParsingState Transit(IterativeParsingState src,
  1927. Token token,
  1928. IterativeParsingState dst,
  1929. InputStream &is,
  1930. Handler &handler) {
  1931. (void)token;
  1932. switch (dst) {
  1933. case IterativeParsingErrorState:
  1934. return dst;
  1935. case IterativeParsingObjectInitialState:
  1936. case IterativeParsingArrayInitialState: {
  1937. // Push the state(Element or MemeberValue) if we are nested in another
  1938. // array or value of member. In this way we can get the correct state on
  1939. // ObjectFinish or ArrayFinish by frame pop.
  1940. IterativeParsingState n = src;
  1941. if (src == IterativeParsingArrayInitialState ||
  1942. src == IterativeParsingElementDelimiterState)
  1943. n = IterativeParsingElementState;
  1944. else if (src == IterativeParsingKeyValueDelimiterState)
  1945. n = IterativeParsingMemberValueState;
  1946. // Push current state.
  1947. *stack_.template Push<SizeType>(1) = n;
  1948. // Initialize and push the member/element count.
  1949. *stack_.template Push<SizeType>(1) = 0;
  1950. // Call handler
  1951. bool hr = (dst == IterativeParsingObjectInitialState)
  1952. ? handler.StartObject()
  1953. : handler.StartArray();
  1954. // On handler short circuits the parsing.
  1955. if (!hr) {
  1956. RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
  1957. return IterativeParsingErrorState;
  1958. } else {
  1959. is.Take();
  1960. return dst;
  1961. }
  1962. }
  1963. case IterativeParsingMemberKeyState:
  1964. ParseString<parseFlags>(is, handler, true);
  1965. if (HasParseError())
  1966. return IterativeParsingErrorState;
  1967. else
  1968. return dst;
  1969. case IterativeParsingKeyValueDelimiterState:
  1970. RAPIDJSON_ASSERT(token == ColonToken);
  1971. is.Take();
  1972. return dst;
  1973. case IterativeParsingMemberValueState:
  1974. // Must be non-compound value. Or it would be ObjectInitial or
  1975. // ArrayInitial state.
  1976. ParseValue<parseFlags>(is, handler);
  1977. if (HasParseError()) {
  1978. return IterativeParsingErrorState;
  1979. }
  1980. return dst;
  1981. case IterativeParsingElementState:
  1982. // Must be non-compound value. Or it would be ObjectInitial or
  1983. // ArrayInitial state.
  1984. ParseValue<parseFlags>(is, handler);
  1985. if (HasParseError()) {
  1986. return IterativeParsingErrorState;
  1987. }
  1988. return dst;
  1989. case IterativeParsingMemberDelimiterState:
  1990. case IterativeParsingElementDelimiterState:
  1991. is.Take();
  1992. // Update member/element count.
  1993. *stack_.template Top<SizeType>() = *stack_.template Top<SizeType>() + 1;
  1994. return dst;
  1995. case IterativeParsingObjectFinishState: {
  1996. // Transit from delimiter is only allowed when trailing commas are
  1997. // enabled
  1998. if (!(parseFlags & kParseTrailingCommasFlag) &&
  1999. src == IterativeParsingMemberDelimiterState) {
  2000. RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissName, is.Tell());
  2001. return IterativeParsingErrorState;
  2002. }
  2003. // Get member count.
  2004. SizeType c = *stack_.template Pop<SizeType>(1);
  2005. // If the object is not empty, count the last member.
  2006. if (src == IterativeParsingMemberValueState) ++c;
  2007. // Restore the state.
  2008. IterativeParsingState n = static_cast<IterativeParsingState>(
  2009. *stack_.template Pop<SizeType>(1));
  2010. // Transit to Finish state if this is the topmost scope.
  2011. if (n == IterativeParsingStartState) n = IterativeParsingFinishState;
  2012. // Call handler
  2013. bool hr = handler.EndObject(c);
  2014. // On handler short circuits the parsing.
  2015. if (!hr) {
  2016. RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
  2017. return IterativeParsingErrorState;
  2018. } else {
  2019. is.Take();
  2020. return n;
  2021. }
  2022. }
  2023. case IterativeParsingArrayFinishState: {
  2024. // Transit from delimiter is only allowed when trailing commas are
  2025. // enabled
  2026. if (!(parseFlags & kParseTrailingCommasFlag) &&
  2027. src == IterativeParsingElementDelimiterState) {
  2028. RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorValueInvalid, is.Tell());
  2029. return IterativeParsingErrorState;
  2030. }
  2031. // Get element count.
  2032. SizeType c = *stack_.template Pop<SizeType>(1);
  2033. // If the array is not empty, count the last element.
  2034. if (src == IterativeParsingElementState) ++c;
  2035. // Restore the state.
  2036. IterativeParsingState n = static_cast<IterativeParsingState>(
  2037. *stack_.template Pop<SizeType>(1));
  2038. // Transit to Finish state if this is the topmost scope.
  2039. if (n == IterativeParsingStartState) n = IterativeParsingFinishState;
  2040. // Call handler
  2041. bool hr = handler.EndArray(c);
  2042. // On handler short circuits the parsing.
  2043. if (!hr) {
  2044. RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
  2045. return IterativeParsingErrorState;
  2046. } else {
  2047. is.Take();
  2048. return n;
  2049. }
  2050. }
  2051. default:
  2052. // This branch is for IterativeParsingValueState actually.
  2053. // Use `default:` rather than
  2054. // `case IterativeParsingValueState:` is for code coverage.
  2055. // The IterativeParsingStartState is not enumerated in this switch-case.
  2056. // It is impossible for that case. And it can be caught by following
  2057. // assertion.
  2058. // The IterativeParsingFinishState is not enumerated in this switch-case
  2059. // either. It is a "derivative" state which cannot triggered from
  2060. // Predict() directly. Therefore it cannot happen here. And it can be
  2061. // caught by following assertion.
  2062. RAPIDJSON_ASSERT(dst == IterativeParsingValueState);
  2063. // Must be non-compound value. Or it would be ObjectInitial or
  2064. // ArrayInitial state.
  2065. ParseValue<parseFlags>(is, handler);
  2066. if (HasParseError()) {
  2067. return IterativeParsingErrorState;
  2068. }
  2069. return IterativeParsingFinishState;
  2070. }
  2071. }
  2072. template <typename InputStream>
  2073. void HandleError(IterativeParsingState src, InputStream &is) {
  2074. if (HasParseError()) {
  2075. // Error flag has been set.
  2076. return;
  2077. }
  2078. switch (src) {
  2079. case IterativeParsingStartState:
  2080. RAPIDJSON_PARSE_ERROR(kParseErrorDocumentEmpty, is.Tell());
  2081. return;
  2082. case IterativeParsingFinishState:
  2083. RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotSingular, is.Tell());
  2084. return;
  2085. case IterativeParsingObjectInitialState:
  2086. case IterativeParsingMemberDelimiterState:
  2087. RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell());
  2088. return;
  2089. case IterativeParsingMemberKeyState:
  2090. RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell());
  2091. return;
  2092. case IterativeParsingMemberValueState:
  2093. RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket,
  2094. is.Tell());
  2095. return;
  2096. case IterativeParsingKeyValueDelimiterState:
  2097. case IterativeParsingArrayInitialState:
  2098. case IterativeParsingElementDelimiterState:
  2099. RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
  2100. return;
  2101. default:
  2102. RAPIDJSON_ASSERT(src == IterativeParsingElementState);
  2103. RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket,
  2104. is.Tell());
  2105. return;
  2106. }
  2107. }
  2108. RAPIDJSON_FORCEINLINE bool IsIterativeParsingDelimiterState(
  2109. IterativeParsingState s) const {
  2110. return s >= IterativeParsingElementDelimiterState;
  2111. }
  2112. RAPIDJSON_FORCEINLINE bool IsIterativeParsingCompleteState(
  2113. IterativeParsingState s) const {
  2114. return s <= IterativeParsingErrorState;
  2115. }
  2116. template <unsigned parseFlags, typename InputStream, typename Handler>
  2117. ParseResult IterativeParse(InputStream &is, Handler &handler) {
  2118. parseResult_.Clear();
  2119. ClearStackOnExit scope(*this);
  2120. IterativeParsingState state = IterativeParsingStartState;
  2121. SkipWhitespaceAndComments<parseFlags>(is);
  2122. RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
  2123. while (is.Peek() != '\0') {
  2124. Token t = Tokenize(is.Peek());
  2125. IterativeParsingState n = Predict(state, t);
  2126. IterativeParsingState d = Transit<parseFlags>(state, t, n, is, handler);
  2127. if (d == IterativeParsingErrorState) {
  2128. HandleError(state, is);
  2129. break;
  2130. }
  2131. state = d;
  2132. // Do not further consume streams if a root JSON has been parsed.
  2133. if ((parseFlags & kParseStopWhenDoneFlag) &&
  2134. state == IterativeParsingFinishState)
  2135. break;
  2136. SkipWhitespaceAndComments<parseFlags>(is);
  2137. RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
  2138. }
  2139. // Handle the end of file.
  2140. if (state != IterativeParsingFinishState) HandleError(state, is);
  2141. return parseResult_;
  2142. }
  2143. static const size_t kDefaultStackCapacity =
  2144. 256; //!< Default stack capacity in bytes for storing a single decoded
  2145. //!< string.
  2146. internal::Stack<StackAllocator>
  2147. stack_; //!< A stack for storing decoded string temporarily during
  2148. //!< non-destructive parsing.
  2149. ParseResult parseResult_;
  2150. IterativeParsingState state_;
  2151. }; // class GenericReader
  2152. //! Reader with UTF8 encoding and default allocator.
  2153. typedef GenericReader<UTF8<>, UTF8<>> Reader;
  2154. RAPIDJSON_NAMESPACE_END
  2155. #if defined(__clang__) || defined(_MSC_VER)
  2156. RAPIDJSON_DIAG_POP
  2157. #endif
  2158. #ifdef __GNUC__
  2159. RAPIDJSON_DIAG_POP
  2160. #endif
  2161. #endif // RAPIDJSON_READER_H_