From 68fb58f6c2791182518b8578c7c9eeabfb5e2e7e Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Wed, 15 May 2024 16:34:57 +0200 Subject: [PATCH] Bring forward changes from https://github.com/duckdb/duckdb_iceberg/pull/50 --- third_party/yyjson/include/yyjson.hpp | 2516 ++++++++++++++-- third_party/yyjson/yyjson.cpp | 3792 +++++++++++++++++-------- 2 files changed, 4743 insertions(+), 1565 deletions(-) diff --git a/third_party/yyjson/include/yyjson.hpp b/third_party/yyjson/include/yyjson.hpp index 0533238491e2..aa45ded55f23 100644 --- a/third_party/yyjson/include/yyjson.hpp +++ b/third_party/yyjson/include/yyjson.hpp @@ -1,12 +1,30 @@ /*============================================================================== - * Created by Yaoyuan on 2019/3/9. - * Copyright (C) 2019 Yaoyuan . - * - * Released under the MIT License: - * https://github.com/ibireme/yyjson/blob/master/LICENSE + Copyright (c) 2020 YaoYuan + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. 
*============================================================================*/ -/** @file yyjson.h */ +/** + @file yyjson.h + @date 2019-03-09 + @author YaoYuan + */ #ifndef DUCKDB_YYJSON_H #define DUCKDB_YYJSON_H @@ -17,6 +35,7 @@ * Header Files *============================================================================*/ +#include #include #include #include @@ -32,12 +51,12 @@ namespace duckdb_yyjson { *============================================================================*/ /* - Define as 1 to disable JSON reader if you don't need to parse JSON. + Define as 1 to disable JSON reader if JSON parsing is not required. This will disable these functions at compile-time: + - yyjson_read() - yyjson_read_opts() - yyjson_read_file() - - yyjson_read() - yyjson_read_number() - yyjson_mut_read_number() @@ -47,7 +66,7 @@ namespace duckdb_yyjson { #endif /* - Define as 1 to disable JSON writer if you don't need to serialize JSON. + Define as 1 to disable JSON writer if JSON serialization is not required. This will disable these functions at compile-time: - yyjson_write() @@ -68,6 +87,22 @@ namespace duckdb_yyjson { #ifndef YYJSON_DISABLE_WRITER #endif +/* + Define as 1 to disable JSON Pointer, JSON Patch and JSON Merge Patch supports. + + This will disable these functions at compile-time: + - yyjson_ptr_xxx() + - yyjson_mut_ptr_xxx() + - yyjson_doc_ptr_xxx() + - yyjson_mut_doc_ptr_xxx() + - yyjson_patch() + - yyjson_mut_patch() + - yyjson_merge_patch() + - yyjson_mut_merge_patch() + */ +#ifndef YYJSON_DISABLE_UTILS +#endif + /* Define as 1 to disable the fast floating-point number conversion in yyjson, and use libc's `strtod/snprintf` instead. @@ -93,19 +128,36 @@ namespace duckdb_yyjson { - YYJSON_WRITE_ALLOW_INF_AND_NAN - YYJSON_WRITE_ALLOW_INVALID_UNICODE - This will reduce the binary size by about 10%, and slightly improve the JSON - read/write speed. + This will reduce the binary size by about 10%, and speed up the reading and + writing speed by about 2% to 6%. 
*/ #ifndef YYJSON_DISABLE_NON_STANDARD #endif /* - Define as 1 to disable unaligned memory access if target architecture does not - support unaligned memory access (such as some embedded processors). + Define as 1 to disable UTF-8 validation at compile time. - If this value is not defined, yyjson will perform some automatic detection. - The wrong definition of this option may cause some performance degradation, - but will not cause any run-time errors. + If all input strings are guaranteed to be valid UTF-8 encoding (for example, + some language's String object has already validated the encoding), using this + flag can avoid redundant UTF-8 validation in yyjson. + + This flag can speed up the reading and writing speed of non-ASCII encoded + strings by about 3% to 7%. + + Note: If this flag is used while passing in illegal UTF-8 strings, the + following errors may occur: + - Escaped characters may be ignored when parsing JSON strings. + - Ending quotes may be ignored when parsing JSON strings, causing the string + to be concatenated to the next value. + - When accessing `yyjson_mut_val` for serialization, the string ending may be + accessed out of bounds, causing a segmentation fault. + */ +#ifndef YYJSON_DISABLE_UTF8_VALIDATION +#endif + +/* + Define as 1 to indicate that the target architecture does not support unaligned + memory access. Please refer to the comments in the C file for details. 
*/ #ifndef YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS #endif @@ -142,8 +194,26 @@ namespace duckdb_yyjson { /** compiler version (GCC) */ #ifdef __GNUC__ # define YYJSON_GCC_VER __GNUC__ +# if defined(__GNUC_PATCHLEVEL__) +# define yyjson_gcc_available(major, minor, patch) \ + ((__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) \ + >= (major * 10000 + minor * 100 + patch)) +# else +# define yyjson_gcc_available(major, minor, patch) \ + ((__GNUC__ * 10000 + __GNUC_MINOR__ * 100) \ + >= (major * 10000 + minor * 100 + patch)) +# endif #else # define YYJSON_GCC_VER 0 +# define yyjson_gcc_available(major, minor, patch) 0 +#endif + +/** real gcc check */ +#if !defined(__clang__) && !defined(__INTEL_COMPILER) && !defined(__ICC) && \ + defined(__GNUC__) +# define YYJSON_IS_REAL_GCC 1 +#else +# define YYJSON_IS_REAL_GCC 0 #endif /** C version (STDC) */ @@ -178,6 +248,15 @@ namespace duckdb_yyjson { # endif #endif +/** compiler feature check (since clang 2.6, icc 17) */ +#ifndef yyjson_has_feature +# ifdef __has_feature +# define yyjson_has_feature(x) __has_feature(x) +# else +# define yyjson_has_feature(x) 0 +# endif +#endif + /** include check (since gcc 5.0, clang 2.7, icc 16, msvc 2017 15.3) */ #ifndef yyjson_has_include # ifdef __has_include @@ -248,6 +327,31 @@ namespace duckdb_yyjson { # endif #endif +/** compile-time constant check for compiler */ +#ifndef yyjson_constant_p +# if yyjson_has_builtin(__builtin_constant_p) || (YYJSON_GCC_VER >= 3) +# define YYJSON_HAS_CONSTANT_P 1 +# define yyjson_constant_p(value) __builtin_constant_p(value) +# else +# define YYJSON_HAS_CONSTANT_P 0 +# define yyjson_constant_p(value) 0 +# endif +#endif + +/** deprecate warning */ +#ifndef yyjson_deprecated +# if YYJSON_MSC_VER >= 1400 +# define yyjson_deprecated(msg) __declspec(deprecated(msg)) +# elif yyjson_has_feature(attribute_deprecated_with_message) || \ + (YYJSON_GCC_VER > 4 || (YYJSON_GCC_VER == 4 && __GNUC_MINOR__ >= 5)) +# define yyjson_deprecated(msg) 
__attribute__((deprecated(msg))) +# elif YYJSON_GCC_VER >= 3 +# define yyjson_deprecated(msg) __attribute__((deprecated)) +# else +# define yyjson_deprecated(msg) +# endif +#endif + /** function export */ #ifndef yyjson_api # if defined(_WIN32) @@ -376,6 +480,18 @@ namespace duckdb_yyjson { # endif #endif +/** + Microsoft Visual C++ 6.0 doesn't support converting number from u64 to f64: + error C2520: conversion from unsigned __int64 to double not implemented. + */ +#ifndef YYJSON_U64_TO_F64_NO_IMPL +# if (0 < YYJSON_MSC_VER) && (YYJSON_MSC_VER <= 1200) +# define YYJSON_U64_TO_F64_NO_IMPL 1 +# else +# define YYJSON_U64_TO_F64_NO_IMPL 0 +# endif +#endif + /*============================================================================== @@ -384,7 +500,7 @@ namespace duckdb_yyjson { /* extern "C" begin */ #ifdef __cplusplus -// extern "C" { +extern "C" { #endif /* warning suppress begin */ @@ -413,16 +529,16 @@ namespace duckdb_yyjson { #define YYJSON_VERSION_MAJOR 0 /** The minor version of yyjson. */ -#define YYJSON_VERSION_MINOR 6 +#define YYJSON_VERSION_MINOR 9 /** The patch version of yyjson. */ #define YYJSON_VERSION_PATCH 0 /** The version of yyjson in hex: `(major << 16) | (minor << 8) | (patch)`. */ -#define YYJSON_VERSION_HEX 0x000600 +#define YYJSON_VERSION_HEX 0x000900 /** The version string of yyjson. */ -#define YYJSON_VERSION_STRING "0.6.0" +#define YYJSON_VERSION_STRING "0.9.0" /** The version of yyjson in hex, same as `YYJSON_VERSION_HEX`. */ yyjson_api uint32_t yyjson_version(void); @@ -433,34 +549,57 @@ yyjson_api uint32_t yyjson_version(void); * JSON Types *============================================================================*/ -/** Type of JSON value (3 bit). */ +/** Type of a JSON value (3 bit). */ typedef uint8_t yyjson_type; +/** No type, invalid. */ #define YYJSON_TYPE_NONE ((uint8_t)0) /* _____000 */ +/** Raw string type, no subtype. */ #define YYJSON_TYPE_RAW ((uint8_t)1) /* _____001 */ +/** Null type: `null` literal, no subtype. 
*/ #define YYJSON_TYPE_NULL ((uint8_t)2) /* _____010 */ +/** Boolean type, subtype: TRUE, FALSE. */ #define YYJSON_TYPE_BOOL ((uint8_t)3) /* _____011 */ +/** Number type, subtype: UINT, SINT, REAL. */ #define YYJSON_TYPE_NUM ((uint8_t)4) /* _____100 */ +/** String type, subtype: NONE, NOESC. */ #define YYJSON_TYPE_STR ((uint8_t)5) /* _____101 */ +/** Array type, no subtype. */ #define YYJSON_TYPE_ARR ((uint8_t)6) /* _____110 */ +/** Object type, no subtype. */ #define YYJSON_TYPE_OBJ ((uint8_t)7) /* _____111 */ -/** Subtype of JSON value (2 bit). */ +/** Subtype of a JSON value (2 bit). */ typedef uint8_t yyjson_subtype; +/** No subtype. */ #define YYJSON_SUBTYPE_NONE ((uint8_t)(0 << 3)) /* ___00___ */ +/** False subtype: `false` literal. */ #define YYJSON_SUBTYPE_FALSE ((uint8_t)(0 << 3)) /* ___00___ */ +/** True subtype: `true` literal. */ #define YYJSON_SUBTYPE_TRUE ((uint8_t)(1 << 3)) /* ___01___ */ +/** Unsigned integer subtype: `uint64_t`. */ #define YYJSON_SUBTYPE_UINT ((uint8_t)(0 << 3)) /* ___00___ */ +/** Signed integer subtype: `int64_t`. */ #define YYJSON_SUBTYPE_SINT ((uint8_t)(1 << 3)) /* ___01___ */ +/** Real number subtype: `double`. */ #define YYJSON_SUBTYPE_REAL ((uint8_t)(2 << 3)) /* ___10___ */ +/** String that do not need to be escaped for writing (internal use). */ +#define YYJSON_SUBTYPE_NOESC ((uint8_t)(1 << 3)) /* ___01___ */ -/** Mask and bits of JSON value tag. */ +/** The mask used to extract the type of a JSON value. */ #define YYJSON_TYPE_MASK ((uint8_t)0x07) /* _____111 */ +/** The number of bits used by the type. */ #define YYJSON_TYPE_BIT ((uint8_t)3) +/** The mask used to extract the subtype of a JSON value. */ #define YYJSON_SUBTYPE_MASK ((uint8_t)0x18) /* ___11___ */ +/** The number of bits used by the subtype. */ #define YYJSON_SUBTYPE_BIT ((uint8_t)2) +/** The mask used to extract the reserved bits of a JSON value. */ #define YYJSON_RESERVED_MASK ((uint8_t)0xE0) /* 111_____ */ +/** The number of reserved bits. 
*/ #define YYJSON_RESERVED_BIT ((uint8_t)3) +/** The mask used to extract the tag of a JSON value. */ #define YYJSON_TAG_MASK ((uint8_t)0xFF) /* 11111111 */ +/** The number of bits used by the tag. */ #define YYJSON_TAG_BIT ((uint8_t)8) /** Padding size for JSON reader. */ @@ -492,13 +631,17 @@ typedef struct yyjson_alc { /** A pool allocator uses fixed length pre-allocated memory. - This allocator may used to avoid malloc/realloc calls. The pre-allocated memory - should be held by the caller. The maximum amount of memory required to read a - JSON can be calculated using the `yyjson_read_max_memory_usage()` function, but - the amount of memory required to write a JSON cannot be directly calculated. + This allocator may be used to avoid malloc/realloc calls. The pre-allocated + memory should be held by the caller. The maximum amount of memory required to + read a JSON can be calculated using the `yyjson_read_max_memory_usage()` + function, but the amount of memory required to write a JSON cannot be directly + calculated. - This is not a general-purpose allocator, and should only be used to read or - write single JSON document. + This is not a general-purpose allocator. It is designed to handle a single JSON + data at a time. If it is used for overly complex memory tasks, such as parsing + multiple JSON documents using the same allocator but releasing only a few of + them, it may cause memory fragmentation, resulting in performance degradation + and memory waste. @param alc The allocator to be initialized. If this parameter is NULL, the function will fail and return false. @@ -521,9 +664,31 @@ typedef struct yyjson_alc { yyjson_doc *doc = yyjson_read_opts(json, strlen(json), 0, &alc, NULL); // the memory of `doc` is on the stack @endcode + + @warning This Allocator is not thread-safe. */ yyjson_api bool yyjson_alc_pool_init(yyjson_alc *alc, void *buf, size_t size); +/** + A dynamic allocator. + + This allocator has a similar usage to the pool allocator above. 
However, when + there is not enough memory, this allocator will dynamically request more memory + using libc's `malloc` function, and frees it all at once when it is destroyed. + + @return A new dynamic allocator, or NULL if memory allocation failed. + @note The returned value should be freed with `yyjson_alc_dyn_free()`. + + @warning This Allocator is not thread-safe. + */ +yyjson_api yyjson_alc *yyjson_alc_dyn_new(void); + +/** + Free a dynamic allocator which is created by `yyjson_alc_dyn_new()`. + @param alc The dynamic allocator to be destroyed. + */ +yyjson_api void yyjson_alc_dyn_free(yyjson_alc *alc); + /*============================================================================== @@ -563,10 +728,6 @@ typedef struct yyjson_mut_val yyjson_mut_val; /*============================================================================== * JSON Reader API *============================================================================*/ -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wunused-const-variable" -#endif /** Run-time options for JSON reader. */ typedef uint32_t yyjson_read_flag; @@ -576,10 +737,10 @@ typedef uint32_t yyjson_read_flag; - Read negative integer as int64_t. - Read floating-point number as double with round-to-nearest mode. - Read integer which cannot fit in uint64_t or int64_t as double. - - Report error if real number is infinity. + - Report error if double number is infinity. - Report error if string contains invalid UTF-8 character or BOM. - Report error on trailing commas, comments, inf and nan literals. */ -static const yyjson_read_flag YYJSON_READ_NOFLAG = 0 << 0; +static const yyjson_read_flag YYJSON_READ_NOFLAG = 0; /** Read the input data in-situ. This option allows the reader to modify and use input data to store string @@ -605,7 +766,7 @@ static const yyjson_read_flag YYJSON_READ_ALLOW_COMMENTS = 1 << 3; such as 1e999, NaN, inf, -Infinity (non-standard). 
*/ static const yyjson_read_flag YYJSON_READ_ALLOW_INF_AND_NAN = 1 << 4; -/** Read number as raw string (value with `YYJSON_TYPE_RAW` type), +/** Read all numbers as raw strings (value with `YYJSON_TYPE_RAW` type), inf/nan literal is also read as raw with `ALLOW_INF_AND_NAN` flag. */ static const yyjson_read_flag YYJSON_READ_NUMBER_AS_RAW = 1 << 5; @@ -619,6 +780,12 @@ static const yyjson_read_flag YYJSON_READ_NUMBER_AS_RAW = 1 << 5; risks. */ static const yyjson_read_flag YYJSON_READ_ALLOW_INVALID_UNICODE = 1 << 6; +/** Read big numbers as raw strings. These big numbers include integers that + cannot be represented by `int64_t` and `uint64_t`, and floating-point + numbers that cannot be represented by finite `double`. + The flag will be overridden by `YYJSON_READ_NUMBER_AS_RAW` flag. */ +static const yyjson_read_flag YYJSON_READ_BIGNUM_AS_RAW = 1 << 7; + /** Result code for JSON reader. */ @@ -636,7 +803,7 @@ static const yyjson_read_code YYJSON_READ_ERROR_MEMORY_ALLOCATION = 2; /** Input JSON string is empty. */ static const yyjson_read_code YYJSON_READ_ERROR_EMPTY_CONTENT = 3; -/** Unexpected content after document, such as `[1]abc`. */ +/** Unexpected content after document, such as `[123]abc`. */ static const yyjson_read_code YYJSON_READ_ERROR_UNEXPECTED_CONTENT = 4; /** Unexpected ending, such as `[123`. */ @@ -676,9 +843,7 @@ typedef struct yyjson_read_err { size_t pos; } yyjson_read_err; -#ifdef __clang__ -#pragma clang diagnostic pop -#endif + /** Read JSON with options. @@ -734,6 +899,28 @@ yyjson_api yyjson_doc *yyjson_read_file(const char *path, const yyjson_alc *alc, yyjson_read_err *err); +/** + Read JSON from a file pointer. + + @param fp The file pointer. + The data will be read from the current position of the FILE to the end. + If this fp is NULL or invalid, the function will fail and return NULL. + @param flg The JSON read options. + Multiple options can be combined with `|` operator. 0 means no options. 
+ @param alc The memory allocator used by JSON reader. + Pass NULL to use the libc's default allocator. + @param err A pointer to receive error information. + Pass NULL if you don't need error information. + @return A new JSON document, or NULL if an error occurs. + When it's no longer needed, it should be freed with `yyjson_doc_free()`. + + @warning On 32-bit operating system, files larger than 2GB may fail to read. + */ +yyjson_api yyjson_doc *yyjson_read_fp(FILE *fp, + yyjson_read_flag flg, + const yyjson_alc *alc, + yyjson_read_err *err); + /** Read a JSON string. @@ -752,7 +939,8 @@ yyjson_api_inline yyjson_doc *yyjson_read(const char *dat, size_t len, yyjson_read_flag flg) { flg &= ~YYJSON_READ_INSITU; /* const string cannot be modified */ - return yyjson_read_opts((char *)dat, len, flg, NULL, NULL); + return yyjson_read_opts((char *)(void *)(size_t)(const void *)dat, + len, flg, NULL, NULL); } /** @@ -824,7 +1012,7 @@ yyjson_api_inline size_t yyjson_read_max_memory_usage(size_t len, The value will hold either UINT or SINT or REAL number; @param flg The JSON read options. Multiple options can be combined with `|` operator. 0 means no options. - Suppors `YYJSON_READ_NUMBER_AS_RAW` and `YYJSON_READ_ALLOW_INF_AND_NAN`. + Supports `YYJSON_READ_NUMBER_AS_RAW` and `YYJSON_READ_ALLOW_INF_AND_NAN`. @param alc The memory allocator used for long number. It is only used when the built-in floating point reader is disabled. Pass NULL to use the libc's default allocator. @@ -851,7 +1039,7 @@ yyjson_api const char *yyjson_read_number(const char *dat, The value will hold either UINT or SINT or REAL number; @param flg The JSON read options. Multiple options can be combined with `|` operator. 0 means no options. - Suppors `YYJSON_READ_NUMBER_AS_RAW` and `YYJSON_READ_ALLOW_INF_AND_NAN`. + Supports `YYJSON_READ_NUMBER_AS_RAW` and `YYJSON_READ_ALLOW_INF_AND_NAN`. @param alc The memory allocator used for long number. 
It is only used when the built-in floating point reader is disabled. Pass NULL to use the libc's default allocator. @@ -876,17 +1064,12 @@ yyjson_api_inline const char *yyjson_mut_read_number(const char *dat, /** Run-time options for JSON writer. */ typedef uint32_t yyjson_write_flag; -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wunused-const-variable" -#endif - /** Default option: - Write JSON minify. - Report error on inf or nan number. - Report error on invalid UTF-8 string. - Do not escape unicode or slash. */ -static const yyjson_write_flag YYJSON_WRITE_NOFLAG = 0 << 0; +static const yyjson_write_flag YYJSON_WRITE_NOFLAG = 0; /** Write JSON pretty with 4 space indent. */ static const yyjson_write_flag YYJSON_WRITE_PRETTY = 1 << 0; @@ -915,6 +1098,10 @@ static const yyjson_write_flag YYJSON_WRITE_ALLOW_INVALID_UNICODE = 1 << 5; This flag will override `YYJSON_WRITE_PRETTY` flag. */ static const yyjson_write_flag YYJSON_WRITE_PRETTY_TWO_SPACES = 1 << 6; +/** Adds a newline character `\n` at the end of the JSON. + This can be helpful for text editors or NDJSON. */ +static const yyjson_write_flag YYJSON_WRITE_NEWLINE_AT_END = 1 << 7; + /** Result code for JSON writer */ @@ -952,9 +1139,7 @@ typedef struct yyjson_write_err { const char *msg; } yyjson_write_err; -#ifdef __clang__ -#pragma clang diagnostic pop -#endif + /*============================================================================== * JSON Document Writer API @@ -972,8 +1157,8 @@ typedef struct yyjson_write_err { Multiple options can be combined with `|` operator. 0 means no options. @param alc The memory allocator used by JSON writer. Pass NULL to use the libc's default allocator. - @param len A pointer to receive output length in bytes. - Pass NULL if you don't need length information. + @param len A pointer to receive output length in bytes (not including the + null-terminator). Pass NULL if you don't need length information. 
@param err A pointer to receive error information. Pass NULL if you don't need error information. @return A new JSON string, or NULL if an error occurs. @@ -1014,6 +1199,30 @@ yyjson_api bool yyjson_write_file(const char *path, const yyjson_alc *alc, yyjson_write_err *err); +/** + Write a document to file pointer with options. + + @param fp The file pointer. + The data will be written to the current position of the file. + If this fp is NULL or invalid, the function will fail and return false. + @param doc The JSON document. + If this doc is NULL or has no root, the function will fail and return false. + @param flg The JSON write options. + Multiple options can be combined with `|` operator. 0 means no options. + @param alc The memory allocator used by JSON writer. + Pass NULL to use the libc's default allocator. + @param err A pointer to receive error information. + Pass NULL if you don't need error information. + @return true if successful, false if an error occurs. + + @warning On 32-bit operating system, files larger than 2GB may fail to write. + */ +yyjson_api bool yyjson_write_fp(FILE *fp, + const yyjson_doc *doc, + yyjson_write_flag flg, + const yyjson_alc *alc, + yyjson_write_err *err); + /** Write a document to JSON string. @@ -1023,8 +1232,8 @@ yyjson_api bool yyjson_write_file(const char *path, If this doc is NULL or has no root, the function will fail and return false. @param flg The JSON write options. Multiple options can be combined with `|` operator. 0 means no options. - @param len A pointer to receive output length in bytes. - Pass NULL if you don't need length information. + @param len A pointer to receive output length in bytes (not including the + null-terminator). Pass NULL if you don't need length information. @return A new JSON string, or NULL if an error occurs. This string is encoded as UTF-8 with a null-terminator. When it's no longer needed, it should be freed with free(). 
@@ -1050,8 +1259,8 @@ yyjson_api_inline char *yyjson_write(const yyjson_doc *doc, Multiple options can be combined with `|` operator. 0 means no options. @param alc The memory allocator used by JSON writer. Pass NULL to use the libc's default allocator. - @param len A pointer to receive output length in bytes. - Pass NULL if you don't need length information. + @param len A pointer to receive output length in bytes (not including the + null-terminator). Pass NULL if you don't need length information. @param err A pointer to receive error information. Pass NULL if you don't need error information. @return A new JSON string, or NULL if an error occurs. @@ -1093,6 +1302,30 @@ yyjson_api bool yyjson_mut_write_file(const char *path, const yyjson_alc *alc, yyjson_write_err *err); +/** + Write a document to file pointer with options. + + @param fp The file pointer. + The data will be written to the current position of the file. + If this fp is NULL or invalid, the function will fail and return false. + @param doc The mutable JSON document. + If this doc is NULL or has no root, the function will fail and return false. + @param flg The JSON write options. + Multiple options can be combined with `|` operator. 0 means no options. + @param alc The memory allocator used by JSON writer. + Pass NULL to use the libc's default allocator. + @param err A pointer to receive error information. + Pass NULL if you don't need error information. + @return true if successful, false if an error occurs. + + @warning On 32-bit operating system, files larger than 2GB may fail to write. + */ +yyjson_api bool yyjson_mut_write_fp(FILE *fp, + const yyjson_mut_doc *doc, + yyjson_write_flag flg, + const yyjson_alc *alc, + yyjson_write_err *err); + /** Write a document to JSON string. @@ -1103,8 +1336,8 @@ yyjson_api bool yyjson_mut_write_file(const char *path, If this doc is NULL or has no root, the function will fail and return false. @param flg The JSON write options. 
Multiple options can be combined with `|` operator. 0 means no options. - @param len A pointer to receive output length in bytes. - Pass NULL if you don't need length information. + @param len A pointer to receive output length in bytes (not including the + null-terminator). Pass NULL if you don't need length information. @return A new JSON string, or NULL if an error occurs. This string is encoded as UTF-8 with a null-terminator. When it's no longer needed, it should be freed with free(). @@ -1133,8 +1366,8 @@ yyjson_api_inline char *yyjson_mut_write(const yyjson_mut_doc *doc, Multiple options can be combined with `|` operator. 0 means no options. @param alc The memory allocator used by JSON writer. Pass NULL to use the libc's default allocator. - @param len A pointer to receive output length in bytes. - Pass NULL if you don't need length information. + @param len A pointer to receive output length in bytes (not including the + null-terminator). Pass NULL if you don't need length information. @param err A pointer to receive error information. Pass NULL if you don't need error information. @return A new JSON string, or NULL if an error occurs. @@ -1175,6 +1408,30 @@ yyjson_api bool yyjson_val_write_file(const char *path, const yyjson_alc *alc, yyjson_write_err *err); +/** + Write a value to file pointer with options. + + @param fp The file pointer. + The data will be written to the current position of the file. + If this fp is NULL or invalid, the function will fail and return false. + @param val The JSON root value. + If this parameter is NULL, the function will fail and return false. + @param flg The JSON write options. + Multiple options can be combined with `|` operator. 0 means no options. + @param alc The memory allocator used by JSON writer. + Pass NULL to use the libc's default allocator. + @param err A pointer to receive error information. + Pass NULL if you don't need error information. + @return true if successful, false if an error occurs. 
+ + @warning On 32-bit operating system, files larger than 2GB may fail to write. + */ +yyjson_api bool yyjson_val_write_fp(FILE *fp, + const yyjson_val *val, + yyjson_write_flag flg, + const yyjson_alc *alc, + yyjson_write_err *err); + /** Write a value to JSON string. @@ -1184,8 +1441,8 @@ yyjson_api bool yyjson_val_write_file(const char *path, If this parameter is NULL, the function will fail and return NULL. @param flg The JSON write options. Multiple options can be combined with `|` operator. 0 means no options. - @param len A pointer to receive output length in bytes. - Pass NULL if you don't need length information. + @param len A pointer to receive output length in bytes (not including the + null-terminator). Pass NULL if you don't need length information. @return A new JSON string, or NULL if an error occurs. This string is encoded as UTF-8 with a null-terminator. When it's no longer needed, it should be freed with free(). @@ -1209,8 +1466,8 @@ yyjson_api_inline char *yyjson_val_write(const yyjson_val *val, Multiple options can be combined with `|` operator. 0 means no options. @param alc The memory allocator used by JSON writer. Pass NULL to use the libc's default allocator. - @param len A pointer to receive output length in bytes. - Pass NULL if you don't need length information. + @param len A pointer to receive output length in bytes (not including the + null-terminator). Pass NULL if you don't need length information. @param err A pointer to receive error information. Pass NULL if you don't need error information. @return A new JSON string, or NULL if an error occurs. @@ -1252,6 +1509,30 @@ yyjson_api bool yyjson_mut_val_write_file(const char *path, const yyjson_alc *alc, yyjson_write_err *err); +/** + Write a value to file pointer with options. + + @param fp The file pointer. + The data will be written to the current position of the file. + If this fp is NULL or invalid, the function will fail and return false. 
+ @param val The mutable JSON root value. + If this parameter is NULL, the function will fail and return NULL. + @param flg The JSON write options. + Multiple options can be combined with `|` operator. 0 means no options. + @param alc The memory allocator used by JSON writer. + Pass NULL to use the libc's default allocator. + @param err A pointer to receive error information. + Pass NULL if you don't need error information. + @return true if successful, false if an error occurs. + + @warning On 32-bit operating system, files larger than 2GB may fail to write. + */ +yyjson_api bool yyjson_mut_val_write_fp(FILE *fp, + const yyjson_mut_val *val, + yyjson_write_flag flg, + const yyjson_alc *alc, + yyjson_write_err *err); + /** Write a value to JSON string. @@ -1262,8 +1543,8 @@ yyjson_api bool yyjson_mut_val_write_file(const char *path, If this parameter is NULL, the function will fail and return NULL. @param flg The JSON write options. Multiple options can be combined with `|` operator. 0 means no options. - @param len A pointer to receive output length in bytes. - Pass NULL if you don't need length information. + @param len A pointer to receive output length in bytes (not including the + null-terminator). Pass NULL if you don't need length information. @return A new JSON string, or NULL if an error occurs. This string is encoded as UTF-8 with a null-terminator. When it's no longer needed, it should be freed with free(). @@ -1408,6 +1689,10 @@ yyjson_api_inline int yyjson_get_int(yyjson_val *val); Returns 0.0 if `val` is NULL or type is not real(double). */ yyjson_api_inline double yyjson_get_real(yyjson_val *val); +/** Returns the content and typecast to `double` if the value is number. + Returns 0.0 if `val` is NULL or type is not number(uint/sint/real). */ +yyjson_api_inline double yyjson_get_num(yyjson_val *val); + /** Returns the content if the value is string. Returns NULL if `val` is NULL or type is not string. 
*/ yyjson_api_inline const char *yyjson_get_str(yyjson_val *val); @@ -1427,7 +1712,10 @@ yyjson_api_inline bool yyjson_equals_strn(yyjson_val *val, const char *str, size_t len); /** Returns whether two JSON values are equal (deep compare). - Returns false if input is NULL. */ + Returns false if input is NULL. + @note the result may be inaccurate if object has duplicate keys. + @warning This function is recursive and may cause a stack overflow + if the object level is too deep. */ yyjson_api_inline bool yyjson_equals(yyjson_val *lhs, yyjson_val *rhs); /** Set the value to raw. @@ -1515,14 +1803,17 @@ yyjson_api_inline yyjson_val *yyjson_arr_get_last(yyjson_val *arr); @par Example @code yyjson_val *val; - yyjson_arr_iter iter; - yyjson_arr_iter_init(arr, &iter); + yyjson_arr_iter iter = yyjson_arr_iter_with(arr); while ((val = yyjson_arr_iter_next(&iter))) { your_func(val); } @endcode */ -typedef struct yyjson_arr_iter yyjson_arr_iter; +typedef struct yyjson_arr_iter { + size_t idx; /**< next value's index */ + size_t max; /**< maximum index (arr.size) */ + yyjson_val *cur; /**< next value */ +} yyjson_arr_iter; /** Initialize an iterator for this array. @@ -1538,6 +1829,17 @@ typedef struct yyjson_arr_iter yyjson_arr_iter; yyjson_api_inline bool yyjson_arr_iter_init(yyjson_val *arr, yyjson_arr_iter *iter); +/** + Create an iterator with an array, same as `yyjson_arr_iter_init()`. + + @param arr The array to be iterated over. + If this parameter is NULL or not an array, an empty iterator will be returned. + @return A new iterator for the array. + + @note The iterator does not need to be destroyed. + */ +yyjson_api_inline yyjson_arr_iter yyjson_arr_iter_with(yyjson_val *arr); + /** Returns whether the iteration has more elements. If `iter` is NULL, this function will return false. 
@@ -1613,8 +1915,7 @@ yyjson_api_inline yyjson_val *yyjson_obj_getn(yyjson_val *obj, const char *key, @par Example @code yyjson_val *key, *val; - yyjson_obj_iter iter; - yyjson_obj_iter_init(obj, &iter); + yyjson_obj_iter iter = yyjson_obj_iter_with(obj); while ((key = yyjson_obj_iter_next(&iter))) { val = yyjson_obj_iter_get_val(key); your_func(key, val); @@ -1626,14 +1927,18 @@ yyjson_api_inline yyjson_val *yyjson_obj_getn(yyjson_val *obj, const char *key, @code // {"k1":1, "k2": 3, "k3": 3} yyjson_val *key, *val; - yyjson_obj_iter iter; - yyjson_obj_iter_init(obj, &iter); + yyjson_obj_iter iter = yyjson_obj_iter_with(obj); yyjson_val *v1 = yyjson_obj_iter_get(&iter, "k1"); yyjson_val *v3 = yyjson_obj_iter_get(&iter, "k3"); @endcode @see yyjson_obj_iter_get() and yyjson_obj_iter_getn() */ -typedef struct yyjson_obj_iter yyjson_obj_iter; +typedef struct yyjson_obj_iter { + size_t idx; /**< next key's index */ + size_t max; /**< maximum key index (obj.size) */ + yyjson_val *cur; /**< next key */ + yyjson_val *obj; /**< the object being iterated */ +} yyjson_obj_iter; /** Initialize an iterator for this object. @@ -1649,6 +1954,17 @@ typedef struct yyjson_obj_iter yyjson_obj_iter; yyjson_api_inline bool yyjson_obj_iter_init(yyjson_val *obj, yyjson_obj_iter *iter); +/** + Create an iterator with an object, same as `yyjson_obj_iter_init()`. + + @param obj The object to be iterated over. + If this parameter is NULL or not an object, an empty iterator will be returned. + @return A new iterator for the object. + + @note The iterator does not need to be destroyed. + */ +yyjson_api_inline yyjson_obj_iter yyjson_obj_iter_with(yyjson_val *obj); + /** Returns whether the iteration has more elements. If `iter` is NULL, this function will return false.
@@ -1745,6 +2061,38 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_doc_get_root(yyjson_mut_doc *doc); yyjson_api_inline void yyjson_mut_doc_set_root(yyjson_mut_doc *doc, yyjson_mut_val *root); +/** + Set the string pool size for a mutable document. + This function does not allocate memory immediately, but uses the size when + the next memory allocation is needed. + + If the caller knows the approximate bytes of strings that the document needs to + store (e.g. copy string with `yyjson_mut_strcpy` function), setting a larger + size can avoid multiple memory allocations and improve performance. + + @param doc The mutable document. + @param len The desired string pool size in bytes (total string length). + @return true if successful, false if size is 0 or overflow. + */ +yyjson_api bool yyjson_mut_doc_set_str_pool_size(yyjson_mut_doc *doc, + size_t len); + +/** + Set the value pool size for a mutable document. + This function does not allocate memory immediately, but uses the size when + the next memory allocation is needed. + + If the caller knows the approximate number of values that the document needs to + store (e.g. create new value with `yyjson_mut_xxx` functions), setting a larger + size can avoid multiple memory allocations and improve performance. + + @param doc The mutable document. + @param count The desired value pool size (number of `yyjson_mut_val`). + @return true if successful, false if size is 0 or overflow. + */ +yyjson_api bool yyjson_mut_doc_set_val_pool_size(yyjson_mut_doc *doc, + size_t count); + /** Release the JSON document and free the memory. After calling this function, the `doc` and all values from the `doc` are no longer available. This function will do nothing if the `doc` is NULL. */ @@ -1911,6 +2259,10 @@ yyjson_api_inline int yyjson_mut_get_int(yyjson_mut_val *val); Returns 0.0 if `val` is NULL or type is not real(double). 
*/ yyjson_api_inline double yyjson_mut_get_real(yyjson_mut_val *val); +/** Returns the content and typecast to `double` if the value is number. + Returns 0.0 if `val` is NULL or type is not number(uint/sint/real). */ +yyjson_api_inline double yyjson_mut_get_num(yyjson_mut_val *val); + /** Returns the content if the value is string. Returns NULL if `val` is NULL or type is not string. */ yyjson_api_inline const char *yyjson_mut_get_str(yyjson_mut_val *val); @@ -1933,7 +2285,7 @@ yyjson_api_inline bool yyjson_mut_equals_strn(yyjson_mut_val *val, /** Returns whether two JSON values are equal (deep compare). Returns false if input is NULL. - + @note the result may be inaccurate if object has duplicate keys. @warning This function is recursive and may cause a stack overflow if the object level is too deep. */ yyjson_api_inline bool yyjson_mut_equals(yyjson_mut_val *lhs, @@ -2128,8 +2480,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_get_last(yyjson_mut_val *arr); @par Example @code yyjson_mut_val *val; - yyjson_mut_arr_iter iter; - yyjson_mut_arr_iter_init(arr, &iter); + yyjson_mut_arr_iter iter = yyjson_mut_arr_iter_with(arr); while ((val = yyjson_mut_arr_iter_next(&iter))) { your_func(val); if (your_val_is_unused(val)) { @@ -2138,7 +2489,13 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_get_last(yyjson_mut_val *arr); } @endcode */ -typedef struct yyjson_mut_arr_iter yyjson_mut_arr_iter; +typedef struct yyjson_mut_arr_iter { + size_t idx; /**< next value's index */ + size_t max; /**< maximum index (arr.size) */ + yyjson_mut_val *cur; /**< current value */ + yyjson_mut_val *pre; /**< previous value */ + yyjson_mut_val *arr; /**< the array being iterated */ +} yyjson_mut_arr_iter; /** Initialize an iterator for this array. 
@@ -2154,6 +2511,18 @@ typedef struct yyjson_mut_arr_iter yyjson_mut_arr_iter; yyjson_api_inline bool yyjson_mut_arr_iter_init(yyjson_mut_val *arr, yyjson_mut_arr_iter *iter); +/** + Create an iterator with an array, same as `yyjson_mut_arr_iter_init()`. + + @param arr The array to be iterated over. + If this parameter is NULL or not an array, an empty iterator will be returned. + @return A new iterator for the array. + + @note The iterator does not need to be destroyed. + */ +yyjson_api_inline yyjson_mut_arr_iter yyjson_mut_arr_iter_with( + yyjson_mut_val *arr); + /** Returns whether the iteration has more elements. If `iter` is NULL, this function will return false. @@ -2901,8 +3270,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_getn(yyjson_mut_val *obj, @par Example @code yyjson_mut_val *key, *val; - yyjson_mut_obj_iter iter; - yyjson_mut_obj_iter_init(obj, &iter); + yyjson_mut_obj_iter iter = yyjson_mut_obj_iter_with(obj); while ((key = yyjson_mut_obj_iter_next(&iter))) { val = yyjson_mut_obj_iter_get_val(key); your_func(key, val); @@ -2917,14 +3285,19 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_getn(yyjson_mut_val *obj, @code // {"k1":1, "k2": 3, "k3": 3} yyjson_mut_val *key, *val; - yyjson_mut_obj_iter iter; - yyjson_mut_obj_iter_init(obj, &iter); + yyjson_mut_obj_iter iter = yyjson_mut_obj_iter_with(obj); yyjson_mut_val *v1 = yyjson_mut_obj_iter_get(&iter, "k1"); yyjson_mut_val *v3 = yyjson_mut_obj_iter_get(&iter, "k3"); @endcode @see `yyjson_mut_obj_iter_get()` and `yyjson_mut_obj_iter_getn()` */ -typedef struct yyjson_mut_obj_iter yyjson_mut_obj_iter; +typedef struct yyjson_mut_obj_iter { + size_t idx; /**< next key's index */ + size_t max; /**< maximum key index (obj.size) */ + yyjson_mut_val *cur; /**< current key */ + yyjson_mut_val *pre; /**< previous key */ + yyjson_mut_val *obj; /**< the object being iterated */ +} yyjson_mut_obj_iter; /** Initialize an iterator for this object.
@@ -2940,6 +3313,18 @@ typedef struct yyjson_mut_obj_iter yyjson_mut_obj_iter; yyjson_api_inline bool yyjson_mut_obj_iter_init(yyjson_mut_val *obj, yyjson_mut_obj_iter *iter); +/** + Create an iterator with an object, same as `yyjson_mut_obj_iter_init()`. + + @param obj The object to be iterated over. + If this parameter is NULL or not an object, an empty iterator will be returned. + @return A new iterator for the object. + + @note The iterator does not need to be destroyed. + */ +yyjson_api_inline yyjson_mut_obj_iter yyjson_mut_obj_iter_with( + yyjson_mut_val *obj); + /** Returns whether the iteration has more elements. If `iter` is NULL, this function will return false. @@ -2962,7 +3347,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_iter_get_val( yyjson_mut_val *key); /** - Removes and returns current key-value pair in the iteration. + Removes current key-value pair in the iteration, returns the removed value. If `iter` is NULL, this function will return NULL. */ yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_iter_remove( @@ -3199,7 +3584,7 @@ yyjson_api_inline bool yyjson_mut_obj_rotate(yyjson_mut_val *obj, The `key` should be a null-terminated UTF-8 string. This function allows duplicated key in one object. - @warning The key string are not copied, you should keep the string + @warning The key string is not copied, you should keep the string unmodified for the lifetime of this JSON document. */ yyjson_api_inline bool yyjson_mut_obj_add_null(yyjson_mut_doc *doc, yyjson_mut_val *obj, @@ -3209,7 +3594,7 @@ yyjson_api_inline bool yyjson_mut_obj_add_null(yyjson_mut_doc *doc, The `key` should be a null-terminated UTF-8 string. This function allows duplicated key in one object. - @warning The key string are not copied, you should keep the string + @warning The key string is not copied, you should keep the string unmodified for the lifetime of this JSON document.
*/ yyjson_api_inline bool yyjson_mut_obj_add_true(yyjson_mut_doc *doc, yyjson_mut_val *obj, @@ -3219,7 +3604,7 @@ yyjson_api_inline bool yyjson_mut_obj_add_true(yyjson_mut_doc *doc, The `key` should be a null-terminated UTF-8 string. This function allows duplicated key in one object. - @warning The key string are not copied, you should keep the string + @warning The key string is not copied, you should keep the string unmodified for the lifetime of this JSON document. */ yyjson_api_inline bool yyjson_mut_obj_add_false(yyjson_mut_doc *doc, yyjson_mut_val *obj, @@ -3229,7 +3614,7 @@ yyjson_api_inline bool yyjson_mut_obj_add_false(yyjson_mut_doc *doc, The `key` should be a null-terminated UTF-8 string. This function allows duplicated key in one object. - @warning The key string are not copied, you should keep the string + @warning The key string is not copied, you should keep the string unmodified for the lifetime of this JSON document. */ yyjson_api_inline bool yyjson_mut_obj_add_bool(yyjson_mut_doc *doc, yyjson_mut_val *obj, @@ -3239,7 +3624,7 @@ yyjson_api_inline bool yyjson_mut_obj_add_bool(yyjson_mut_doc *doc, The `key` should be a null-terminated UTF-8 string. This function allows duplicated key in one object. - @warning The key string are not copied, you should keep the string + @warning The key string is not copied, you should keep the string unmodified for the lifetime of this JSON document. */ yyjson_api_inline bool yyjson_mut_obj_add_uint(yyjson_mut_doc *doc, yyjson_mut_val *obj, @@ -3249,7 +3634,7 @@ yyjson_api_inline bool yyjson_mut_obj_add_uint(yyjson_mut_doc *doc, The `key` should be a null-terminated UTF-8 string. This function allows duplicated key in one object. - @warning The key string are not copied, you should keep the string + @warning The key string is not copied, you should keep the string unmodified for the lifetime of this JSON document. 
*/ yyjson_api_inline bool yyjson_mut_obj_add_sint(yyjson_mut_doc *doc, yyjson_mut_val *obj, @@ -3259,7 +3644,7 @@ yyjson_api_inline bool yyjson_mut_obj_add_sint(yyjson_mut_doc *doc, The `key` should be a null-terminated UTF-8 string. This function allows duplicated key in one object. - @warning The key string are not copied, you should keep the string + @warning The key string is not copied, you should keep the string unmodified for the lifetime of this JSON document. */ yyjson_api_inline bool yyjson_mut_obj_add_int(yyjson_mut_doc *doc, yyjson_mut_val *obj, @@ -3269,7 +3654,7 @@ yyjson_api_inline bool yyjson_mut_obj_add_int(yyjson_mut_doc *doc, The `key` should be a null-terminated UTF-8 string. This function allows duplicated key in one object. - @warning The key string are not copied, you should keep the string + @warning The key string is not copied, you should keep the string unmodified for the lifetime of this JSON document. */ yyjson_api_inline bool yyjson_mut_obj_add_real(yyjson_mut_doc *doc, yyjson_mut_val *obj, @@ -3279,7 +3664,7 @@ yyjson_api_inline bool yyjson_mut_obj_add_real(yyjson_mut_doc *doc, The `key` and `val` should be null-terminated UTF-8 strings. This function allows duplicated key in one object. - @warning The key/value string are not copied, you should keep these strings + @warning The key/value strings are not copied, you should keep these strings unmodified for the lifetime of this JSON document. */ yyjson_api_inline bool yyjson_mut_obj_add_str(yyjson_mut_doc *doc, yyjson_mut_val *obj, @@ -3291,7 +3676,7 @@ yyjson_api_inline bool yyjson_mut_obj_add_str(yyjson_mut_doc *doc, The `len` should be the length of the `val`, in bytes. This function allows duplicated key in one object. - @warning The key/value string are not copied, you should keep these strings + @warning The key/value strings are not copied, you should keep these strings unmodified for the lifetime of this JSON document. 
*/ yyjson_api_inline bool yyjson_mut_obj_add_strn(yyjson_mut_doc *doc, yyjson_mut_val *obj, @@ -3303,7 +3688,7 @@ yyjson_api_inline bool yyjson_mut_obj_add_strn(yyjson_mut_doc *doc, The value string is copied. This function allows duplicated key in one object. - @warning The key string are not copied, you should keep the string + @warning The key string is not copied, you should keep the string unmodified for the lifetime of this JSON document. */ yyjson_api_inline bool yyjson_mut_obj_add_strcpy(yyjson_mut_doc *doc, yyjson_mut_val *obj, @@ -3316,18 +3701,44 @@ yyjson_api_inline bool yyjson_mut_obj_add_strcpy(yyjson_mut_doc *doc, The `len` should be the length of the `val`, in bytes. This function allows duplicated key in one object. - @warning The key/value string are not copied, you should keep these strings + @warning The key string is not copied, you should keep the string unmodified for the lifetime of this JSON document. */ yyjson_api_inline bool yyjson_mut_obj_add_strncpy(yyjson_mut_doc *doc, yyjson_mut_val *obj, const char *key, const char *val, size_t len); +/** + Creates and adds a new array to the target object. + The `key` should be a null-terminated UTF-8 string. + This function allows duplicated key in one object. + + @warning The key string is not copied, you should keep the string + unmodified for the lifetime of this JSON document. + @return The new array, or NULL on error. + */ +yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_add_arr(yyjson_mut_doc *doc, + yyjson_mut_val *obj, + const char *key); + +/** + Creates and adds a new object to the target object. + The `key` should be a null-terminated UTF-8 string. + This function allows duplicated key in one object. + + @warning The key string is not copied, you should keep the string + unmodified for the lifetime of this JSON document. + @return The new object, or NULL on error.
+ */ +yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_add_obj(yyjson_mut_doc *doc, + yyjson_mut_val *obj, + const char *key); + /** Adds a JSON value at the end of the object. The `key` should be a null-terminated UTF-8 string. This function allows duplicated key in one object. - @warning The key string are not copied, you should keep the string + @warning The key string is not copied, you should keep the string unmodified for the lifetime of this JSON document. */ yyjson_api_inline bool yyjson_mut_obj_add_val(yyjson_mut_doc *doc, yyjson_mut_val *obj, @@ -3384,98 +3795,683 @@ yyjson_api_inline bool yyjson_mut_obj_rename_keyn(yyjson_mut_doc *doc, /*============================================================================== - * JSON Pointer API + * JSON Pointer API (RFC 6901) * https://tools.ietf.org/html/rfc6901 *============================================================================*/ -/** Get a JSON value with JSON Pointer (RFC 6901). - The `ptr` should be a null-terminated UTF-8 string. - - Returns NULL if there's no matched value. - Returns NULL if `val/ptr` is NULL or `val` is not object. */ -yyjson_api_inline yyjson_val *yyjson_get_pointer(yyjson_val *val, - const char *ptr); +/** JSON Pointer error code. */ +typedef uint32_t yyjson_ptr_code; -/** Get a JSON value with JSON Pointer (RFC 6901). - The `ptr` should be a UTF-8 string, null-terminator is not required. - The `len` should be the length of the `ptr`, in bytes. - - Returns NULL if there's no matched value. - Returns NULL if `val/ptr` is NULL or `val` is not object. */ -yyjson_api_inline yyjson_val *yyjson_get_pointern(yyjson_val *val, - const char *ptr, - size_t len); +/** No JSON pointer error. */ +static const yyjson_ptr_code YYJSON_PTR_ERR_NONE = 0; -/** Get a JSON value with JSON Pointer (RFC 6901). - The `ptr` should be a null-terminated UTF-8 string. - - Returns NULL if there's no matched value. 
*/ -yyjson_api_inline yyjson_val *yyjson_doc_get_pointer(yyjson_doc *doc, - const char *ptr); +/** Invalid input parameter, such as NULL input. */ +static const yyjson_ptr_code YYJSON_PTR_ERR_PARAMETER = 1; -/** Get a JSON value with JSON Pointer (RFC 6901). - The `ptr` should be a UTF-8 string, null-terminator is not required. - The `len` should be the length of the `ptr`, in bytes. - - Returns NULL if there's no matched value. */ -yyjson_api_inline yyjson_val *yyjson_doc_get_pointern(yyjson_doc *doc, - const char *ptr, - size_t len); +/** JSON pointer syntax error, such as invalid escape, token no prefix. */ +static const yyjson_ptr_code YYJSON_PTR_ERR_SYNTAX = 2; -/** Get a JSON value with JSON Pointer (RFC 6901). - The `ptr` should be a null-terminated UTF-8 string. - - Returns NULL if there's no matched value. */ -yyjson_api_inline yyjson_mut_val *yyjson_mut_get_pointer(yyjson_mut_val *val, - const char *ptr); +/** JSON pointer resolve failed, such as index out of range, key not found. */ +static const yyjson_ptr_code YYJSON_PTR_ERR_RESOLVE = 3; -/** Get a JSON value with JSON Pointer (RFC 6901). - The `ptr` should be a UTF-8 string, null-terminator is not required. - The `len` should be the length of the `ptr`, in bytes. - - Returns NULL if there's no matched value. */ -yyjson_api_inline yyjson_mut_val *yyjson_mut_get_pointern(yyjson_mut_val *val, - const char *ptr, - size_t len); +/** Document's root is NULL, but it is required for the function call. */ +static const yyjson_ptr_code YYJSON_PTR_ERR_NULL_ROOT = 4; -/** Get a JSON value with JSON Pointer (RFC 6901). - The `ptr` should be a null-terminated UTF-8 string. - - Returns NULL if there's no matched value. */ -yyjson_api_inline yyjson_mut_val *yyjson_mut_doc_get_pointer( - yyjson_mut_doc *doc, const char *ptr); +/** Cannot set root as the target is not a document. */ +static const yyjson_ptr_code YYJSON_PTR_ERR_SET_ROOT = 5; -/** Get a JSON value with JSON Pointer (RFC 6901). 
- The `ptr` should be a UTF-8 string, null-terminator is not required. - The `len` should be the length of the `ptr`, in bytes. - - Returns NULL if there's no matched value. */ -yyjson_api_inline yyjson_mut_val *yyjson_mut_doc_get_pointern( - yyjson_mut_doc *doc, const char *ptr, size_t len); +/** The memory allocation failed and a new value could not be created. */ +static const yyjson_ptr_code YYJSON_PTR_ERR_MEMORY_ALLOCATION = 6; +/** Error information for JSON pointer. */ +typedef struct yyjson_ptr_err { + /** Error code, see `yyjson_ptr_code` for all possible values. */ + yyjson_ptr_code code; + /** Error message, constant, no need to free (NULL if no error). */ + const char *msg; + /** Error byte position for input JSON pointer (0 if no error). */ + size_t pos; +} yyjson_ptr_err; +/** + A context for JSON pointer operation. + + This struct stores the context of JSON Pointer operation result. The struct + can be used with three helper functions: `ctx_append()`, `ctx_replace()`, and + `ctx_remove()`, which perform the corresponding operations on the container + without re-parsing the JSON Pointer. + + For example: + @code + // doc before: {"a":[0,1,null]} + // ptr: "/a/2" + val = yyjson_mut_doc_ptr_getx(doc, ptr, strlen(ptr), &ctx, &err); + if (yyjson_is_null(val)) { + yyjson_ptr_ctx_remove(&ctx); + } + // doc after: {"a":[0,1]} + @endcode + */ +typedef struct yyjson_ptr_ctx { + /** + The container (parent) of the target value. It can be either an array or + an object. If the target location has no value, but all its parent + containers exist, and the target location can be used to insert a new + value, then `ctn` is the parent container of the target location. + Otherwise, `ctn` is NULL. + */ + yyjson_mut_val *ctn; + /** + The previous sibling of the target value. It can be either a value in an + array or a key in an object. As the container is a `circular linked list` + of elements, `pre` is the previous node of the target value. 
If the + operation is `add` or `set`, then `pre` is the previous node of the new + value, not the original target value. If the target value does not exist, + `pre` is NULL. + */ + yyjson_mut_val *pre; + /** + The removed value if the operation is `set`, `replace` or `remove`. It can + be used to restore the original state of the document if needed. + */ + yyjson_mut_val *old; +} yyjson_ptr_ctx; -/*============================================================================== - * JSON Merge-Patch API +/** + Get value by a JSON Pointer. + @param doc The JSON document to be queried. + @param ptr The JSON pointer string (UTF-8 with null-terminator). + @return The value referenced by the JSON pointer. + NULL if `doc` or `ptr` is NULL, or the JSON pointer cannot be resolved. + */ +yyjson_api_inline yyjson_val *yyjson_doc_ptr_get(yyjson_doc *doc, + const char *ptr); + +/** + Get value by a JSON Pointer. + @param doc The JSON document to be queried. + @param ptr The JSON pointer string (UTF-8, null-terminator is not required). + @param len The length of `ptr` in bytes. + @return The value referenced by the JSON pointer. + NULL if `doc` or `ptr` is NULL, or the JSON pointer cannot be resolved. + */ +yyjson_api_inline yyjson_val *yyjson_doc_ptr_getn(yyjson_doc *doc, + const char *ptr, size_t len); + +/** + Get value by a JSON Pointer. + @param doc The JSON document to be queried. + @param ptr The JSON pointer string (UTF-8, null-terminator is not required). + @param len The length of `ptr` in bytes. + @param err A pointer to store the error information, or NULL if not needed. + @return The value referenced by the JSON pointer. + NULL if `doc` or `ptr` is NULL, or the JSON pointer cannot be resolved. + */ +yyjson_api_inline yyjson_val *yyjson_doc_ptr_getx(yyjson_doc *doc, + const char *ptr, size_t len, + yyjson_ptr_err *err); + +/** + Get value by a JSON Pointer. + @param val The JSON value to be queried. + @param ptr The JSON pointer string (UTF-8 with null-terminator). 
+ @return The value referenced by the JSON pointer. + NULL if `val` or `ptr` is NULL, or the JSON pointer cannot be resolved. + */ +yyjson_api_inline yyjson_val *yyjson_ptr_get(yyjson_val *val, + const char *ptr); + +/** + Get value by a JSON Pointer. + @param val The JSON value to be queried. + @param ptr The JSON pointer string (UTF-8, null-terminator is not required). + @param len The length of `ptr` in bytes. + @return The value referenced by the JSON pointer. + NULL if `val` or `ptr` is NULL, or the JSON pointer cannot be resolved. + */ +yyjson_api_inline yyjson_val *yyjson_ptr_getn(yyjson_val *val, + const char *ptr, size_t len); + +/** + Get value by a JSON Pointer. + @param val The JSON value to be queried. + @param ptr The JSON pointer string (UTF-8, null-terminator is not required). + @param len The length of `ptr` in bytes. + @param err A pointer to store the error information, or NULL if not needed. + @return The value referenced by the JSON pointer. + NULL if `val` or `ptr` is NULL, or the JSON pointer cannot be resolved. + */ +yyjson_api_inline yyjson_val *yyjson_ptr_getx(yyjson_val *val, + const char *ptr, size_t len, + yyjson_ptr_err *err); + +/** + Get value by a JSON Pointer. + @param doc The JSON document to be queried. + @param ptr The JSON pointer string (UTF-8 with null-terminator). + @return The value referenced by the JSON pointer. + NULL if `doc` or `ptr` is NULL, or the JSON pointer cannot be resolved. + */ +yyjson_api_inline yyjson_mut_val *yyjson_mut_doc_ptr_get(yyjson_mut_doc *doc, + const char *ptr); + +/** + Get value by a JSON Pointer. + @param doc The JSON document to be queried. + @param ptr The JSON pointer string (UTF-8, null-terminator is not required). + @param len The length of `ptr` in bytes. + @return The value referenced by the JSON pointer. + NULL if `doc` or `ptr` is NULL, or the JSON pointer cannot be resolved. 
+ */ +yyjson_api_inline yyjson_mut_val *yyjson_mut_doc_ptr_getn(yyjson_mut_doc *doc, + const char *ptr, + size_t len); + +/** + Get value by a JSON Pointer. + @param doc The JSON document to be queried. + @param ptr The JSON pointer string (UTF-8, null-terminator is not required). + @param len The length of `ptr` in bytes. + @param ctx A pointer to store the result context, or NULL if not needed. + @param err A pointer to store the error information, or NULL if not needed. + @return The value referenced by the JSON pointer. + NULL if `doc` or `ptr` is NULL, or the JSON pointer cannot be resolved. + */ +yyjson_api_inline yyjson_mut_val *yyjson_mut_doc_ptr_getx(yyjson_mut_doc *doc, + const char *ptr, + size_t len, + yyjson_ptr_ctx *ctx, + yyjson_ptr_err *err); + +/** + Get value by a JSON Pointer. + @param val The JSON value to be queried. + @param ptr The JSON pointer string (UTF-8 with null-terminator). + @return The value referenced by the JSON pointer. + NULL if `val` or `ptr` is NULL, or the JSON pointer cannot be resolved. + */ +yyjson_api_inline yyjson_mut_val *yyjson_mut_ptr_get(yyjson_mut_val *val, + const char *ptr); + +/** + Get value by a JSON Pointer. + @param val The JSON value to be queried. + @param ptr The JSON pointer string (UTF-8, null-terminator is not required). + @param len The length of `ptr` in bytes. + @return The value referenced by the JSON pointer. + NULL if `val` or `ptr` is NULL, or the JSON pointer cannot be resolved. + */ +yyjson_api_inline yyjson_mut_val *yyjson_mut_ptr_getn(yyjson_mut_val *val, + const char *ptr, + size_t len); + +/** + Get value by a JSON Pointer. + @param val The JSON value to be queried. + @param ptr The JSON pointer string (UTF-8, null-terminator is not required). + @param len The length of `ptr` in bytes. + @param ctx A pointer to store the result context, or NULL if not needed. + @param err A pointer to store the error information, or NULL if not needed. + @return The value referenced by the JSON pointer. 
+ NULL if `val` or `ptr` is NULL, or the JSON pointer cannot be resolved. + */ +yyjson_api_inline yyjson_mut_val *yyjson_mut_ptr_getx(yyjson_mut_val *val, + const char *ptr, + size_t len, + yyjson_ptr_ctx *ctx, + yyjson_ptr_err *err); + +/** + Add (insert) value by a JSON pointer. + @param doc The target JSON document. + @param ptr The JSON pointer string (UTF-8 with null-terminator). + @param new_val The value to be added. + @return true if JSON pointer is valid and new value is added, false otherwise. + @note The parent nodes will be created if they do not exist. + */ +yyjson_api_inline bool yyjson_mut_doc_ptr_add(yyjson_mut_doc *doc, + const char *ptr, + yyjson_mut_val *new_val); + +/** + Add (insert) value by a JSON pointer. + @param doc The target JSON document. + @param ptr The JSON pointer string (UTF-8, null-terminator is not required). + @param len The length of `ptr` in bytes. + @param new_val The value to be added. + @return true if JSON pointer is valid and new value is added, false otherwise. + @note The parent nodes will be created if they do not exist. + */ +yyjson_api_inline bool yyjson_mut_doc_ptr_addn(yyjson_mut_doc *doc, + const char *ptr, size_t len, + yyjson_mut_val *new_val); + +/** + Add (insert) value by a JSON pointer. + @param doc The target JSON document. + @param ptr The JSON pointer string (UTF-8, null-terminator is not required). + @param len The length of `ptr` in bytes. + @param new_val The value to be added. + @param create_parent Whether to create parent nodes if not exist. + @param ctx A pointer to store the result context, or NULL if not needed. + @param err A pointer to store the error information, or NULL if not needed. + @return true if JSON pointer is valid and new value is added, false otherwise. 
+ */ +yyjson_api_inline bool yyjson_mut_doc_ptr_addx(yyjson_mut_doc *doc, + const char *ptr, size_t len, + yyjson_mut_val *new_val, + bool create_parent, + yyjson_ptr_ctx *ctx, + yyjson_ptr_err *err); + +/** + Add (insert) value by a JSON pointer. + @param val The target JSON value. + @param ptr The JSON pointer string (UTF-8 with null-terminator). + @param doc Only used to create new values when needed. + @param new_val The value to be added. + @return true if JSON pointer is valid and new value is added, false otherwise. + @note The parent nodes will be created if they do not exist. + */ +yyjson_api_inline bool yyjson_mut_ptr_add(yyjson_mut_val *val, + const char *ptr, + yyjson_mut_val *new_val, + yyjson_mut_doc *doc); + +/** + Add (insert) value by a JSON pointer. + @param val The target JSON value. + @param ptr The JSON pointer string (UTF-8, null-terminator is not required). + @param len The length of `ptr` in bytes. + @param doc Only used to create new values when needed. + @param new_val The value to be added. + @return true if JSON pointer is valid and new value is added, false otherwise. + @note The parent nodes will be created if they do not exist. + */ +yyjson_api_inline bool yyjson_mut_ptr_addn(yyjson_mut_val *val, + const char *ptr, size_t len, + yyjson_mut_val *new_val, + yyjson_mut_doc *doc); + +/** + Add (insert) value by a JSON pointer. + @param val The target JSON value. + @param ptr The JSON pointer string (UTF-8, null-terminator is not required). + @param len The length of `ptr` in bytes. + @param doc Only used to create new values when needed. + @param new_val The value to be added. + @param create_parent Whether to create parent nodes if not exist. + @param ctx A pointer to store the result context, or NULL if not needed. + @param err A pointer to store the error information, or NULL if not needed. + @return true if JSON pointer is valid and new value is added, false otherwise. 
+ */ +yyjson_api_inline bool yyjson_mut_ptr_addx(yyjson_mut_val *val, + const char *ptr, size_t len, + yyjson_mut_val *new_val, + yyjson_mut_doc *doc, + bool create_parent, + yyjson_ptr_ctx *ctx, + yyjson_ptr_err *err); + +/** + Set value by a JSON pointer. + @param doc The target JSON document. + @param ptr The JSON pointer string (UTF-8 with null-terminator). + @param new_val The value to be set, pass NULL to remove. + @return true if JSON pointer is valid and new value is set, false otherwise. + @note The parent nodes will be created if they do not exist. + If the target value already exists, it will be replaced by the new value. + */ +yyjson_api_inline bool yyjson_mut_doc_ptr_set(yyjson_mut_doc *doc, + const char *ptr, + yyjson_mut_val *new_val); + +/** + Set value by a JSON pointer. + @param doc The target JSON document. + @param ptr The JSON pointer string (UTF-8, null-terminator is not required). + @param len The length of `ptr` in bytes. + @param new_val The value to be set, pass NULL to remove. + @return true if JSON pointer is valid and new value is set, false otherwise. + @note The parent nodes will be created if they do not exist. + If the target value already exists, it will be replaced by the new value. + */ +yyjson_api_inline bool yyjson_mut_doc_ptr_setn(yyjson_mut_doc *doc, + const char *ptr, size_t len, + yyjson_mut_val *new_val); + +/** + Set value by a JSON pointer. + @param doc The target JSON document. + @param ptr The JSON pointer string (UTF-8, null-terminator is not required). + @param len The length of `ptr` in bytes. + @param new_val The value to be set, pass NULL to remove. + @param create_parent Whether to create parent nodes if not exist. + @param ctx A pointer to store the result context, or NULL if not needed. + @param err A pointer to store the error information, or NULL if not needed. + @return true if JSON pointer is valid and new value is set, false otherwise. 
+ @note If the target value already exists, it will be replaced by the new value. + */ +yyjson_api_inline bool yyjson_mut_doc_ptr_setx(yyjson_mut_doc *doc, + const char *ptr, size_t len, + yyjson_mut_val *new_val, + bool create_parent, + yyjson_ptr_ctx *ctx, + yyjson_ptr_err *err); + +/** + Set value by a JSON pointer. + @param val The target JSON value. + @param ptr The JSON pointer string (UTF-8 with null-terminator). + @param new_val The value to be set, pass NULL to remove. + @param doc Only used to create new values when needed. + @return true if JSON pointer is valid and new value is set, false otherwise. + @note The parent nodes will be created if they do not exist. + If the target value already exists, it will be replaced by the new value. + */ +yyjson_api_inline bool yyjson_mut_ptr_set(yyjson_mut_val *val, + const char *ptr, + yyjson_mut_val *new_val, + yyjson_mut_doc *doc); + +/** + Set value by a JSON pointer. + @param val The target JSON value. + @param ptr The JSON pointer string (UTF-8, null-terminator is not required). + @param len The length of `ptr` in bytes. + @param new_val The value to be set, pass NULL to remove. + @param doc Only used to create new values when needed. + @return true if JSON pointer is valid and new value is set, false otherwise. + @note The parent nodes will be created if they do not exist. + If the target value already exists, it will be replaced by the new value. + */ +yyjson_api_inline bool yyjson_mut_ptr_setn(yyjson_mut_val *val, + const char *ptr, size_t len, + yyjson_mut_val *new_val, + yyjson_mut_doc *doc); + +/** + Set value by a JSON pointer. + @param val The target JSON value. + @param ptr The JSON pointer string (UTF-8, null-terminator is not required). + @param len The length of `ptr` in bytes. + @param new_val The value to be set, pass NULL to remove. + @param doc Only used to create new values when needed. + @param create_parent Whether to create parent nodes if they do not exist. 
+ @param ctx A pointer to store the result context, or NULL if not needed. + @param err A pointer to store the error information, or NULL if not needed. + @return true if JSON pointer is valid and new value is set, false otherwise. + @note If the target value already exists, it will be replaced by the new value. + */ +yyjson_api_inline bool yyjson_mut_ptr_setx(yyjson_mut_val *val, + const char *ptr, size_t len, + yyjson_mut_val *new_val, + yyjson_mut_doc *doc, + bool create_parent, + yyjson_ptr_ctx *ctx, + yyjson_ptr_err *err); + +/** + Replace value by a JSON pointer. + @param doc The target JSON document. + @param ptr The JSON pointer string (UTF-8 with null-terminator). + @param new_val The new value to replace the old one. + @return The old value that was replaced, or NULL if not found. + */ +yyjson_api_inline yyjson_mut_val *yyjson_mut_doc_ptr_replace( + yyjson_mut_doc *doc, const char *ptr, yyjson_mut_val *new_val); + +/** + Replace value by a JSON pointer. + @param doc The target JSON document. + @param ptr The JSON pointer string (UTF-8, null-terminator is not required). + @param len The length of `ptr` in bytes. + @param new_val The new value to replace the old one. + @return The old value that was replaced, or NULL if not found. + */ +yyjson_api_inline yyjson_mut_val *yyjson_mut_doc_ptr_replacen( + yyjson_mut_doc *doc, const char *ptr, size_t len, yyjson_mut_val *new_val); + +/** + Replace value by a JSON pointer. + @param doc The target JSON document. + @param ptr The JSON pointer string (UTF-8, null-terminator is not required). + @param len The length of `ptr` in bytes. + @param new_val The new value to replace the old one. + @param ctx A pointer to store the result context, or NULL if not needed. + @param err A pointer to store the error information, or NULL if not needed. + @return The old value that was replaced, or NULL if not found. 
+ */ +yyjson_api_inline yyjson_mut_val *yyjson_mut_doc_ptr_replacex( + yyjson_mut_doc *doc, const char *ptr, size_t len, yyjson_mut_val *new_val, + yyjson_ptr_ctx *ctx, yyjson_ptr_err *err); + +/** + Replace value by a JSON pointer. + @param val The target JSON value. + @param ptr The JSON pointer string (UTF-8 with null-terminator). + @param new_val The new value to replace the old one. + @return The old value that was replaced, or NULL if not found. + */ +yyjson_api_inline yyjson_mut_val *yyjson_mut_ptr_replace( + yyjson_mut_val *val, const char *ptr, yyjson_mut_val *new_val); + +/** + Replace value by a JSON pointer. + @param val The target JSON value. + @param ptr The JSON pointer string (UTF-8, null-terminator is not required). + @param len The length of `ptr` in bytes. + @param new_val The new value to replace the old one. + @return The old value that was replaced, or NULL if not found. + */ +yyjson_api_inline yyjson_mut_val *yyjson_mut_ptr_replacen( + yyjson_mut_val *val, const char *ptr, size_t len, yyjson_mut_val *new_val); + +/** + Replace value by a JSON pointer. + @param val The target JSON value. + @param ptr The JSON pointer string (UTF-8, null-terminator is not required). + @param len The length of `ptr` in bytes. + @param new_val The new value to replace the old one. + @param ctx A pointer to store the result context, or NULL if not needed. + @param err A pointer to store the error information, or NULL if not needed. + @return The old value that was replaced, or NULL if not found. + */ +yyjson_api_inline yyjson_mut_val *yyjson_mut_ptr_replacex( + yyjson_mut_val *val, const char *ptr, size_t len, yyjson_mut_val *new_val, + yyjson_ptr_ctx *ctx, yyjson_ptr_err *err); + +/** + Remove value by a JSON pointer. + @param doc The target JSON document. + @param ptr The JSON pointer string (UTF-8 with null-terminator). + @return The removed value, or NULL on error. 
+ */ +yyjson_api_inline yyjson_mut_val *yyjson_mut_doc_ptr_remove( + yyjson_mut_doc *doc, const char *ptr); + +/** + Remove value by a JSON pointer. + @param doc The target JSON document. + @param ptr The JSON pointer string (UTF-8, null-terminator is not required). + @param len The length of `ptr` in bytes. + @return The removed value, or NULL on error. + */ +yyjson_api_inline yyjson_mut_val *yyjson_mut_doc_ptr_removen( + yyjson_mut_doc *doc, const char *ptr, size_t len); + +/** + Remove value by a JSON pointer. + @param doc The target JSON document. + @param ptr The JSON pointer string (UTF-8, null-terminator is not required). + @param len The length of `ptr` in bytes. + @param ctx A pointer to store the result context, or NULL if not needed. + @param err A pointer to store the error information, or NULL if not needed. + @return The removed value, or NULL on error. + */ +yyjson_api_inline yyjson_mut_val *yyjson_mut_doc_ptr_removex( + yyjson_mut_doc *doc, const char *ptr, size_t len, + yyjson_ptr_ctx *ctx, yyjson_ptr_err *err); + +/** + Remove value by a JSON pointer. + @param val The target JSON value. + @param ptr The JSON pointer string (UTF-8 with null-terminator). + @return The removed value, or NULL on error. + */ +yyjson_api_inline yyjson_mut_val *yyjson_mut_ptr_remove(yyjson_mut_val *val, + const char *ptr); + +/** + Remove value by a JSON pointer. + @param val The target JSON value. + @param ptr The JSON pointer string (UTF-8, null-terminator is not required). + @param len The length of `ptr` in bytes. + @return The removed value, or NULL on error. + */ +yyjson_api_inline yyjson_mut_val *yyjson_mut_ptr_removen(yyjson_mut_val *val, + const char *ptr, + size_t len); + +/** + Remove value by a JSON pointer. + @param val The target JSON value. + @param ptr The JSON pointer string (UTF-8, null-terminator is not required). + @param len The length of `ptr` in bytes. + @param ctx A pointer to store the result context, or NULL if not needed. 
+ @param err A pointer to store the error information, or NULL if not needed. + @return The removed value, or NULL on error. + */ +yyjson_api_inline yyjson_mut_val *yyjson_mut_ptr_removex(yyjson_mut_val *val, + const char *ptr, + size_t len, + yyjson_ptr_ctx *ctx, + yyjson_ptr_err *err); + +/** + Append value by JSON pointer context. + @param ctx The context from the `yyjson_mut_ptr_xxx()` calls. + @param key New key if `ctx->ctn` is object, or NULL if `ctx->ctn` is array. + @param val New value to be added. + @return true on success or false on failure. + */ +yyjson_api_inline bool yyjson_ptr_ctx_append(yyjson_ptr_ctx *ctx, + yyjson_mut_val *key, + yyjson_mut_val *val); + +/** + Replace value by JSON pointer context. + @param ctx The context from the `yyjson_mut_ptr_xxx()` calls. + @param val New value that replaces the old one. + @return true on success or false on failure. + @note On success, the old value will be returned via `ctx->old`. + */ +yyjson_api_inline bool yyjson_ptr_ctx_replace(yyjson_ptr_ctx *ctx, + yyjson_mut_val *val); + +/** + Remove value by JSON pointer context. + @param ctx The context from the `yyjson_mut_ptr_xxx()` calls. + @return true on success or false on failure. + @note On success, the old value will be returned via `ctx->old`. + */ +yyjson_api_inline bool yyjson_ptr_ctx_remove(yyjson_ptr_ctx *ctx); + + + +/*============================================================================== + * JSON Patch API (RFC 6902) + * https://tools.ietf.org/html/rfc6902 + *============================================================================*/ + +/** Result code for JSON patch. */ +typedef uint32_t yyjson_patch_code; + +/** Success, no error. */ +static const yyjson_patch_code YYJSON_PATCH_SUCCESS = 0; + +/** Invalid parameter, such as NULL input or non-array patch. */ +static const yyjson_patch_code YYJSON_PATCH_ERROR_INVALID_PARAMETER = 1; + +/** Memory allocation failure occurs. 
*/ +static const yyjson_patch_code YYJSON_PATCH_ERROR_MEMORY_ALLOCATION = 2; + +/** JSON patch operation is not object type. */ +static const yyjson_patch_code YYJSON_PATCH_ERROR_INVALID_OPERATION = 3; + +/** JSON patch operation is missing a required key. */ +static const yyjson_patch_code YYJSON_PATCH_ERROR_MISSING_KEY = 4; + +/** JSON patch operation member is invalid. */ +static const yyjson_patch_code YYJSON_PATCH_ERROR_INVALID_MEMBER = 5; + +/** JSON patch operation `test` not equal. */ +static const yyjson_patch_code YYJSON_PATCH_ERROR_EQUAL = 6; + +/** JSON patch operation failed on JSON pointer. */ +static const yyjson_patch_code YYJSON_PATCH_ERROR_POINTER = 7; + +/** Error information for JSON patch. */ +typedef struct yyjson_patch_err { + /** Error code, see `yyjson_patch_code` for all possible values. */ + yyjson_patch_code code; + /** Index of the error operation (0 if no error). */ + size_t idx; + /** Error message, constant, no need to free (NULL if no error). */ + const char *msg; + /** JSON pointer error if `code == YYJSON_PATCH_ERROR_POINTER`. */ + yyjson_ptr_err ptr; +} yyjson_patch_err; + +/** + Creates and returns a patched JSON value (RFC 6902). + The memory of the returned value is allocated by the `doc`. + The `err` is used to receive error information, pass NULL if not needed. + Returns NULL if the patch could not be applied. + */ +yyjson_api yyjson_mut_val *yyjson_patch(yyjson_mut_doc *doc, + yyjson_val *orig, + yyjson_val *patch, + yyjson_patch_err *err); + +/** + Creates and returns a patched JSON value (RFC 6902). + The memory of the returned value is allocated by the `doc`. + The `err` is used to receive error information, pass NULL if not needed. + Returns NULL if the patch could not be applied. 
+ */ +yyjson_api yyjson_mut_val *yyjson_mut_patch(yyjson_mut_doc *doc, + yyjson_mut_val *orig, + yyjson_mut_val *patch, + yyjson_patch_err *err); + + + +/*============================================================================== + * JSON Merge-Patch API (RFC 7386) * https://tools.ietf.org/html/rfc7386 *============================================================================*/ -/** Creates and returns a merge-patched JSON value (RFC 7386). - The memory of the returned value is allocated by the `doc`. - Returns NULL if the patch could not be applied. - - @warning This function is recursive and may cause a stack overflow if the - object level is too deep. */ +/** + Creates and returns a merge-patched JSON value (RFC 7386). + The memory of the returned value is allocated by the `doc`. + Returns NULL if the patch could not be applied. + + @warning This function is recursive and may cause a stack overflow if the + object level is too deep. + */ yyjson_api yyjson_mut_val *yyjson_merge_patch(yyjson_mut_doc *doc, yyjson_val *orig, yyjson_val *patch); -/** Creates and returns a merge-patched JSON value (RFC 7386). - The memory of the returned value is allocated by the `doc`. - Returns NULL if the patch could not be applied. - - @warning This function is recursive and may cause a stack overflow if the - object level is too deep. */ +/** + Creates and returns a merge-patched JSON value (RFC 7386). + The memory of the returned value is allocated by the `doc`. + Returns NULL if the patch could not be applied. + + @warning This function is recursive and may cause a stack overflow if the + object level is too deep. + */ yyjson_api yyjson_mut_val *yyjson_mut_merge_patch(yyjson_mut_doc *doc, yyjson_mut_val *orig, yyjson_mut_val *patch); @@ -3523,6 +4519,58 @@ struct yyjson_doc { * Unsafe JSON Value API (Implementation) *============================================================================*/ +/* + Whether the string does not need to be escaped for serialization. 
+ This function is used to optimize the writing speed of small constant strings. + This function works only if the compiler can evaluate it at compile time. + + Clang supports it since v8.0, + earlier versions do not support constant_p(strlen) and return false. + GCC supports it since at least v4.4, + earlier versions may compile it as run-time instructions. + ICC supports it since at least v16, + earlier versions are uncertain. + + @param str The C string. + @param len The returned value from strlen(str). + */ +yyjson_api_inline bool unsafe_yyjson_is_str_noesc(const char *str, size_t len) { +#if YYJSON_HAS_CONSTANT_P && \ + (!YYJSON_IS_REAL_GCC || yyjson_gcc_available(4, 4, 0)) + if (yyjson_constant_p(len) && len <= 32) { + /* + Same as the following loop: + + for (size_t i = 0; i < len; i++) { + char c = str[i]; + if (c < ' ' || c > '~' || c == '"' || c == '\\') return false; + } + + GCC evaluates it at compile time only if the string length is within 17 + and -O3 (which turns on the -fpeel-loops flag) is used. + So the loop is unrolled for GCC. 
+ */ +# define yyjson_repeat32_incr(x) \ + x(0) x(1) x(2) x(3) x(4) x(5) x(6) x(7) \ + x(8) x(9) x(10) x(11) x(12) x(13) x(14) x(15) \ + x(16) x(17) x(18) x(19) x(20) x(21) x(22) x(23) \ + x(24) x(25) x(26) x(27) x(28) x(29) x(30) x(31) +# define yyjson_check_char_noesc(i) \ + if (i < len) { \ + char c = str[i]; \ + if (c < ' ' || c > '~' || c == '"' || c == '\\') return false; } + yyjson_repeat32_incr(yyjson_check_char_noesc) +# undef yyjson_repeat32_incr +# undef yyjson_check_char_noesc + return true; + } +#else + (void)str; + (void)len; +#endif + return false; +} + yyjson_api_inline yyjson_type unsafe_yyjson_get_type(void *val) { uint8_t tag = (uint8_t)((yyjson_val *)val)->tag; return (yyjson_type)(tag & YYJSON_TYPE_MASK); @@ -3633,6 +4681,28 @@ yyjson_api_inline double unsafe_yyjson_get_real(void *val) { return ((yyjson_val *)val)->uni.f64; } +yyjson_api_inline double unsafe_yyjson_get_num(void *val) { + uint8_t tag = unsafe_yyjson_get_tag(val); + if (tag == (YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL)) { + return ((yyjson_val *)val)->uni.f64; + } else if (tag == (YYJSON_TYPE_NUM | YYJSON_SUBTYPE_SINT)) { + return (double)((yyjson_val *)val)->uni.i64; + } else if (tag == (YYJSON_TYPE_NUM | YYJSON_SUBTYPE_UINT)) { +#if YYJSON_U64_TO_F64_NO_IMPL + uint64_t msb = ((uint64_t)1) << 63; + uint64_t num = ((yyjson_val *)val)->uni.u64; + if ((num & msb) == 0) { + return (double)(int64_t)num; + } else { + return ((double)(int64_t)((num >> 1) | (num & 1))) * (double)2.0; + } +#else + return (double)((yyjson_val *)val)->uni.u64; +#endif + } + return 0.0; +} + yyjson_api_inline const char *unsafe_yyjson_get_str(void *val) { return ((yyjson_val *)val)->uni.str; } @@ -3654,9 +4724,8 @@ yyjson_api_inline yyjson_val *unsafe_yyjson_get_next(yyjson_val *val) { yyjson_api_inline bool unsafe_yyjson_equals_strn(void *val, const char *str, size_t len) { - uint64_t tag = ((uint64_t)len << YYJSON_TAG_BIT) | YYJSON_TYPE_STR; - return ((yyjson_val *)val)->tag == tag && - 
duckdb::FastMemcmp(((yyjson_val *)val)->uni.str, str, len) == 0; + return unsafe_yyjson_get_len(val) == len && + memcmp(((yyjson_val *)val)->uni.str, str, len) == 0; } yyjson_api_inline bool unsafe_yyjson_equals_str(void *val, const char *str) { @@ -3671,18 +4740,18 @@ yyjson_api_inline void unsafe_yyjson_set_type(void *val, yyjson_type type, ((yyjson_val *)val)->tag = new_tag; } -yyjson_api_inline void unsafe_yyjson_set_tag(void *val, uint8_t tag) { - uint64_t new_tag = ((yyjson_val *)val)->tag; - new_tag = (new_tag & (~(uint64_t)YYJSON_TAG_MASK)) | (uint64_t)tag; - ((yyjson_val *)val)->tag = new_tag; -} - yyjson_api_inline void unsafe_yyjson_set_len(void *val, size_t len) { uint64_t tag = ((yyjson_val *)val)->tag & YYJSON_TAG_MASK; tag |= (uint64_t)len << YYJSON_TAG_BIT; ((yyjson_val *)val)->tag = tag; } +yyjson_api_inline void unsafe_yyjson_inc_len(void *val) { + uint64_t tag = ((yyjson_val *)val)->tag; + tag += (uint64_t)(1 << YYJSON_TAG_BIT); + ((yyjson_val *)val)->tag = tag; +} + yyjson_api_inline void unsafe_yyjson_set_raw(void *val, const char *raw, size_t len) { unsafe_yyjson_set_type(val, YYJSON_TYPE_RAW, YYJSON_SUBTYPE_NONE); @@ -3720,13 +4789,16 @@ yyjson_api_inline void unsafe_yyjson_set_real(void *val, double num) { } yyjson_api_inline void unsafe_yyjson_set_str(void *val, const char *str) { - unsafe_yyjson_set_type(val, YYJSON_TYPE_STR, YYJSON_SUBTYPE_NONE); - unsafe_yyjson_set_len(val, strlen(str)); + size_t len = strlen(str); + bool noesc = unsafe_yyjson_is_str_noesc(str, len); + yyjson_subtype sub = noesc ? 
YYJSON_SUBTYPE_NOESC : YYJSON_SUBTYPE_NONE; + unsafe_yyjson_set_type(val, YYJSON_TYPE_STR, sub); + unsafe_yyjson_set_len(val, len); ((yyjson_val *)val)->uni.str = str; } yyjson_api_inline void unsafe_yyjson_set_strn(void *val, const char *str, - size_t len) { + size_t len) { unsafe_yyjson_set_type(val, YYJSON_TYPE_STR, YYJSON_SUBTYPE_NONE); unsafe_yyjson_set_len(val, len); ((yyjson_val *)val)->uni.str = str; @@ -3763,6 +4835,7 @@ yyjson_api_inline size_t yyjson_doc_get_val_count(yyjson_doc *doc) { yyjson_api_inline void yyjson_doc_free(yyjson_doc *doc) { if (doc) { yyjson_alc alc = doc->alc; + memset(&doc->alc, 0, sizeof(alc)); if (doc->str_pool) alc.free(alc.ctx, doc->str_pool); alc.free(alc.ctx, doc); } @@ -3853,6 +4926,7 @@ yyjson_api_inline const char *yyjson_get_type_desc(yyjson_val *val) { case YYJSON_TYPE_RAW | YYJSON_SUBTYPE_NONE: return "raw"; case YYJSON_TYPE_NULL | YYJSON_SUBTYPE_NONE: return "null"; case YYJSON_TYPE_STR | YYJSON_SUBTYPE_NONE: return "string"; + case YYJSON_TYPE_STR | YYJSON_SUBTYPE_NOESC: return "string"; case YYJSON_TYPE_ARR | YYJSON_SUBTYPE_NONE: return "array"; case YYJSON_TYPE_OBJ | YYJSON_SUBTYPE_NONE: return "object"; case YYJSON_TYPE_BOOL | YYJSON_SUBTYPE_TRUE: return "true"; @@ -3888,6 +4962,10 @@ yyjson_api_inline double yyjson_get_real(yyjson_val *val) { return yyjson_is_real(val) ? unsafe_yyjson_get_real(val) : 0.0; } +yyjson_api_inline double yyjson_get_num(yyjson_val *val) { + return val ? unsafe_yyjson_get_num(val) : 0.0; +} + yyjson_api_inline const char *yyjson_get_str(yyjson_val *val) { return yyjson_is_str(val) ? 
unsafe_yyjson_get_str(val) : NULL; } @@ -3898,7 +4976,8 @@ yyjson_api_inline size_t yyjson_get_len(yyjson_val *val) { yyjson_api_inline bool yyjson_equals_str(yyjson_val *val, const char *str) { if (yyjson_likely(val && str)) { - return unsafe_yyjson_equals_str(val, str); + return unsafe_yyjson_is_str(val) && + unsafe_yyjson_equals_str(val, str); } return false; } @@ -3906,7 +4985,8 @@ yyjson_api_inline bool yyjson_equals_str(yyjson_val *val, const char *str) { yyjson_api_inline bool yyjson_equals_strn(yyjson_val *val, const char *str, size_t len) { if (yyjson_likely(val && str)) { - return unsafe_yyjson_equals_strn(val, str, len); + return unsafe_yyjson_is_str(val) && + unsafe_yyjson_equals_strn(val, str, len); } return false; } @@ -4032,12 +5112,6 @@ yyjson_api_inline yyjson_val *yyjson_arr_get_last(yyjson_val *arr) { * JSON Array Iterator API (Implementation) *============================================================================*/ -struct yyjson_arr_iter { - size_t idx; /**< current index, from 0 */ - size_t max; /**< maximum index, idx < max */ - yyjson_val *cur; /**< current value */ -}; - yyjson_api_inline bool yyjson_arr_iter_init(yyjson_val *arr, yyjson_arr_iter *iter) { if (yyjson_likely(yyjson_is_arr(arr) && iter)) { @@ -4050,6 +5124,12 @@ yyjson_api_inline bool yyjson_arr_iter_init(yyjson_val *arr, return false; } +yyjson_api_inline yyjson_arr_iter yyjson_arr_iter_with(yyjson_val *arr) { + yyjson_arr_iter iter; + yyjson_arr_iter_init(arr, &iter); + return iter; +} + yyjson_api_inline bool yyjson_arr_iter_has_next(yyjson_arr_iter *iter) { return iter ? 
iter->idx < iter->max : false; } @@ -4083,15 +5163,11 @@ yyjson_api_inline yyjson_val *yyjson_obj_get(yyjson_val *obj, yyjson_api_inline yyjson_val *yyjson_obj_getn(yyjson_val *obj, const char *_key, size_t key_len) { - uint64_t tag = (((uint64_t)key_len) << YYJSON_TAG_BIT) | YYJSON_TYPE_STR; if (yyjson_likely(yyjson_is_obj(obj) && _key)) { size_t len = unsafe_yyjson_get_len(obj); yyjson_val *key = unsafe_yyjson_get_first(obj); while (len-- > 0) { - if (key->tag == tag && - duckdb::FastMemcmp(key->uni.ptr, _key, key_len) == 0) { - return key + 1; - } + if (unsafe_yyjson_equals_strn(key, _key, key_len)) return key + 1; key = unsafe_yyjson_get_next(key + 1); } } @@ -4104,13 +5180,6 @@ yyjson_api_inline yyjson_val *yyjson_obj_getn(yyjson_val *obj, * JSON Object Iterator API (Implementation) *============================================================================*/ -struct yyjson_obj_iter { - size_t idx; /**< current key index, from 0 */ - size_t max; /**< maximum key index, idx < max */ - yyjson_val *cur; /**< current key */ - yyjson_val *obj; /**< the object being iterated */ -}; - yyjson_api_inline bool yyjson_obj_iter_init(yyjson_val *obj, yyjson_obj_iter *iter) { if (yyjson_likely(yyjson_is_obj(obj) && iter)) { @@ -4124,6 +5193,12 @@ yyjson_api_inline bool yyjson_obj_iter_init(yyjson_val *obj, return false; } +yyjson_api_inline yyjson_obj_iter yyjson_obj_iter_with(yyjson_val *obj) { + yyjson_obj_iter iter; + yyjson_obj_iter_init(obj, &iter); + return iter; +} + yyjson_api_inline bool yyjson_obj_iter_has_next(yyjson_obj_iter *iter) { return iter ? 
iter->idx < iter->max : false; } @@ -4160,8 +5235,7 @@ yyjson_api_inline yyjson_val *yyjson_obj_iter_getn(yyjson_obj_iter *iter, } while (idx++ < max) { yyjson_val *next = unsafe_yyjson_get_next(cur + 1); - if (unsafe_yyjson_get_len(cur) == key_len && - duckdb::FastMemcmp(cur->uni.str, key, key_len) == 0) { + if (unsafe_yyjson_equals_strn(cur, key, key_len)) { iter->idx = idx; iter->cur = next; return cur + 1; @@ -4198,8 +5272,9 @@ struct yyjson_mut_val { A memory chunk in string memory pool. */ typedef struct yyjson_str_chunk { - struct yyjson_str_chunk *next; - /* flexible array member here */ + struct yyjson_str_chunk *next; /* next chunk linked list */ + size_t chunk_size; /* chunk size in bytes */ + /* char str[]; flexible array member */ } yyjson_str_chunk; /** @@ -4215,10 +5290,13 @@ typedef struct yyjson_str_pool { /** A memory chunk in value memory pool. + `sizeof(yyjson_val_chunk)` should not be larger than `sizeof(yyjson_mut_val)`. */ typedef struct yyjson_val_chunk { - struct yyjson_val_chunk *next; - /* flexible array member here */ + struct yyjson_val_chunk *next; /* next chunk linked list */ + size_t chunk_size; /* chunk size in bytes */ + /* char pad[sizeof(yyjson_mut_val) - sizeof(yyjson_val_chunk)]; padding */ + /* yyjson_mut_val vals[]; flexible array member */ } yyjson_val_chunk; /** @@ -4249,21 +5327,26 @@ yyjson_api bool unsafe_yyjson_val_pool_grow(yyjson_val_pool *pool, const yyjson_alc *alc, size_t count); -yyjson_api_inline char *unsafe_yyjson_mut_strncpy(yyjson_mut_doc *doc, - const char *str, size_t len) { +/* Allocate memory for string. 
*/ +yyjson_api_inline char *unsafe_yyjson_mut_str_alc(yyjson_mut_doc *doc, + size_t len) { char *mem; const yyjson_alc *alc = &doc->alc; yyjson_str_pool *pool = &doc->str_pool; - - if (!str) return NULL; if (yyjson_unlikely((size_t)(pool->end - pool->cur) <= len)) { if (yyjson_unlikely(!unsafe_yyjson_str_pool_grow(pool, alc, len + 1))) { return NULL; } } - mem = pool->cur; pool->cur = mem + len + 1; + return mem; +} + +yyjson_api_inline char *unsafe_yyjson_mut_strncpy(yyjson_mut_doc *doc, + const char *str, size_t len) { + char *mem = unsafe_yyjson_mut_str_alc(doc, len); + if (yyjson_unlikely(!mem)) return NULL; memcpy((void *)mem, (const void *)str, len); mem[len] = '\0'; return mem; @@ -4279,7 +5362,6 @@ yyjson_api_inline yyjson_mut_val *unsafe_yyjson_mut_val(yyjson_mut_doc *doc, return NULL; } } - val = pool->cur; pool->cur += count; return val; @@ -4408,6 +5490,10 @@ yyjson_api_inline double yyjson_mut_get_real(yyjson_mut_val *val) { return yyjson_get_real((yyjson_val *)val); } +yyjson_api_inline double yyjson_mut_get_num(yyjson_mut_val *val) { + return yyjson_get_num((yyjson_val *)val); +} + yyjson_api_inline const char *yyjson_mut_get_str(yyjson_mut_val *val) { return yyjson_get_str((yyjson_val *)val); } @@ -4589,6 +5675,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_bool(yyjson_mut_doc *doc, if (yyjson_likely(doc)) { yyjson_mut_val *val = unsafe_yyjson_mut_val(doc, 1); if (yyjson_likely(val)) { + _val = !!_val; val->tag = YYJSON_TYPE_BOOL | (uint8_t)((uint8_t)_val << 3); return val; } @@ -4642,7 +5729,18 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_real(yyjson_mut_doc *doc, yyjson_api_inline yyjson_mut_val *yyjson_mut_str(yyjson_mut_doc *doc, const char *str) { - if (yyjson_likely(str)) return yyjson_mut_strn(doc, str, strlen(str)); + if (yyjson_likely(doc && str)) { + size_t len = strlen(str); + bool noesc = unsafe_yyjson_is_str_noesc(str, len); + yyjson_subtype sub = noesc ? 
YYJSON_SUBTYPE_NOESC : YYJSON_SUBTYPE_NONE; + yyjson_mut_val *val = unsafe_yyjson_mut_val(doc, 1); + if (yyjson_likely(val)) { + val->tag = ((uint64_t)len << YYJSON_TAG_BIT) | + (uint64_t)(YYJSON_TYPE_STR | sub); + val->uni.str = str; + return val; + } + } return NULL; } @@ -4662,7 +5760,19 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_strn(yyjson_mut_doc *doc, yyjson_api_inline yyjson_mut_val *yyjson_mut_strcpy(yyjson_mut_doc *doc, const char *str) { - if (yyjson_likely(str)) return yyjson_mut_strncpy(doc, str, strlen(str)); + if (yyjson_likely(doc && str)) { + size_t len = strlen(str); + bool noesc = unsafe_yyjson_is_str_noesc(str, len); + yyjson_subtype sub = noesc ? YYJSON_SUBTYPE_NOESC : YYJSON_SUBTYPE_NONE; + yyjson_mut_val *val = unsafe_yyjson_mut_val(doc, 1); + char *new_str = unsafe_yyjson_mut_strncpy(doc, str, len); + if (yyjson_likely(val && new_str)) { + val->tag = ((uint64_t)len << YYJSON_TAG_BIT) | + (uint64_t)(YYJSON_TYPE_STR | sub); + val->uni.str = new_str; + return val; + } + } return NULL; } @@ -4723,14 +5833,6 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_get_last( * Mutable JSON Array Iterator API (Implementation) *============================================================================*/ -struct yyjson_mut_arr_iter { - size_t idx; /**< current index, from 0 */ - size_t max; /**< maximum index, idx < max */ - yyjson_mut_val *cur; /**< current value */ - yyjson_mut_val *pre; /**< previous value */ - yyjson_mut_val *arr; /**< the array being iterated */ -}; - yyjson_api_inline bool yyjson_mut_arr_iter_init(yyjson_mut_val *arr, yyjson_mut_arr_iter *iter) { if (yyjson_likely(yyjson_mut_is_arr(arr) && iter)) { @@ -4745,6 +5847,13 @@ yyjson_api_inline bool yyjson_mut_arr_iter_init(yyjson_mut_val *arr, return false; } +yyjson_api_inline yyjson_mut_arr_iter yyjson_mut_arr_iter_with( + yyjson_mut_val *arr) { + yyjson_mut_arr_iter iter; + yyjson_mut_arr_iter_init(arr, &iter); + return iter; +} + yyjson_api_inline bool 
yyjson_mut_arr_iter_has_next(yyjson_mut_arr_iter *iter) { return iter ? iter->idx < iter->max : false; } @@ -4819,7 +5928,8 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr(yyjson_mut_doc *doc) { yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_bool( yyjson_mut_doc *doc, const bool *vals, size_t count) { yyjson_mut_arr_with_func({ - val->tag = YYJSON_TYPE_BOOL | (uint8_t)((uint8_t)vals[i] << 3); + bool _val = !!vals[i]; + val->tag = YYJSON_TYPE_BOOL | (uint8_t)((uint8_t)_val << 3); }); } @@ -5342,15 +6452,11 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_get(yyjson_mut_val *obj, yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_getn(yyjson_mut_val *obj, const char *_key, size_t key_len) { - uint64_t tag = (((uint64_t)key_len) << YYJSON_TAG_BIT) | YYJSON_TYPE_STR; size_t len = yyjson_mut_obj_size(obj); if (yyjson_likely(len && _key)) { yyjson_mut_val *key = ((yyjson_mut_val *)obj->uni.ptr)->next->next; while (len-- > 0) { - if (key->tag == tag && - duckdb::FastMemcmp(key->uni.ptr, _key, key_len) == 0) { - return key->next; - } + if (unsafe_yyjson_equals_strn(key, _key, key_len)) return key->next; key = key->next->next; } } @@ -5363,14 +6469,6 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_getn(yyjson_mut_val *obj, * Mutable JSON Object Iterator API (Implementation) *============================================================================*/ -struct yyjson_mut_obj_iter { - size_t idx; /**< current key index, from 0 */ - size_t max; /**< maximum key index, idx < max */ - yyjson_mut_val *cur; /**< current key */ - yyjson_mut_val *pre; /**< previous key */ - yyjson_mut_val *obj; /**< the object being iterated */ -}; - yyjson_api_inline bool yyjson_mut_obj_iter_init(yyjson_mut_val *obj, yyjson_mut_obj_iter *iter) { if (yyjson_likely(yyjson_mut_is_obj(obj) && iter)) { @@ -5385,6 +6483,13 @@ yyjson_api_inline bool yyjson_mut_obj_iter_init(yyjson_mut_val *obj, return false; } +yyjson_api_inline yyjson_mut_obj_iter yyjson_mut_obj_iter_with( + 
yyjson_mut_val *obj) { + yyjson_mut_obj_iter iter; + yyjson_mut_obj_iter_init(obj, &iter); + return iter; +} + yyjson_api_inline bool yyjson_mut_obj_iter_has_next(yyjson_mut_obj_iter *iter) { return iter ? iter->idx < iter->max : false; } @@ -5417,8 +6522,8 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_iter_remove( iter->max--; unsafe_yyjson_set_len(iter->obj, iter->max); prev->next->next = next; - iter->cur = next; - return cur; + iter->cur = prev; + return cur->next; } return NULL; } @@ -5437,8 +6542,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_iter_getn( while (idx++ < max) { pre = cur; cur = cur->next->next; - if (unsafe_yyjson_get_len(cur) == key_len && - duckdb::FastMemcmp(cur->uni.str, key, key_len) == 0) { + if (unsafe_yyjson_equals_strn(cur, key, key_len)) { iter->idx += idx; if (iter->idx > max) iter->idx -= max + 1; iter->pre = pre; @@ -5554,7 +6658,7 @@ yyjson_api_inline void unsafe_yyjson_mut_obj_add(yyjson_mut_val *obj, } yyjson_api_inline yyjson_mut_val *unsafe_yyjson_mut_obj_remove( - yyjson_mut_val *obj, const char *key, size_t key_len, uint64_t key_tag) { + yyjson_mut_val *obj, const char *key, size_t key_len) { size_t obj_len = unsafe_yyjson_get_len(obj); if (obj_len) { yyjson_mut_val *pre_key = (yyjson_mut_val *)obj->uni.ptr; @@ -5562,8 +6666,7 @@ yyjson_api_inline yyjson_mut_val *unsafe_yyjson_mut_obj_remove( yyjson_mut_val *removed_item = NULL; size_t i; for (i = 0; i < obj_len; i++) { - if (key_tag == cur_key->tag && - duckdb::FastMemcmp(key, cur_key->uni.ptr, key_len) == 0) { + if (unsafe_yyjson_equals_strn(cur_key, key, key_len)) { if (!removed_item) removed_item = cur_key->next; cur_key = cur_key->next->next; pre_key->next->next = cur_key; @@ -5592,8 +6695,7 @@ yyjson_api_inline bool unsafe_yyjson_mut_obj_replace(yyjson_mut_val *obj, yyjson_mut_val *cur_key = pre_key->next->next; size_t i; for (i = 0; i < obj_len; i++) { - if (key->tag == cur_key->tag && - duckdb::FastMemcmp(key->uni.str, cur_key->uni.ptr, key_len) == 0) { + 
if (unsafe_yyjson_equals_strn(cur_key, key->uni.str, key_len)) { cur_key->next->tag = val->tag; cur_key->next->uni.u64 = val->uni.u64; return true; @@ -5626,17 +6728,27 @@ yyjson_api_inline bool yyjson_mut_obj_add(yyjson_mut_val *obj, yyjson_api_inline bool yyjson_mut_obj_put(yyjson_mut_val *obj, yyjson_mut_val *key, yyjson_mut_val *val) { - if (yyjson_likely(yyjson_mut_is_obj(obj) && - yyjson_mut_is_str(key))) { - unsafe_yyjson_mut_obj_remove(obj, key->uni.str, - unsafe_yyjson_get_len(key), key->tag); - if (yyjson_likely(val)) { - unsafe_yyjson_mut_obj_add(obj, key, val, - unsafe_yyjson_get_len(obj)); + bool replaced = false; + size_t key_len; + yyjson_mut_obj_iter iter; + yyjson_mut_val *cur_key; + if (yyjson_unlikely(!yyjson_mut_is_obj(obj) || + !yyjson_mut_is_str(key))) return false; + key_len = unsafe_yyjson_get_len(key); + yyjson_mut_obj_iter_init(obj, &iter); + while ((cur_key = yyjson_mut_obj_iter_next(&iter)) != 0) { + if (unsafe_yyjson_equals_strn(cur_key, key->uni.str, key_len)) { + if (!replaced && val) { + replaced = true; + val->next = cur_key->next->next; + cur_key->next = val; + } else { + yyjson_mut_obj_iter_remove(&iter); + } } - return true; } - return false; + if (!replaced && val) unsafe_yyjson_mut_obj_add(obj, key, val, iter.max); + return true; } yyjson_api_inline bool yyjson_mut_obj_insert(yyjson_mut_val *obj, @@ -5665,8 +6777,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_remove(yyjson_mut_val *obj, yyjson_mut_val *key) { if (yyjson_likely(yyjson_mut_is_obj(obj) && yyjson_mut_is_str(key))) { return unsafe_yyjson_mut_obj_remove(obj, key->uni.str, - unsafe_yyjson_get_len(key), - key->tag); + unsafe_yyjson_get_len(key)); } return NULL; } @@ -5675,8 +6786,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_remove_key( yyjson_mut_val *obj, const char *key) { if (yyjson_likely(yyjson_mut_is_obj(obj) && key)) { size_t key_len = strlen(key); - uint64_t tag = ((uint64_t)key_len << YYJSON_TAG_BIT) | YYJSON_TYPE_STR; - return 
unsafe_yyjson_mut_obj_remove(obj, key, key_len, tag); + return unsafe_yyjson_mut_obj_remove(obj, key, key_len); } return NULL; } @@ -5684,8 +6794,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_remove_key( yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_remove_keyn( yyjson_mut_val *obj, const char *key, size_t key_len) { if (yyjson_likely(yyjson_mut_is_obj(obj) && key)) { - uint64_t tag = ((uint64_t)key_len << YYJSON_TAG_BIT) | YYJSON_TYPE_STR; - return unsafe_yyjson_mut_obj_remove(obj, key, key_len, tag); + return unsafe_yyjson_mut_obj_remove(obj, key, key_len); } return NULL; } @@ -5730,7 +6839,10 @@ yyjson_api_inline bool yyjson_mut_obj_rotate(yyjson_mut_val *obj, if (yyjson_likely(key)) { \ size_t len = unsafe_yyjson_get_len(obj); \ yyjson_mut_val *val = key + 1; \ - key->tag = YYJSON_TYPE_STR | YYJSON_SUBTYPE_NONE; \ + size_t key_len = strlen(_key); \ + bool noesc = unsafe_yyjson_is_str_noesc(_key, key_len); \ + key->tag = YYJSON_TYPE_STR; \ + key->tag |= noesc ? YYJSON_SUBTYPE_NOESC : YYJSON_SUBTYPE_NONE; \ key->tag |= (uint64_t)strlen(_key) << YYJSON_TAG_BIT; \ key->uni.str = _key; \ func \ @@ -5769,6 +6881,7 @@ yyjson_api_inline bool yyjson_mut_obj_add_bool(yyjson_mut_doc *doc, const char *_key, bool _val) { yyjson_mut_obj_add_func({ + _val = !!_val; val->tag = YYJSON_TYPE_BOOL | (uint8_t)((uint8_t)(_val) << 3); }); } @@ -5819,7 +6932,10 @@ yyjson_api_inline bool yyjson_mut_obj_add_str(yyjson_mut_doc *doc, const char *_val) { if (yyjson_unlikely(!_val)) return false; yyjson_mut_obj_add_func({ + size_t val_len = strlen(_val); + bool val_noesc = unsafe_yyjson_is_str_noesc(_val, val_len); val->tag = ((uint64_t)strlen(_val) << YYJSON_TAG_BIT) | YYJSON_TYPE_STR; + val->tag |= val_noesc ? 
YYJSON_SUBTYPE_NOESC : YYJSON_SUBTYPE_NONE; val->uni.str = _val; }); } @@ -5862,6 +6978,22 @@ yyjson_api_inline bool yyjson_mut_obj_add_strncpy(yyjson_mut_doc *doc, }); } +yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_add_arr(yyjson_mut_doc *doc, + yyjson_mut_val *obj, + const char *_key) { + yyjson_mut_val *key = yyjson_mut_str(doc, _key); + yyjson_mut_val *val = yyjson_mut_arr(doc); + return yyjson_mut_obj_add(obj, key, val) ? val : NULL; +} + +yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_add_obj(yyjson_mut_doc *doc, + yyjson_mut_val *obj, + const char *_key) { + yyjson_mut_val *key = yyjson_mut_str(doc, _key); + yyjson_mut_val *val = yyjson_mut_obj(doc); + return yyjson_mut_obj_add(obj, key, val) ? val : NULL; +} + yyjson_api_inline bool yyjson_mut_obj_add_val(yyjson_mut_doc *doc, yyjson_mut_val *obj, const char *_key, @@ -5885,8 +7017,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_remove_strn( yyjson_mut_val *val_removed = NULL; yyjson_mut_obj_iter_init(obj, &iter); while ((key = yyjson_mut_obj_iter_next(&iter)) != NULL) { - if (unsafe_yyjson_get_len(key) == _len && - duckdb::FastMemcmp(key->uni.str, _key, _len) == 0) { + if (unsafe_yyjson_equals_strn(key, _key, _len)) { if (!val_removed) val_removed = key->next; yyjson_mut_obj_iter_remove(&iter); } @@ -5934,65 +7065,842 @@ yyjson_api_inline bool yyjson_mut_obj_rename_keyn(yyjson_mut_doc *doc, * JSON Pointer API (Implementation) *============================================================================*/ -/* `val` not null, `ptr` start with '/', `len` > 0. 
*/ -yyjson_api yyjson_val *unsafe_yyjson_get_pointer(yyjson_val *val, - const char *ptr, - size_t len); +#define yyjson_ptr_set_err(_code, _msg) do { \ + if (err) { \ + err->code = YYJSON_PTR_ERR_##_code; \ + err->msg = _msg; \ + err->pos = 0; \ + } \ +} while(false) + +/* require: val != NULL, *ptr == '/', len > 0 */ +yyjson_api yyjson_val *unsafe_yyjson_ptr_getx(yyjson_val *val, + const char *ptr, size_t len, + yyjson_ptr_err *err); -/* `val` not null, `ptr` start with '/', `len` > 0. */ -yyjson_api yyjson_mut_val *unsafe_yyjson_mut_get_pointer(yyjson_mut_val *val, +/* require: val != NULL, *ptr == '/', len > 0 */ +yyjson_api yyjson_mut_val *unsafe_yyjson_mut_ptr_getx(yyjson_mut_val *val, + const char *ptr, + size_t len, + yyjson_ptr_ctx *ctx, + yyjson_ptr_err *err); + +/* require: val/new_val/doc != NULL, *ptr == '/', len > 0 */ +yyjson_api bool unsafe_yyjson_mut_ptr_putx(yyjson_mut_val *val, + const char *ptr, size_t len, + yyjson_mut_val *new_val, + yyjson_mut_doc *doc, + bool create_parent, bool insert_new, + yyjson_ptr_ctx *ctx, + yyjson_ptr_err *err); + +/* require: val/err != NULL, *ptr == '/', len > 0 */ +yyjson_api yyjson_mut_val *unsafe_yyjson_mut_ptr_replacex( + yyjson_mut_val *val, const char *ptr, size_t len, yyjson_mut_val *new_val, + yyjson_ptr_ctx *ctx, yyjson_ptr_err *err); + +/* require: val/err != NULL, *ptr == '/', len > 0 */ +yyjson_api yyjson_mut_val *unsafe_yyjson_mut_ptr_removex(yyjson_mut_val *val, const char *ptr, - size_t len); + size_t len, + yyjson_ptr_ctx *ctx, + yyjson_ptr_err *err); -yyjson_api_inline yyjson_val *yyjson_get_pointern(yyjson_val *val, - const char *ptr, - size_t len) { - if (!val || !ptr) return NULL; - if (len == 0) return val; - if (*ptr != '/') return NULL; - return unsafe_yyjson_get_pointer(val, ptr, len); +yyjson_api_inline yyjson_val *yyjson_doc_ptr_get(yyjson_doc *doc, + const char *ptr) { + if (yyjson_unlikely(!ptr)) return NULL; + return yyjson_doc_ptr_getn(doc, ptr, strlen(ptr)); } -yyjson_api_inline 
yyjson_val *yyjson_get_pointer(yyjson_val *val, - const char *ptr) { - if (!val || !ptr) return NULL; - return yyjson_get_pointern(val, ptr, strlen(ptr)); +yyjson_api_inline yyjson_val *yyjson_doc_ptr_getn(yyjson_doc *doc, + const char *ptr, size_t len) { + return yyjson_doc_ptr_getx(doc, ptr, len, NULL); } -yyjson_api_inline yyjson_val *yyjson_doc_get_pointern(yyjson_doc *doc, +yyjson_api_inline yyjson_val *yyjson_doc_ptr_getx(yyjson_doc *doc, + const char *ptr, size_t len, + yyjson_ptr_err *err) { + yyjson_ptr_set_err(NONE, NULL); + if (yyjson_unlikely(!doc || !ptr)) { + yyjson_ptr_set_err(PARAMETER, "input parameter is NULL"); + return NULL; + } + if (yyjson_unlikely(!doc->root)) { + yyjson_ptr_set_err(NULL_ROOT, "document's root is NULL"); + return NULL; + } + if (yyjson_unlikely(len == 0)) { + return doc->root; + } + if (yyjson_unlikely(*ptr != '/')) { + yyjson_ptr_set_err(SYNTAX, "no prefix '/'"); + return NULL; + } + return unsafe_yyjson_ptr_getx(doc->root, ptr, len, err); +} + +yyjson_api_inline yyjson_val *yyjson_ptr_get(yyjson_val *val, + const char *ptr) { + if (yyjson_unlikely(!ptr)) return NULL; + return yyjson_ptr_getn(val, ptr, strlen(ptr)); +} + +yyjson_api_inline yyjson_val *yyjson_ptr_getn(yyjson_val *val, + const char *ptr, size_t len) { + return yyjson_ptr_getx(val, ptr, len, NULL); +} + +yyjson_api_inline yyjson_val *yyjson_ptr_getx(yyjson_val *val, + const char *ptr, size_t len, + yyjson_ptr_err *err) { + yyjson_ptr_set_err(NONE, NULL); + if (yyjson_unlikely(!val || !ptr)) { + yyjson_ptr_set_err(PARAMETER, "input parameter is NULL"); + return NULL; + } + if (yyjson_unlikely(len == 0)) { + return val; + } + if (yyjson_unlikely(*ptr != '/')) { + yyjson_ptr_set_err(SYNTAX, "no prefix '/'"); + return NULL; + } + return unsafe_yyjson_ptr_getx(val, ptr, len, err); +} + +yyjson_api_inline yyjson_mut_val *yyjson_mut_doc_ptr_get(yyjson_mut_doc *doc, + const char *ptr) { + if (!ptr) return NULL; + return yyjson_mut_doc_ptr_getn(doc, ptr, strlen(ptr)); 
+} + +yyjson_api_inline yyjson_mut_val *yyjson_mut_doc_ptr_getn(yyjson_mut_doc *doc, + const char *ptr, + size_t len) { + return yyjson_mut_doc_ptr_getx(doc, ptr, len, NULL, NULL); +} + +yyjson_api_inline yyjson_mut_val *yyjson_mut_doc_ptr_getx(yyjson_mut_doc *doc, + const char *ptr, + size_t len, + yyjson_ptr_ctx *ctx, + yyjson_ptr_err *err) { + yyjson_ptr_set_err(NONE, NULL); + if (ctx) memset(ctx, 0, sizeof(*ctx)); + + if (yyjson_unlikely(!doc || !ptr)) { + yyjson_ptr_set_err(PARAMETER, "input parameter is NULL"); + return NULL; + } + if (yyjson_unlikely(!doc->root)) { + yyjson_ptr_set_err(NULL_ROOT, "document's root is NULL"); + return NULL; + } + if (yyjson_unlikely(len == 0)) { + return doc->root; + } + if (yyjson_unlikely(*ptr != '/')) { + yyjson_ptr_set_err(SYNTAX, "no prefix '/'"); + return NULL; + } + return unsafe_yyjson_mut_ptr_getx(doc->root, ptr, len, ctx, err); +} + +yyjson_api_inline yyjson_mut_val *yyjson_mut_ptr_get(yyjson_mut_val *val, + const char *ptr) { + if (!ptr) return NULL; + return yyjson_mut_ptr_getn(val, ptr, strlen(ptr)); +} + +yyjson_api_inline yyjson_mut_val *yyjson_mut_ptr_getn(yyjson_mut_val *val, const char *ptr, size_t len) { - return yyjson_get_pointern(doc ? 
doc->root : NULL, ptr, len); + return yyjson_mut_ptr_getx(val, ptr, len, NULL, NULL); +} + +yyjson_api_inline yyjson_mut_val *yyjson_mut_ptr_getx(yyjson_mut_val *val, + const char *ptr, + size_t len, + yyjson_ptr_ctx *ctx, + yyjson_ptr_err *err) { + yyjson_ptr_set_err(NONE, NULL); + if (ctx) memset(ctx, 0, sizeof(*ctx)); + + if (yyjson_unlikely(!val || !ptr)) { + yyjson_ptr_set_err(PARAMETER, "input parameter is NULL"); + return NULL; + } + if (yyjson_unlikely(len == 0)) { + return val; + } + if (yyjson_unlikely(*ptr != '/')) { + yyjson_ptr_set_err(SYNTAX, "no prefix '/'"); + return NULL; + } + return unsafe_yyjson_mut_ptr_getx(val, ptr, len, ctx, err); +} + +yyjson_api_inline bool yyjson_mut_doc_ptr_add(yyjson_mut_doc *doc, + const char *ptr, + yyjson_mut_val *new_val) { + if (yyjson_unlikely(!ptr)) return false; + return yyjson_mut_doc_ptr_addn(doc, ptr, strlen(ptr), new_val); +} + +yyjson_api_inline bool yyjson_mut_doc_ptr_addn(yyjson_mut_doc *doc, + const char *ptr, + size_t len, + yyjson_mut_val *new_val) { + return yyjson_mut_doc_ptr_addx(doc, ptr, len, new_val, true, NULL, NULL); +} + +yyjson_api_inline bool yyjson_mut_doc_ptr_addx(yyjson_mut_doc *doc, + const char *ptr, size_t len, + yyjson_mut_val *new_val, + bool create_parent, + yyjson_ptr_ctx *ctx, + yyjson_ptr_err *err) { + yyjson_ptr_set_err(NONE, NULL); + if (ctx) memset(ctx, 0, sizeof(*ctx)); + + if (yyjson_unlikely(!doc || !ptr || !new_val)) { + yyjson_ptr_set_err(PARAMETER, "input parameter is NULL"); + return false; + } + if (yyjson_unlikely(len == 0)) { + if (doc->root) { + yyjson_ptr_set_err(SET_ROOT, "cannot set document's root"); + return false; + } else { + doc->root = new_val; + return true; + } + } + if (yyjson_unlikely(*ptr != '/')) { + yyjson_ptr_set_err(SYNTAX, "no prefix '/'"); + return false; + } + if (yyjson_unlikely(!doc->root && !create_parent)) { + yyjson_ptr_set_err(NULL_ROOT, "document's root is NULL"); + return false; + } + if (yyjson_unlikely(!doc->root)) { + yyjson_mut_val 
*root = yyjson_mut_obj(doc); + if (yyjson_unlikely(!root)) { + yyjson_ptr_set_err(MEMORY_ALLOCATION, "failed to create value"); + return false; + } + if (unsafe_yyjson_mut_ptr_putx(root, ptr, len, new_val, doc, + create_parent, true, ctx, err)) { + doc->root = root; + return true; + } + return false; + } + return unsafe_yyjson_mut_ptr_putx(doc->root, ptr, len, new_val, doc, + create_parent, true, ctx, err); +} + +yyjson_api_inline bool yyjson_mut_ptr_add(yyjson_mut_val *val, + const char *ptr, + yyjson_mut_val *new_val, + yyjson_mut_doc *doc) { + if (yyjson_unlikely(!ptr)) return false; + return yyjson_mut_ptr_addn(val, ptr, strlen(ptr), new_val, doc); +} + +yyjson_api_inline bool yyjson_mut_ptr_addn(yyjson_mut_val *val, + const char *ptr, size_t len, + yyjson_mut_val *new_val, + yyjson_mut_doc *doc) { + return yyjson_mut_ptr_addx(val, ptr, len, new_val, doc, true, NULL, NULL); +} + +yyjson_api_inline bool yyjson_mut_ptr_addx(yyjson_mut_val *val, + const char *ptr, size_t len, + yyjson_mut_val *new_val, + yyjson_mut_doc *doc, + bool create_parent, + yyjson_ptr_ctx *ctx, + yyjson_ptr_err *err) { + yyjson_ptr_set_err(NONE, NULL); + if (ctx) memset(ctx, 0, sizeof(*ctx)); + + if (yyjson_unlikely(!val || !ptr || !new_val || !doc)) { + yyjson_ptr_set_err(PARAMETER, "input parameter is NULL"); + return false; + } + if (yyjson_unlikely(len == 0)) { + yyjson_ptr_set_err(SET_ROOT, "cannot set root"); + return false; + } + if (yyjson_unlikely(*ptr != '/')) { + yyjson_ptr_set_err(SYNTAX, "no prefix '/'"); + return false; + } + return unsafe_yyjson_mut_ptr_putx(val, ptr, len, new_val, + doc, create_parent, true, ctx, err); +} + +yyjson_api_inline bool yyjson_mut_doc_ptr_set(yyjson_mut_doc *doc, + const char *ptr, + yyjson_mut_val *new_val) { + if (yyjson_unlikely(!ptr)) return false; + return yyjson_mut_doc_ptr_setn(doc, ptr, strlen(ptr), new_val); +} + +yyjson_api_inline bool yyjson_mut_doc_ptr_setn(yyjson_mut_doc *doc, + const char *ptr, size_t len, + yyjson_mut_val *new_val) 
{ + return yyjson_mut_doc_ptr_setx(doc, ptr, len, new_val, true, NULL, NULL); +} + +yyjson_api_inline bool yyjson_mut_doc_ptr_setx(yyjson_mut_doc *doc, + const char *ptr, size_t len, + yyjson_mut_val *new_val, + bool create_parent, + yyjson_ptr_ctx *ctx, + yyjson_ptr_err *err) { + yyjson_ptr_set_err(NONE, NULL); + if (ctx) memset(ctx, 0, sizeof(*ctx)); + + if (yyjson_unlikely(!doc || !ptr)) { + yyjson_ptr_set_err(PARAMETER, "input parameter is NULL"); + return false; + } + if (yyjson_unlikely(len == 0)) { + if (ctx) ctx->old = doc->root; + doc->root = new_val; + return true; + } + if (yyjson_unlikely(*ptr != '/')) { + yyjson_ptr_set_err(SYNTAX, "no prefix '/'"); + return false; + } + if (!new_val) { + if (!doc->root) { + yyjson_ptr_set_err(RESOLVE, "JSON pointer cannot be resolved"); + return false; + } + return !!unsafe_yyjson_mut_ptr_removex(doc->root, ptr, len, ctx, err); + } + if (yyjson_unlikely(!doc->root && !create_parent)) { + yyjson_ptr_set_err(NULL_ROOT, "document's root is NULL"); + return false; + } + if (yyjson_unlikely(!doc->root)) { + yyjson_mut_val *root = yyjson_mut_obj(doc); + if (yyjson_unlikely(!root)) { + yyjson_ptr_set_err(MEMORY_ALLOCATION, "failed to create value"); + return false; + } + if (unsafe_yyjson_mut_ptr_putx(root, ptr, len, new_val, doc, + create_parent, false, ctx, err)) { + doc->root = root; + return true; + } + return false; + } + return unsafe_yyjson_mut_ptr_putx(doc->root, ptr, len, new_val, doc, + create_parent, false, ctx, err); +} + +yyjson_api_inline bool yyjson_mut_ptr_set(yyjson_mut_val *val, + const char *ptr, + yyjson_mut_val *new_val, + yyjson_mut_doc *doc) { + if (yyjson_unlikely(!ptr)) return false; + return yyjson_mut_ptr_setn(val, ptr, strlen(ptr), new_val, doc); +} + +yyjson_api_inline bool yyjson_mut_ptr_setn(yyjson_mut_val *val, + const char *ptr, size_t len, + yyjson_mut_val *new_val, + yyjson_mut_doc *doc) { + return yyjson_mut_ptr_setx(val, ptr, len, new_val, doc, true, NULL, NULL); +} + +yyjson_api_inline 
bool yyjson_mut_ptr_setx(yyjson_mut_val *val, + const char *ptr, size_t len, + yyjson_mut_val *new_val, + yyjson_mut_doc *doc, + bool create_parent, + yyjson_ptr_ctx *ctx, + yyjson_ptr_err *err) { + yyjson_ptr_set_err(NONE, NULL); + if (ctx) memset(ctx, 0, sizeof(*ctx)); + + if (yyjson_unlikely(!val || !ptr || !doc)) { + yyjson_ptr_set_err(PARAMETER, "input parameter is NULL"); + return false; + } + if (yyjson_unlikely(len == 0)) { + yyjson_ptr_set_err(SET_ROOT, "cannot set root"); + return false; + } + if (yyjson_unlikely(*ptr != '/')) { + yyjson_ptr_set_err(SYNTAX, "no prefix '/'"); + return false; + } + if (!new_val) { + return !!unsafe_yyjson_mut_ptr_removex(val, ptr, len, ctx, err); + } + return unsafe_yyjson_mut_ptr_putx(val, ptr, len, new_val, doc, + create_parent, false, ctx, err); +} + +yyjson_api_inline yyjson_mut_val *yyjson_mut_doc_ptr_replace( + yyjson_mut_doc *doc, const char *ptr, yyjson_mut_val *new_val) { + if (!ptr) return NULL; + return yyjson_mut_doc_ptr_replacen(doc, ptr, strlen(ptr), new_val); +} + +yyjson_api_inline yyjson_mut_val *yyjson_mut_doc_ptr_replacen( + yyjson_mut_doc *doc, const char *ptr, size_t len, yyjson_mut_val *new_val) { + return yyjson_mut_doc_ptr_replacex(doc, ptr, len, new_val, NULL, NULL); +} + +yyjson_api_inline yyjson_mut_val *yyjson_mut_doc_ptr_replacex( + yyjson_mut_doc *doc, const char *ptr, size_t len, yyjson_mut_val *new_val, + yyjson_ptr_ctx *ctx, yyjson_ptr_err *err) { + + yyjson_ptr_set_err(NONE, NULL); + if (ctx) memset(ctx, 0, sizeof(*ctx)); + + if (yyjson_unlikely(!doc || !ptr || !new_val)) { + yyjson_ptr_set_err(PARAMETER, "input parameter is NULL"); + return NULL; + } + if (yyjson_unlikely(len == 0)) { + yyjson_mut_val *root = doc->root; + if (yyjson_unlikely(!root)) { + yyjson_ptr_set_err(RESOLVE, "JSON pointer cannot be resolved"); + return NULL; + } + if (ctx) ctx->old = root; + doc->root = new_val; + return root; + } + if (yyjson_unlikely(!doc->root)) { + yyjson_ptr_set_err(NULL_ROOT, "document's root 
is NULL"); + return NULL; + } + if (yyjson_unlikely(*ptr != '/')) { + yyjson_ptr_set_err(SYNTAX, "no prefix '/'"); + return NULL; + } + return unsafe_yyjson_mut_ptr_replacex(doc->root, ptr, len, new_val, + ctx, err); +} + +yyjson_api_inline yyjson_mut_val *yyjson_mut_ptr_replace( + yyjson_mut_val *val, const char *ptr, yyjson_mut_val *new_val) { + if (!ptr) return NULL; + return yyjson_mut_ptr_replacen(val, ptr, strlen(ptr), new_val); +} + +yyjson_api_inline yyjson_mut_val *yyjson_mut_ptr_replacen( + yyjson_mut_val *val, const char *ptr, size_t len, yyjson_mut_val *new_val) { + return yyjson_mut_ptr_replacex(val, ptr, len, new_val, NULL, NULL); +} + +yyjson_api_inline yyjson_mut_val *yyjson_mut_ptr_replacex( + yyjson_mut_val *val, const char *ptr, size_t len, yyjson_mut_val *new_val, + yyjson_ptr_ctx *ctx, yyjson_ptr_err *err) { + + yyjson_ptr_set_err(NONE, NULL); + if (ctx) memset(ctx, 0, sizeof(*ctx)); + + if (yyjson_unlikely(!val || !ptr || !new_val)) { + yyjson_ptr_set_err(PARAMETER, "input parameter is NULL"); + return NULL; + } + if (yyjson_unlikely(len == 0)) { + yyjson_ptr_set_err(SET_ROOT, "cannot set root"); + return NULL; + } + if (yyjson_unlikely(*ptr != '/')) { + yyjson_ptr_set_err(SYNTAX, "no prefix '/'"); + return NULL; + } + return unsafe_yyjson_mut_ptr_replacex(val, ptr, len, new_val, ctx, err); +} + +yyjson_api_inline yyjson_mut_val *yyjson_mut_doc_ptr_remove( + yyjson_mut_doc *doc, const char *ptr) { + if (!ptr) return NULL; + return yyjson_mut_doc_ptr_removen(doc, ptr, strlen(ptr)); +} + +yyjson_api_inline yyjson_mut_val *yyjson_mut_doc_ptr_removen( + yyjson_mut_doc *doc, const char *ptr, size_t len) { + return yyjson_mut_doc_ptr_removex(doc, ptr, len, NULL, NULL); +} + +yyjson_api_inline yyjson_mut_val *yyjson_mut_doc_ptr_removex( + yyjson_mut_doc *doc, const char *ptr, size_t len, + yyjson_ptr_ctx *ctx, yyjson_ptr_err *err) { + + yyjson_ptr_set_err(NONE, NULL); + if (ctx) memset(ctx, 0, sizeof(*ctx)); + + if (yyjson_unlikely(!doc || !ptr)) { + 
yyjson_ptr_set_err(PARAMETER, "input parameter is NULL"); + return NULL; + } + if (yyjson_unlikely(!doc->root)) { + yyjson_ptr_set_err(NULL_ROOT, "document's root is NULL"); + return NULL; + } + if (yyjson_unlikely(len == 0)) { + yyjson_mut_val *root = doc->root; + if (ctx) ctx->old = root; + doc->root = NULL; + return root; + } + if (yyjson_unlikely(*ptr != '/')) { + yyjson_ptr_set_err(SYNTAX, "no prefix '/'"); + return NULL; + } + return unsafe_yyjson_mut_ptr_removex(doc->root, ptr, len, ctx, err); +} + +yyjson_api_inline yyjson_mut_val *yyjson_mut_ptr_remove(yyjson_mut_val *val, + const char *ptr) { + if (!ptr) return NULL; + return yyjson_mut_ptr_removen(val, ptr, strlen(ptr)); +} + +yyjson_api_inline yyjson_mut_val *yyjson_mut_ptr_removen(yyjson_mut_val *val, + const char *ptr, + size_t len) { + return yyjson_mut_ptr_removex(val, ptr, len, NULL, NULL); +} + +yyjson_api_inline yyjson_mut_val *yyjson_mut_ptr_removex(yyjson_mut_val *val, + const char *ptr, + size_t len, + yyjson_ptr_ctx *ctx, + yyjson_ptr_err *err) { + yyjson_ptr_set_err(NONE, NULL); + if (ctx) memset(ctx, 0, sizeof(*ctx)); + + if (yyjson_unlikely(!val || !ptr)) { + yyjson_ptr_set_err(PARAMETER, "input parameter is NULL"); + return NULL; + } + if (yyjson_unlikely(len == 0)) { + yyjson_ptr_set_err(SET_ROOT, "cannot set root"); + return NULL; + } + if (yyjson_unlikely(*ptr != '/')) { + yyjson_ptr_set_err(SYNTAX, "no prefix '/'"); + return NULL; + } + return unsafe_yyjson_mut_ptr_removex(val, ptr, len, ctx, err); +} + +yyjson_api_inline bool yyjson_ptr_ctx_append(yyjson_ptr_ctx *ctx, + yyjson_mut_val *key, + yyjson_mut_val *val) { + yyjson_mut_val *ctn, *pre_key, *pre_val, *cur_key, *cur_val; + if (!ctx || !ctx->ctn || !val) return false; + ctn = ctx->ctn; + + if (yyjson_mut_is_obj(ctn)) { + if (!key) return false; + key->next = val; + pre_key = ctx->pre; + if (unsafe_yyjson_get_len(ctn) == 0) { + val->next = key; + ctn->uni.ptr = key; + ctx->pre = key; + } else if (!pre_key) { + pre_key = 
(yyjson_mut_val *)ctn->uni.ptr; + pre_val = pre_key->next; + val->next = pre_val->next; + pre_val->next = key; + ctn->uni.ptr = key; + ctx->pre = pre_key; + } else { + cur_key = pre_key->next->next; + cur_val = cur_key->next; + val->next = cur_val->next; + cur_val->next = key; + if (ctn->uni.ptr == cur_key) ctn->uni.ptr = key; + ctx->pre = cur_key; + } + } else { + pre_val = ctx->pre; + if (unsafe_yyjson_get_len(ctn) == 0) { + val->next = val; + ctn->uni.ptr = val; + ctx->pre = val; + } else if (!pre_val) { + pre_val = (yyjson_mut_val *)ctn->uni.ptr; + val->next = pre_val->next; + pre_val->next = val; + ctn->uni.ptr = val; + ctx->pre = pre_val; + } else { + cur_val = pre_val->next; + val->next = cur_val->next; + cur_val->next = val; + if (ctn->uni.ptr == cur_val) ctn->uni.ptr = val; + ctx->pre = cur_val; + } + } + unsafe_yyjson_inc_len(ctn); + return true; +} + +yyjson_api_inline bool yyjson_ptr_ctx_replace(yyjson_ptr_ctx *ctx, + yyjson_mut_val *val) { + yyjson_mut_val *ctn, *pre_key, *cur_key, *pre_val, *cur_val; + if (!ctx || !ctx->ctn || !ctx->pre || !val) return false; + ctn = ctx->ctn; + if (yyjson_mut_is_obj(ctn)) { + pre_key = ctx->pre; + pre_val = pre_key->next; + cur_key = pre_val->next; + cur_val = cur_key->next; + /* replace current value */ + cur_key->next = val; + val->next = cur_val->next; + ctx->old = cur_val; + } else { + pre_val = ctx->pre; + cur_val = pre_val->next; + /* replace current value */ + if (pre_val != cur_val) { + val->next = cur_val->next; + pre_val->next = val; + if (ctn->uni.ptr == cur_val) ctn->uni.ptr = val; + } else { + val->next = val; + ctn->uni.ptr = val; + ctx->pre = val; + } + ctx->old = cur_val; + } + return true; +} + +yyjson_api_inline bool yyjson_ptr_ctx_remove(yyjson_ptr_ctx *ctx) { + yyjson_mut_val *ctn, *pre_key, *pre_val, *cur_key, *cur_val; + size_t len; + if (!ctx || !ctx->ctn || !ctx->pre) return false; + ctn = ctx->ctn; + if (yyjson_mut_is_obj(ctn)) { + pre_key = ctx->pre; + pre_val = pre_key->next; + cur_key = 
pre_val->next; + cur_val = cur_key->next; + /* remove current key-value */ + pre_val->next = cur_val->next; + if (ctn->uni.ptr == cur_key) ctn->uni.ptr = pre_key; + ctx->pre = NULL; + ctx->old = cur_val; + } else { + pre_val = ctx->pre; + cur_val = pre_val->next; + /* remove current key-value */ + pre_val->next = cur_val->next; + if (ctn->uni.ptr == cur_val) ctn->uni.ptr = pre_val; + ctx->pre = NULL; + ctx->old = cur_val; + } + len = unsafe_yyjson_get_len(ctn) - 1; + if (len == 0) ctn->uni.ptr = NULL; + unsafe_yyjson_set_len(ctn, len); + return true; +} + +#undef yyjson_ptr_set_err + + + +/*============================================================================== + * JSON Value at Pointer API (Implementation) + *============================================================================*/ + +/** + Set provided `value` if the JSON Pointer (RFC 6901) exists and is type bool. + Returns true if value at `ptr` exists and is the correct type, otherwise false. + */ +yyjson_api_inline bool yyjson_ptr_get_bool( + yyjson_val *root, const char *ptr, bool *value) { + yyjson_val *val = yyjson_ptr_get(root, ptr); + if (value && yyjson_is_bool(val)) { + *value = unsafe_yyjson_get_bool(val); + return true; + } else { + return false; + } +} + +/** + Set provided `value` if the JSON Pointer (RFC 6901) exists and is an integer + that fits in `uint64_t`. Returns true if successful, otherwise false. + */ +yyjson_api_inline bool yyjson_ptr_get_uint( + yyjson_val *root, const char *ptr, uint64_t *value) { + yyjson_val *val = yyjson_ptr_get(root, ptr); + if (value && val) { + uint64_t ret = val->uni.u64; + if (unsafe_yyjson_is_uint(val) || + (unsafe_yyjson_is_sint(val) && !(ret >> 63))) { + *value = ret; + return true; + } + } + return false; +} + +/** + Set provided `value` if the JSON Pointer (RFC 6901) exists and is an integer + that fits in `int64_t`. Returns true if successful, otherwise false. 
+ */ +yyjson_api_inline bool yyjson_ptr_get_sint( + yyjson_val *root, const char *ptr, int64_t *value) { + yyjson_val *val = yyjson_ptr_get(root, ptr); + if (value && val) { + int64_t ret = val->uni.i64; + if (unsafe_yyjson_is_sint(val) || + (unsafe_yyjson_is_uint(val) && ret >= 0)) { + *value = ret; + return true; + } + } + return false; +} + +/** + Set provided `value` if the JSON Pointer (RFC 6901) exists and is type real. + Returns true if value at `ptr` exists and is the correct type, otherwise false. + */ +yyjson_api_inline bool yyjson_ptr_get_real( + yyjson_val *root, const char *ptr, double *value) { + yyjson_val *val = yyjson_ptr_get(root, ptr); + if (value && yyjson_is_real(val)) { + *value = unsafe_yyjson_get_real(val); + return true; + } else { + return false; + } +} + +/** + Set provided `value` if the JSON Pointer (RFC 6901) exists and is type sint, + uint or real. + Returns true if value at `ptr` exists and is the correct type, otherwise false. + */ +yyjson_api_inline bool yyjson_ptr_get_num( + yyjson_val *root, const char *ptr, double *value) { + yyjson_val *val = yyjson_ptr_get(root, ptr); + if (value && yyjson_is_num(val)) { + *value = unsafe_yyjson_get_num(val); + return true; + } else { + return false; + } +} + +/** + Set provided `value` if the JSON Pointer (RFC 6901) exists and is type string. + Returns true if value at `ptr` exists and is the correct type, otherwise false. 
+ */ +yyjson_api_inline bool yyjson_ptr_get_str( + yyjson_val *root, const char *ptr, const char **value) { + yyjson_val *val = yyjson_ptr_get(root, ptr); + if (value && yyjson_is_str(val)) { + *value = unsafe_yyjson_get_str(val); + return true; + } else { + return false; + } } + + +/*============================================================================== + * Deprecated + *============================================================================*/ + +/** @deprecated renamed to `yyjson_doc_ptr_get` */ +yyjson_deprecated("renamed to yyjson_doc_ptr_get") yyjson_api_inline yyjson_val *yyjson_doc_get_pointer(yyjson_doc *doc, const char *ptr) { - return yyjson_get_pointer(doc ? doc->root : NULL, ptr); + return yyjson_doc_ptr_get(doc, ptr); } -yyjson_api_inline yyjson_mut_val *yyjson_mut_get_pointern(yyjson_mut_val *val, - const char *ptr, - size_t len) { - if (!val || !ptr) return NULL; - if (len == 0) return val; - if (*ptr != '/') return NULL; - return unsafe_yyjson_mut_get_pointer(val, ptr, len); +/** @deprecated renamed to `yyjson_doc_ptr_getn` */ +yyjson_deprecated("renamed to yyjson_doc_ptr_getn") +yyjson_api_inline yyjson_val *yyjson_doc_get_pointern(yyjson_doc *doc, + const char *ptr, + size_t len) { + return yyjson_doc_ptr_getn(doc, ptr, len); } -yyjson_api_inline yyjson_mut_val *yyjson_mut_get_pointer(yyjson_mut_val *val, - const char *ptr) { - if (!val || !ptr) return NULL; - return yyjson_mut_get_pointern(val, ptr, strlen(ptr)); +/** @deprecated renamed to `yyjson_mut_doc_ptr_get` */ +yyjson_deprecated("renamed to yyjson_mut_doc_ptr_get") +yyjson_api_inline yyjson_mut_val *yyjson_mut_doc_get_pointer( + yyjson_mut_doc *doc, const char *ptr) { + return yyjson_mut_doc_ptr_get(doc, ptr); } +/** @deprecated renamed to `yyjson_mut_doc_ptr_getn` */ +yyjson_deprecated("renamed to yyjson_mut_doc_ptr_getn") yyjson_api_inline yyjson_mut_val *yyjson_mut_doc_get_pointern( yyjson_mut_doc *doc, const char *ptr, size_t len) { - return yyjson_mut_get_pointern(doc ? 
doc->root : NULL, ptr, len); + return yyjson_mut_doc_ptr_getn(doc, ptr, len); } -yyjson_api_inline yyjson_mut_val *yyjson_mut_doc_get_pointer( - yyjson_mut_doc *doc, const char *ptr) { - return yyjson_mut_get_pointer(doc ? doc->root : NULL, ptr); +/** @deprecated renamed to `yyjson_ptr_get` */ +yyjson_deprecated("renamed to yyjson_ptr_get") +yyjson_api_inline yyjson_val *yyjson_get_pointer(yyjson_val *val, + const char *ptr) { + return yyjson_ptr_get(val, ptr); +} + +/** @deprecated renamed to `yyjson_ptr_getn` */ +yyjson_deprecated("renamed to yyjson_ptr_getn") +yyjson_api_inline yyjson_val *yyjson_get_pointern(yyjson_val *val, + const char *ptr, + size_t len) { + return yyjson_ptr_getn(val, ptr, len); +} + +/** @deprecated renamed to `yyjson_mut_ptr_get` */ +yyjson_deprecated("renamed to yyjson_mut_ptr_get") +yyjson_api_inline yyjson_mut_val *yyjson_mut_get_pointer(yyjson_mut_val *val, + const char *ptr) { + return yyjson_mut_ptr_get(val, ptr); +} + +/** @deprecated renamed to `yyjson_mut_ptr_getn` */ +yyjson_deprecated("renamed to yyjson_mut_ptr_getn") +yyjson_api_inline yyjson_mut_val *yyjson_mut_get_pointern(yyjson_mut_val *val, + const char *ptr, + size_t len) { + return yyjson_mut_ptr_getn(val, ptr, len); +} + +/** @deprecated renamed to `yyjson_mut_ptr_getn` */ +yyjson_deprecated("renamed to unsafe_yyjson_ptr_getn") +yyjson_api_inline yyjson_val *unsafe_yyjson_get_pointer(yyjson_val *val, + const char *ptr, + size_t len) { + yyjson_ptr_err err; + return unsafe_yyjson_ptr_getx(val, ptr, len, &err); +} + +/** @deprecated renamed to `unsafe_yyjson_mut_ptr_getx` */ +yyjson_deprecated("renamed to unsafe_yyjson_mut_ptr_getx") +yyjson_api_inline yyjson_mut_val *unsafe_yyjson_mut_get_pointer( + yyjson_mut_val *val, const char *ptr, size_t len) { + yyjson_ptr_err err; + return unsafe_yyjson_mut_ptr_getx(val, ptr, len, NULL, &err); } @@ -6012,7 +7920,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_doc_get_pointer( #endif /* warning suppress end */ #ifdef 
__cplusplus -// } +} #endif /* extern "C" end */ } // namespace duckdb_yyjson diff --git a/third_party/yyjson/yyjson.cpp b/third_party/yyjson/yyjson.cpp index 94395fb900b9..d8df196fdec1 100644 --- a/third_party/yyjson/yyjson.cpp +++ b/third_party/yyjson/yyjson.cpp @@ -1,19 +1,32 @@ /*============================================================================== - * Created by Yaoyuan on 2019/3/9. - * Copyright (C) 2019 Yaoyuan . - * - * Released under the MIT License: - * https://github.com/ibireme/yyjson/blob/master/LICENSE + Copyright (c) 2020 YaoYuan + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. 
*============================================================================*/ #include "yyjson.hpp" -#include #include namespace duckdb_yyjson { /*============================================================================== - * Compile Hint Begin + * Warning Suppress *============================================================================*/ /* warning suppress begin */ @@ -23,6 +36,7 @@ namespace duckdb_yyjson { # pragma clang diagnostic ignored "-Wunused-parameter" # pragma clang diagnostic ignored "-Wunused-label" # pragma clang diagnostic ignored "-Wunused-macros" +# pragma clang diagnostic ignored "-Wunused-variable" #elif defined(__GNUC__) # if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6) # pragma GCC diagnostic push @@ -34,6 +48,7 @@ namespace duckdb_yyjson { #elif defined(_MSC_VER) # pragma warning(push) # pragma warning(disable:4100) /* unreferenced formal parameter */ +# pragma warning(disable:4101) /* unreferenced variable */ # pragma warning(disable:4102) /* unreferenced label */ # pragma warning(disable:4127) /* conditional expression is constant */ # pragma warning(disable:4706) /* assignment within conditional expression */ @@ -45,7 +60,7 @@ namespace duckdb_yyjson { * Version *============================================================================*/ -yyjson_api uint32_t yyjson_version(void) { +uint32_t yyjson_version(void) { return YYJSON_VERSION_HEX; } @@ -55,32 +70,6 @@ yyjson_api uint32_t yyjson_version(void) { * Flags *============================================================================*/ -/* gcc version check */ -#if defined(__GNUC__) -# if defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__) -# define yyjson_gcc_available(major, minor, patch) \ - ((__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) \ - >= (major * 10000 + minor * 100 + patch)) -# elif defined(__GNUC_MINOR__) -# define yyjson_gcc_available(major, minor, patch) \ - ((__GNUC__ * 10000 + __GNUC_MINOR__ * 100) \ - >= (major * 10000 + 
minor * 100 + patch)) -# else -# define yyjson_gcc_available(major, minor, patch) \ - ((__GNUC__ * 10000) >= (major * 10000 + minor * 100 + patch)) -# endif -#else -# define yyjson_gcc_available(major, minor, patch) 0 -#endif - -/* real gcc check */ -#if !defined(__clang__) && !defined(__INTEL_COMPILER) && !defined(__ICC) && \ - defined(__GNUC__) && defined(__GNUC_MINOR__) -# define YYJSON_IS_REAL_GCC 1 -#else -# define YYJSON_IS_REAL_GCC 0 -#endif - /* msvc intrinsic */ #if YYJSON_MSC_VER >= 1400 # include @@ -145,35 +134,35 @@ yyjson_api uint32_t yyjson_version(void) { /* Correct rounding in double number computations. - + On the x86 architecture, some compilers may use x87 FPU instructions for floating-point arithmetic. The x87 FPU loads all floating point number as 80-bit double-extended precision internally, then rounds the result to original precision, which may produce inaccurate results. For a more detailed explanation, see the paper: https://arxiv.org/abs/cs/0701192 - + Here are some examples of double precision calculation error: - + 2877.0 / 1e6 == 0.002877, but x87 returns 0.0028770000000000002 43683.0 * 1e21 == 4.3683e25, but x87 returns 4.3683000000000004e25 - + Here are some examples of compiler flags to generate x87 instructions on x86: - + clang -m32 -mno-sse gcc/icc -m32 -mfpmath=387 msvc /arch:SSE or /arch:IA32 - + If we are sure that there's no similar error described above, we can define the YYJSON_DOUBLE_MATH_CORRECT as 1 to enable the fast path calculation. This is not an accurate detection, it's just try to avoid the error at compile-time. 
An accurate detection can be done at run-time: - + bool is_double_math_correct(void) { volatile double r = 43683.0; r *= 1e21; return r == 4.3683e25; } - + See also: utils.h in https://github.com/google/double-conversion/ */ #if !defined(FLT_EVAL_METHOD) && defined(__FLT_EVAL_METHOD__) @@ -197,52 +186,41 @@ yyjson_api uint32_t yyjson_version(void) { # define YYJSON_DOUBLE_MATH_CORRECT 1 #endif -/* - Microsoft Visual C++ 6.0 doesn't support converting number from u64 to f64: - error C2520: conversion from unsigned __int64 to double not implemented. - */ -#ifndef YYJSON_U64_TO_F64_NO_IMPL -# if (0 < YYJSON_MSC_VER) && (YYJSON_MSC_VER <= 1200) -# define YYJSON_U64_TO_F64_NO_IMPL 1 -# else -# define YYJSON_U64_TO_F64_NO_IMPL 0 -# endif -#endif - /* endian */ #if yyjson_has_include() -# include +# include /* POSIX */ #endif - #if yyjson_has_include() -# include -#elif yyjson_has_include() -# include +# include /* Linux */ #elif yyjson_has_include() -# include +# include /* BSD, Android */ +#elif yyjson_has_include() +# include /* BSD, Darwin */ #endif #define YYJSON_BIG_ENDIAN 4321 #define YYJSON_LITTLE_ENDIAN 1234 -#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ -# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#if defined(BYTE_ORDER) && BYTE_ORDER +# if defined(BIG_ENDIAN) && (BYTE_ORDER == BIG_ENDIAN) # define YYJSON_ENDIAN YYJSON_BIG_ENDIAN -# elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +# elif defined(LITTLE_ENDIAN) && (BYTE_ORDER == LITTLE_ENDIAN) # define YYJSON_ENDIAN YYJSON_LITTLE_ENDIAN # endif #elif defined(__BYTE_ORDER) && __BYTE_ORDER -# if __BYTE_ORDER == __BIG_ENDIAN +# if defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) # define YYJSON_ENDIAN YYJSON_BIG_ENDIAN -# elif __BYTE_ORDER == __LITTLE_ENDIAN +# elif defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) # define YYJSON_ENDIAN YYJSON_LITTLE_ENDIAN # endif -#elif defined(BYTE_ORDER) && BYTE_ORDER -# if BYTE_ORDER == BIG_ENDIAN +#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ +# if 
defined(__ORDER_BIG_ENDIAN__) && \ + (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) # define YYJSON_ENDIAN YYJSON_BIG_ENDIAN -# elif BYTE_ORDER == LITTLE_ENDIAN +# elif defined(__ORDER_LITTLE_ENDIAN__) && \ + (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) # define YYJSON_ENDIAN YYJSON_LITTLE_ENDIAN # endif @@ -253,21 +231,16 @@ yyjson_api uint32_t yyjson_version(void) { defined(__x86_64) || defined(__x86_64__) || \ defined(__amd64) || defined(__amd64__) || \ defined(_M_AMD64) || defined(_M_X64) || \ - defined(__ia64) || defined(_IA64) || defined(__IA64__) || \ - defined(__ia64__) || defined(_M_IA64) || defined(__itanium__) || \ + defined(_M_ARM) || defined(_M_ARM64) || \ defined(__ARMEL__) || defined(__THUMBEL__) || defined(__AARCH64EL__) || \ - defined(__alpha) || defined(__alpha__) || defined(_M_ALPHA) || \ - defined(__riscv) || defined(__riscv__) || \ defined(_MIPSEL) || defined(__MIPSEL) || defined(__MIPSEL__) || \ - defined(__EMSCRIPTEN__) || defined(__wasm__) + defined(__EMSCRIPTEN__) || defined(__wasm__) || \ + defined(__loongarch__) # define YYJSON_ENDIAN YYJSON_LITTLE_ENDIAN #elif (defined(__BIG_ENDIAN__) && __BIG_ENDIAN__ == 1) || \ defined(__ARMEB__) || defined(__THUMBEB__) || defined(__AARCH64EB__) || \ defined(_MIPSEB) || defined(__MIPSEB) || defined(__MIPSEB__) || \ - defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \ - defined(__ppc) || defined(__ppc__) || \ - defined(__sparc) || defined(__sparc__) || defined(__sparc64__) || \ defined(__or1k__) || defined(__OR1K__) # define YYJSON_ENDIAN YYJSON_BIG_ENDIAN @@ -276,75 +249,54 @@ yyjson_api uint32_t yyjson_version(void) { #endif /* - Unaligned memory access detection. - - Some architectures cannot perform unaligned memory access, or unaligned memory - accesses can have a large performance penalty. Modern compilers can make some - optimizations for unaligned access. 
For example: https://godbolt.org/z/Ejo3Pa - - typedef struct { char c[2] } vec2; - void copy_vec2(vec2 *dst, vec2 *src) { - *dst = *src; - } - - Compiler may generate `load/store` or `move` instruction if target architecture - supports unaligned access, otherwise it may generate `call memcpy` instruction. - - We want to avoid `memcpy` calls, so we should disable unaligned access by - define `YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS` as 1 on these architectures. + This macro controls how yyjson handles unaligned memory accesses. + + By default, yyjson uses `memcpy()` for memory copying. This takes advantage of + the compiler's automatic optimizations to generate unaligned memory access + instructions when the target architecture supports it. + + However, for some older compilers or architectures where `memcpy()` isn't + optimized well and may generate unnecessary function calls, consider defining + this macro as 1. In such cases, yyjson switches to manual byte-by-byte access, + potentially improving performance. An example of the generated assembly code on + the ARM platform can be found here: https://godbolt.org/z/334jjhxPT + + As this flag has already been enabled for some common architectures in the + following code, users typically don't need to manually specify it. If users are + unsure about it, please review the generated assembly code or perform actual + benchmark to make an informed decision. 
*/ #ifndef YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS -# if defined(i386) || defined(__i386) || defined(__i386__) || \ - defined(__i486__) || defined(__i586__) || defined(__i686__) || \ - defined(_X86_) || defined(__X86__) || defined(_M_IX86) || \ - defined(__I86__) || defined(__IA32__) || \ - defined(__THW_INTEL) || defined(__THW_INTEL__) || \ - defined(__x86_64) || defined(__x86_64__) || \ - defined(__amd64) || defined(__amd64__) || \ - defined(_M_AMD64) || defined(_M_X64) -# define YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS 0 /* x86 */ - -# elif defined(__ia64) || defined(_IA64) || defined(__IA64__) || \ +# if defined(__ia64) || defined(_IA64) || defined(__IA64__) || \ defined(__ia64__) || defined(_M_IA64) || defined(__itanium__) # define YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS 1 /* Itanium */ - -# elif defined(__arm64) || defined(__arm64__) || \ - defined(__AARCH64EL__) || defined(__AARCH64EB__) || \ - defined(__aarch64__) || defined(_M_ARM64) -# define YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS 0 /* ARM64 */ - -# elif defined(__ARM_ARCH_4__) || defined(__ARM_ARCH_4T__) || \ - defined(__ARM_ARCH_5TEJ__) || defined(__ARM_ARCH_5TE__) || \ - defined(__ARM_ARCH_6T2__) || defined(__ARM_ARCH_6KZ__) || \ - defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6K__) +# elif (defined(__arm__) || defined(__arm64__) || defined(__aarch64__)) && \ + (defined(__GNUC__) || defined(__clang__)) && \ + (!defined(__ARM_FEATURE_UNALIGNED) || !__ARM_FEATURE_UNALIGNED) # define YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS 1 /* ARM */ - -# elif defined(__ppc64__) || defined(__PPC64__) || \ - defined(__powerpc64__) || defined(_ARCH_PPC64) || \ - defined(__ppc) || defined(__ppc__) || defined(__PPC__) || \ - defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || \ - defined(_ARCH_PPC) || defined(_M_PPC) || \ - defined(__PPCGECKO__) || defined(__PPCBROADWAY__) || defined(_XENON) -# define YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS 0 /* PowerPC */ - +# elif defined(__sparc) || defined(__sparc__) +# 
define YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS 1 /* SPARC */ +# elif defined(__mips) || defined(__mips__) || defined(__MIPS__) +# define YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS 1 /* MIPS */ +# elif defined(__m68k__) || defined(M68000) +# define YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS 1 /* M68K */ # else -# define YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS 0 /* Unknown */ +# define YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS 0 # endif - #endif /* Estimated initial ratio of the JSON data (data_size / value_count). For example: - + data: {"id":12345678,"name":"Harry"} data_size: 30 value_count: 5 ratio: 6 - + yyjson uses dynamic memory with a growth factor of 1.5 when reading and writing JSON, the ratios below are used to determine the initial memory size. - + A too large ratio will waste memory, and a too small ratio will cause multiple memory growths and degrade performance. Currently, these ratios are generated with some commonly used JSON datasets. @@ -354,6 +306,15 @@ yyjson_api uint32_t yyjson_version(void) { #define YYJSON_WRITER_ESTIMATED_PRETTY_RATIO 32 #define YYJSON_WRITER_ESTIMATED_MINIFY_RATIO 18 +/* The initial and maximum size of the memory pool's chunk in yyjson_mut_doc. */ +#define YYJSON_MUT_DOC_STR_POOL_INIT_SIZE 0x100 +#define YYJSON_MUT_DOC_STR_POOL_MAX_SIZE 0x10000000 +#define YYJSON_MUT_DOC_VAL_POOL_INIT_SIZE (0x10 * sizeof(yyjson_mut_val)) +#define YYJSON_MUT_DOC_VAL_POOL_MAX_SIZE (0x1000000 * sizeof(yyjson_mut_val)) + +/* The minimum size of the dynamic allocator's chunk. */ +#define YYJSON_ALC_DYN_MIN_SIZE 0x1000 + /* Default value for compile-time options. 
*/ #ifndef YYJSON_DISABLE_READER #define YYJSON_DISABLE_READER 0 @@ -361,12 +322,18 @@ yyjson_api uint32_t yyjson_version(void) { #ifndef YYJSON_DISABLE_WRITER #define YYJSON_DISABLE_WRITER 0 #endif +#ifndef YYJSON_DISABLE_UTILS +#define YYJSON_DISABLE_UTILS 0 +#endif #ifndef YYJSON_DISABLE_FAST_FP_CONV #define YYJSON_DISABLE_FAST_FP_CONV 0 #endif #ifndef YYJSON_DISABLE_NON_STANDARD #define YYJSON_DISABLE_NON_STANDARD 0 #endif +#ifndef YYJSON_DISABLE_UTF8_VALIDATION +#define YYJSON_DISABLE_UTF8_VALIDATION 0 +#endif @@ -381,14 +348,15 @@ yyjson_api uint32_t yyjson_version(void) { #define repeat8(x) { x x x x x x x x } #define repeat16(x) { x x x x x x x x x x x x x x x x } -#define repeat2_incr(x) { x(0) x(1) } -#define repeat4_incr(x) { x(0) x(1) x(2) x(3) } -#define repeat8_incr(x) { x(0) x(1) x(2) x(3) x(4) x(5) x(6) x(7) } -#define repeat16_incr(x) { x(0) x(1) x(2) x(3) x(4) x(5) x(6) x(7) \ - x(8) x(9) x(10) x(11) x(12) x(13) x(14) x(15) } -#define repeat_in_1_18(x) { x(1) x(2) x(3) x(4) x(5) x(6) x(7) \ - x(8) x(9) x(10) x(11) x(12) x(13) x(14) x(15) \ - x(16) x(17) x(18) } +#define repeat2_incr(x) { x(0) x(1) } +#define repeat4_incr(x) { x(0) x(1) x(2) x(3) } +#define repeat8_incr(x) { x(0) x(1) x(2) x(3) x(4) x(5) x(6) x(7) } +#define repeat16_incr(x) { x(0) x(1) x(2) x(3) x(4) x(5) x(6) x(7) \ + x(8) x(9) x(10) x(11) x(12) x(13) x(14) x(15) } + +#define repeat_in_1_18(x) { x(1) x(2) x(3) x(4) x(5) x(6) x(7) x(8) \ + x(9) x(10) x(11) x(12) x(13) x(14) x(15) x(16) \ + x(17) x(18) } /* Macros used to provide branch prediction information for compiler. */ #undef likely @@ -412,6 +380,33 @@ yyjson_api uint32_t yyjson_version(void) { #undef U64 #define U64(hi, lo) ((((u64)hi##UL) << 32U) + lo##UL) +/* Used to cast away (remove) const qualifier. 
*/ +#define constcast(type) (type)(void *)(size_t)(const void *) + +/* flag test */ +#define has_read_flag(_flag) unlikely(read_flag_eq(flg, YYJSON_READ_##_flag)) +#define has_write_flag(_flag) unlikely(write_flag_eq(flg, YYJSON_WRITE_##_flag)) + +static_inline bool read_flag_eq(yyjson_read_flag flg, yyjson_read_flag chk) { +#if YYJSON_DISABLE_NON_STANDARD + if (chk == YYJSON_READ_ALLOW_INF_AND_NAN || + chk == YYJSON_READ_ALLOW_COMMENTS || + chk == YYJSON_READ_ALLOW_TRAILING_COMMAS || + chk == YYJSON_READ_ALLOW_INVALID_UNICODE) + return false; /* this should be evaluated at compile-time */ +#endif + return (flg & chk) != 0; +} + +static_inline bool write_flag_eq(yyjson_write_flag flg, yyjson_write_flag chk) { +#if YYJSON_DISABLE_NON_STANDARD + if (chk == YYJSON_WRITE_ALLOW_INF_AND_NAN || + chk == YYJSON_WRITE_ALLOW_INVALID_UNICODE) + return false; /* this should be evaluated at compile-time */ +#endif + return (flg & chk) != 0; +} + /*============================================================================== @@ -426,9 +421,13 @@ yyjson_api uint32_t yyjson_version(void) { #undef USIZE_MAX #define USIZE_MAX ((usize)(~(usize)0)) -/* Maximum number of digits for reading u64 safety. */ +/* Maximum number of digits for reading u32/u64/usize safety (not overflow). */ +#undef U32_SAFE_DIG +#define U32_SAFE_DIG 9 /* u32 max is 4294967295, 10 digits */ #undef U64_SAFE_DIG -#define U64_SAFE_DIG 19 +#define U64_SAFE_DIG 19 /* u64 max is 18446744073709551615, 20 digits */ +#undef USIZE_SAFE_DIG +#define USIZE_SAFE_DIG (sizeof(usize) == 8 ? 
U64_SAFE_DIG : U32_SAFE_DIG) @@ -511,11 +510,11 @@ __extension__ typedef unsigned __int128 u128; #endif /** 16/32/64-bit vector */ -typedef struct v16 { char c1, c2; } v16; -typedef struct v32 { char c1, c2, c3, c4; } v32; -typedef struct v64 { char c1, c2, c3, c4, c5, c6, c7, c8; } v64; +typedef struct v16 { char c[2]; } v16; +typedef struct v32 { char c[4]; } v32; +typedef struct v64 { char c[8]; } v64; -/** 16/32/64-bit vector union, used for unaligned memory access on modern CPU */ +/** 16/32/64-bit vector union */ typedef union v16_uni { v16 v; u16 u; } v16_uni; typedef union v32_uni { v32 v; u32 u; } v32_uni; typedef union v64_uni { v64 v; u64 u; } v64_uni; @@ -526,121 +525,164 @@ typedef union v64_uni { v64 v; u64 u; } v64_uni; * Load/Store Utils *============================================================================*/ -#define byte_move_idx(x) ((u8 *)dst)[x] = ((u8 *)src)[x]; +#if YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS + +#define byte_move_idx(x) ((char *)dst)[x] = ((const char *)src)[x]; + +static_inline void byte_copy_2(void *dst, const void *src) { + repeat2_incr(byte_move_idx) +} + +static_inline void byte_copy_4(void *dst, const void *src) { + repeat4_incr(byte_move_idx) +} + +static_inline void byte_copy_8(void *dst, const void *src) { + repeat8_incr(byte_move_idx) +} + +static_inline void byte_copy_16(void *dst, const void *src) { + repeat16_incr(byte_move_idx) +} static_inline void byte_move_2(void *dst, const void *src) { -#if YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS - repeat2_incr(byte_move_idx); -#else - memmove(dst, src, 2); -#endif + repeat2_incr(byte_move_idx) } static_inline void byte_move_4(void *dst, const void *src) { -#if YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS - repeat4_incr(byte_move_idx); -#else - memmove(dst, src, 4); -#endif + repeat4_incr(byte_move_idx) } static_inline void byte_move_8(void *dst, const void *src) { -#if YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS - repeat8_incr(byte_move_idx); -#else - memmove(dst, src, 8); -#endif + 
repeat8_incr(byte_move_idx) } static_inline void byte_move_16(void *dst, const void *src) { -#if YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS - repeat16_incr(byte_move_idx); -#else - memmove(dst, src, 16); -#endif + repeat16_incr(byte_move_idx) } -static_inline void byte_copy_2(void *dst, const void *src) { -#if YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS - repeat2_incr(byte_move_idx); +static_inline bool byte_match_2(void *buf, const char *pat) { + return + ((char *)buf)[0] == ((const char *)pat)[0] && + ((char *)buf)[1] == ((const char *)pat)[1]; +} + +static_inline bool byte_match_4(void *buf, const char *pat) { + return + ((char *)buf)[0] == ((const char *)pat)[0] && + ((char *)buf)[1] == ((const char *)pat)[1] && + ((char *)buf)[2] == ((const char *)pat)[2] && + ((char *)buf)[3] == ((const char *)pat)[3]; +} + +static_inline u16 byte_load_2(const void *src) { + v16_uni uni; + uni.v.c[0] = ((const char *)src)[0]; + uni.v.c[1] = ((const char *)src)[1]; + return uni.u; +} + +static_inline u32 byte_load_3(const void *src) { + v32_uni uni; + uni.v.c[0] = ((const char *)src)[0]; + uni.v.c[1] = ((const char *)src)[1]; + uni.v.c[2] = ((const char *)src)[2]; + uni.v.c[3] = 0; + return uni.u; +} + +static_inline u32 byte_load_4(const void *src) { + v32_uni uni; + uni.v.c[0] = ((const char *)src)[0]; + uni.v.c[1] = ((const char *)src)[1]; + uni.v.c[2] = ((const char *)src)[2]; + uni.v.c[3] = ((const char *)src)[3]; + return uni.u; +} + +#undef byte_move_expr + #else + +static_inline void byte_copy_2(void *dst, const void *src) { memcpy(dst, src, 2); -#endif } static_inline void byte_copy_4(void *dst, const void *src) { -#if YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS - repeat4_incr(byte_move_idx); -#else memcpy(dst, src, 4); -#endif } static_inline void byte_copy_8(void *dst, const void *src) { -#if YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS - repeat8_incr(byte_move_idx); -#else memcpy(dst, src, 8); -#endif } static_inline void byte_copy_16(void *dst, const void *src) { -#if 
YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS - repeat16_incr(byte_move_idx); -#else memcpy(dst, src, 16); -#endif +} + +static_inline void byte_move_2(void *dst, const void *src) { + u16 tmp; + memcpy(&tmp, src, 2); + memcpy(dst, &tmp, 2); +} + +static_inline void byte_move_4(void *dst, const void *src) { + u32 tmp; + memcpy(&tmp, src, 4); + memcpy(dst, &tmp, 4); +} + +static_inline void byte_move_8(void *dst, const void *src) { + u64 tmp; + memcpy(&tmp, src, 8); + memcpy(dst, &tmp, 8); +} + +static_inline void byte_move_16(void *dst, const void *src) { + char *pdst = (char *)dst; + const char *psrc = (const char *)src; + u64 tmp1, tmp2; + memcpy(&tmp1, psrc, 8); + memcpy(&tmp2, psrc + 8, 8); + memcpy(pdst, &tmp1, 8); + memcpy(pdst + 8, &tmp2, 8); } static_inline bool byte_match_2(void *buf, const char *pat) { -#if YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS - return - ((u8 *)buf)[0] == ((const u8 *)pat)[0] && - ((u8 *)buf)[1] == ((const u8 *)pat)[1]; -#else v16_uni u1, u2; - u1.v = *(const v16 *)pat; - u2.v = *(const v16 *)buf; + memcpy(&u1, buf, 2); + memcpy(&u2, pat, 2); return u1.u == u2.u; -#endif } static_inline bool byte_match_4(void *buf, const char *pat) { -#if YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS - return - ((u8 *)buf)[0] == ((const u8 *)pat)[0] && - ((u8 *)buf)[1] == ((const u8 *)pat)[1] && - ((u8 *)buf)[2] == ((const u8 *)pat)[2] && - ((u8 *)buf)[3] == ((const u8 *)pat)[3]; -#else v32_uni u1, u2; - u1.v = *(const v32 *)pat; - u2.v = *(const v32 *)buf; + memcpy(&u1, buf, 4); + memcpy(&u2, pat, 4); return u1.u == u2.u; -#endif } static_inline u16 byte_load_2(const void *src) { v16_uni uni; - uni.v = *(v16 *)src; + memcpy(&uni, src, 2); return uni.u; } static_inline u32 byte_load_3(const void *src) { v32_uni uni; - ((v16_uni *)&uni)->v = *(v16 *)src; - uni.v.c3 = ((char *)src)[2]; - uni.v.c4 = 0; + memcpy(&uni, src, 2); + uni.v.c[2] = ((const char *)src)[2]; + uni.v.c[3] = 0; return uni.u; } static_inline u32 byte_load_4(const void *src) { v32_uni uni; - uni.v = 
*(v32 *)src; + memcpy(&uni, src, 4); return uni.u; } -#undef byte_move_expr +#endif @@ -649,37 +691,20 @@ static_inline u32 byte_load_4(const void *src) { * These functions are used to detect and convert NaN and Inf numbers. *============================================================================*/ -/** - This union is used to avoid violating the strict aliasing rule in C. - `memcpy` can be used in both C and C++, but it may reduce performance without - compiler optimization. - */ -typedef union { u64 u; f64 f; } f64_uni; - /** Convert raw binary to double. */ static_inline f64 f64_from_raw(u64 u) { -#ifndef __cplusplus - f64_uni uni; - uni.u = u; - return uni.f; -#else + /* use memcpy to avoid violating the strict aliasing rule */ f64 f; memcpy(&f, &u, 8); return f; -#endif } /** Convert double to raw binary. */ static_inline u64 f64_to_raw(f64 f) { -#ifndef __cplusplus - f64_uni uni; - uni.f = f; - return uni.u; -#else + /* use memcpy to avoid violating the strict aliasing rule */ u64 u; memcpy(&u, &f, 8); return u; -#endif } /** Get raw 'infinity' with sign. */ @@ -706,7 +731,7 @@ static_inline u64 f64_raw_get_nan(bool sign) { /** Convert normalized u64 (highest bit is 1) to f64. - + Some compiler (such as Microsoft Visual C++ 6.0) do not support converting number from u64 to f64. This function will first convert u64 to i64 and then to f64, with `to nearest` rounding mode. @@ -729,8 +754,7 @@ static_inline f64 normalized_u64_to_f64(u64 val) { /** Returns whether the size is overflow after increment. */ static_inline bool size_add_is_overflow(usize size, usize add) { - usize val = size + add; - return (val < size) | (val < add); + return size > (size + add); } /** Returns whether the size is power of 2 (size should not be 0). */ @@ -765,15 +789,6 @@ static_inline void *mem_align_up(void *mem, usize align) { return mem; } -/** Align address downwards. 
*/ -static_inline void *mem_align_down(void *mem, usize align) { - usize size; - memcpy(&size, &mem, sizeof(usize)); - size = size_align_down(size, align); - memcpy(&mem, &size, sizeof(usize)); - return mem; -} - /*============================================================================== @@ -959,6 +974,15 @@ static const yyjson_alc YYJSON_DEFAULT_ALC = { NULL }; + + +/*============================================================================== + * Null Memory Allocator + * + * This allocator is just a placeholder to ensure that the internal + * malloc/realloc/free function pointers are not null. + *============================================================================*/ + static void *null_malloc(void *ctx, usize size) { return NULL; } @@ -982,30 +1006,38 @@ static const yyjson_alc YYJSON_NULL_ALC = { /*============================================================================== * Pool Memory Allocator - * This is a simple memory allocator that uses linked list memory chunk. - * The following code will be executed only when the library user creates - * this allocator manually. + * + * This allocator is initialized with a fixed-size buffer. + * The buffer is split into multiple memory chunks for memory allocation. 
*============================================================================*/ -/** chunk header */ +/** memory chunk header */ typedef struct pool_chunk { - usize size; /* chunk memory size (include chunk header) */ - struct pool_chunk *next; + usize size; /* chunk memory size, include chunk header */ + struct pool_chunk *next; /* linked list, nullable */ + /* char mem[]; flexible array member */ } pool_chunk; -/** ctx header */ +/** allocator ctx header */ typedef struct pool_ctx { - usize size; /* total memory size (include ctx header) */ - pool_chunk *free_list; + usize size; /* total memory size, include ctx header */ + pool_chunk *free_list; /* linked list, nullable */ + /* pool_chunk chunks[]; flexible array member */ } pool_ctx; +/** align up the input size to chunk size */ +static_inline void pool_size_align(usize *size) { + *size = size_align_up(*size, sizeof(pool_chunk)) + sizeof(pool_chunk); +} + static void *pool_malloc(void *ctx_ptr, usize size) { + /* assert(size != 0) */ pool_ctx *ctx = (pool_ctx *)ctx_ptr; pool_chunk *next, *prev = NULL, *cur = ctx->free_list; - - if (unlikely(size == 0 || size >= ctx->size)) return NULL; - size = size_align_up(size, sizeof(pool_chunk)) + sizeof(pool_chunk); - + + if (unlikely(size >= ctx->size)) return NULL; + pool_size_align(&size); + while (cur) { if (cur->size < size) { /* not enough space, try next chunk */ @@ -1031,10 +1063,11 @@ static void *pool_malloc(void *ctx_ptr, usize size) { } static void pool_free(void *ctx_ptr, void *ptr) { + /* assert(ptr != NULL) */ pool_ctx *ctx = (pool_ctx *)ctx_ptr; pool_chunk *cur = ((pool_chunk *)ptr) - 1; pool_chunk *prev = NULL, *next = ctx->free_list; - + while (next && next < cur) { prev = next; next = next->next; @@ -1042,7 +1075,7 @@ static void pool_free(void *ctx_ptr, void *ptr) { if (prev) prev->next = cur; else ctx->free_list = cur; cur->next = next; - + if (next && ((u8 *)cur + cur->size) == (u8 *)next) { /* merge cur to higher chunk */ cur->size += next->size; @@ 
-1057,26 +1090,16 @@ static void pool_free(void *ctx_ptr, void *ptr) { static void *pool_realloc(void *ctx_ptr, void *ptr, usize old_size, usize size) { + /* assert(ptr != NULL && size != 0 && old_size < size) */ pool_ctx *ctx = (pool_ctx *)ctx_ptr; pool_chunk *cur = ((pool_chunk *)ptr) - 1, *prev, *next, *tmp; - usize free_size; - void *new_ptr; - - if (unlikely(size == 0 || size >= ctx->size)) return NULL; - size = size_align_up(size, sizeof(pool_chunk)) + sizeof(pool_chunk); - - /* reduce size */ - if (unlikely(size <= cur->size)) { - free_size = cur->size - size; - if (free_size >= sizeof(pool_chunk) * 2) { - tmp = (pool_chunk *)(void *)((u8 *)cur + cur->size - free_size); - tmp->size = free_size; - pool_free(ctx_ptr, (void *)(tmp + 1)); - cur->size -= free_size; - } - return ptr; - } - + + /* check size */ + if (unlikely(size >= ctx->size)) return NULL; + pool_size_align(&old_size); + pool_size_align(&size); + if (unlikely(old_size == size)) return ptr; + /* find next and prev chunk */ prev = NULL; next = ctx->free_list; @@ -1084,11 +1107,10 @@ static void *pool_realloc(void *ctx_ptr, void *ptr, prev = next; next = next->next; } - - /* merge to higher chunk if they are contiguous */ - if ((u8 *)cur + cur->size == (u8 *)next && - cur->size + next->size >= size) { - free_size = cur->size + next->size - size; + + if ((u8 *)cur + cur->size == (u8 *)next && cur->size + next->size >= size) { + /* merge to higher chunk if they are contiguous */ + usize free_size = cur->size + next->size - size; if (free_size > sizeof(pool_chunk) * 2) { tmp = (pool_chunk *)(void *)((u8 *)cur + size); if (prev) prev->next = tmp; @@ -1102,21 +1124,21 @@ static void *pool_realloc(void *ctx_ptr, void *ptr, cur->size += next->size; } return ptr; + } else { + /* fallback to malloc and memcpy */ + void *new_ptr = pool_malloc(ctx_ptr, size - sizeof(pool_chunk)); + if (new_ptr) { + memcpy(new_ptr, ptr, cur->size - sizeof(pool_chunk)); + pool_free(ctx_ptr, ptr); + } + return new_ptr; } - - /* 
fallback to malloc and memcpy */ - new_ptr = pool_malloc(ctx_ptr, size - sizeof(pool_chunk)); - if (new_ptr) { - memcpy(new_ptr, ptr, cur->size - sizeof(pool_chunk)); - pool_free(ctx_ptr, ptr); - } - return new_ptr; } bool yyjson_alc_pool_init(yyjson_alc *alc, void *buf, usize size) { pool_chunk *chunk; pool_ctx *ctx; - + if (unlikely(!alc)) return false; *alc = YYJSON_NULL_ALC; if (size < sizeof(pool_ctx) * 4) return false; @@ -1124,13 +1146,13 @@ bool yyjson_alc_pool_init(yyjson_alc *alc, void *buf, usize size) { if (unlikely(!ctx)) return false; size -= (usize)((u8 *)ctx - (u8 *)buf); size = size_align_down(size, sizeof(pool_ctx)); - + chunk = (pool_chunk *)(ctx + 1); chunk->size = size - sizeof(pool_ctx); chunk->next = NULL; ctx->size = size; ctx->free_list = chunk; - + alc->malloc = pool_malloc; alc->realloc = pool_realloc; alc->free = pool_free; @@ -1140,6 +1162,161 @@ bool yyjson_alc_pool_init(yyjson_alc *alc, void *buf, usize size) { +/*============================================================================== + * Dynamic Memory Allocator + * + * This allocator allocates memory on demand and does not immediately release + * unused memory. Instead, it places the unused memory into a freelist for + * potential reuse in the future. It is only when the entire allocator is + * destroyed that all previously allocated memory is released at once. 
+ *============================================================================*/ + +/** memory chunk header */ +typedef struct dyn_chunk { + usize size; /* chunk size, include header */ + struct dyn_chunk *next; + /* char mem[]; flexible array member */ +} dyn_chunk; + +/** allocator ctx header */ +typedef struct { + dyn_chunk free_list; /* dummy header, sorted from small to large */ + dyn_chunk used_list; /* dummy header */ +} dyn_ctx; + +/** align up the input size to chunk size */ +static_inline bool dyn_size_align(usize *size) { + usize alc_size = *size + sizeof(dyn_chunk); + alc_size = size_align_up(alc_size, YYJSON_ALC_DYN_MIN_SIZE); + if (unlikely(alc_size < *size)) return false; /* overflow */ + *size = alc_size; + return true; +} + +/** remove a chunk from list (the chunk must already be in the list) */ +static_inline void dyn_chunk_list_remove(dyn_chunk *list, dyn_chunk *chunk) { + dyn_chunk *prev = list, *cur; + for (cur = prev->next; cur; cur = cur->next) { + if (cur == chunk) { + prev->next = cur->next; + cur->next = NULL; + return; + } + prev = cur; + } +} + +/** add a chunk to list header (the chunk must not be in the list) */ +static_inline void dyn_chunk_list_add(dyn_chunk *list, dyn_chunk *chunk) { + chunk->next = list->next; + list->next = chunk; +} + +static void *dyn_malloc(void *ctx_ptr, usize size) { + /* assert(size != 0) */ + const yyjson_alc def = YYJSON_DEFAULT_ALC; + dyn_ctx *ctx = (dyn_ctx *)ctx_ptr; + dyn_chunk *chunk, *prev, *next; + if (unlikely(!dyn_size_align(&size))) return NULL; + + /* freelist is empty, create new chunk */ + if (!ctx->free_list.next) { + chunk = (dyn_chunk *)def.malloc(def.ctx, size); + if (unlikely(!chunk)) return NULL; + chunk->size = size; + chunk->next = NULL; + dyn_chunk_list_add(&ctx->used_list, chunk); + return (void *)(chunk + 1); + } + + /* find a large enough chunk, or resize the largest chunk */ + prev = &ctx->free_list; + while (true) { + chunk = prev->next; + if (chunk->size >= size) { /* enough 
size, reuse this chunk */ + prev->next = chunk->next; + dyn_chunk_list_add(&ctx->used_list, chunk); + return (void *)(chunk + 1); + } + if (!chunk->next) { /* resize the largest chunk */ + chunk = (dyn_chunk *)def.realloc(def.ctx, chunk, chunk->size, size); + if (unlikely(!chunk)) return NULL; + prev->next = NULL; + chunk->size = size; + dyn_chunk_list_add(&ctx->used_list, chunk); + return (void *)(chunk + 1); + } + prev = chunk; + } +} + +static void *dyn_realloc(void *ctx_ptr, void *ptr, + usize old_size, usize size) { + /* assert(ptr != NULL && size != 0 && old_size < size) */ + const yyjson_alc def = YYJSON_DEFAULT_ALC; + dyn_ctx *ctx = (dyn_ctx *)ctx_ptr; + dyn_chunk *prev, *next, *new_chunk; + dyn_chunk *chunk = (dyn_chunk *)ptr - 1; + if (unlikely(!dyn_size_align(&size))) return NULL; + if (chunk->size >= size) return ptr; + + dyn_chunk_list_remove(&ctx->used_list, chunk); + new_chunk = (dyn_chunk *)def.realloc(def.ctx, chunk, chunk->size, size); + if (likely(new_chunk)) { + new_chunk->size = size; + chunk = new_chunk; + } + dyn_chunk_list_add(&ctx->used_list, chunk); + return new_chunk ? 
(void *)(new_chunk + 1) : NULL; +} + +static void dyn_free(void *ctx_ptr, void *ptr) { + /* assert(ptr != NULL) */ + dyn_ctx *ctx = (dyn_ctx *)ctx_ptr; + dyn_chunk *chunk = (dyn_chunk *)ptr - 1, *prev; + + dyn_chunk_list_remove(&ctx->used_list, chunk); + for (prev = &ctx->free_list; prev; prev = prev->next) { + if (!prev->next || prev->next->size >= chunk->size) { + chunk->next = prev->next; + prev->next = chunk; + break; + } + } +} + +yyjson_alc *yyjson_alc_dyn_new(void) { + const yyjson_alc def = YYJSON_DEFAULT_ALC; + usize hdr_len = sizeof(yyjson_alc) + sizeof(dyn_ctx); + yyjson_alc *alc = (yyjson_alc *)def.malloc(def.ctx, hdr_len); + dyn_ctx *ctx = (dyn_ctx *)(void *)(alc + 1); + if (unlikely(!alc)) return NULL; + alc->malloc = dyn_malloc; + alc->realloc = dyn_realloc; + alc->free = dyn_free; + alc->ctx = alc + 1; + memset(ctx, 0, sizeof(*ctx)); + return alc; +} + +void yyjson_alc_dyn_free(yyjson_alc *alc) { + const yyjson_alc def = YYJSON_DEFAULT_ALC; + dyn_ctx *ctx = (dyn_ctx *)(void *)(alc + 1); + dyn_chunk *chunk, *next; + if (unlikely(!alc)) return; + for (chunk = ctx->free_list.next; chunk; chunk = next) { + next = chunk->next; + def.free(def.ctx, chunk); + } + for (chunk = ctx->used_list.next; chunk; chunk = next) { + next = chunk->next; + def.free(def.ctx, chunk); + } + def.free(def.ctx, alc); +} + + + /*============================================================================== * JSON document and value *============================================================================*/ @@ -1167,17 +1344,26 @@ static_inline void unsafe_yyjson_val_pool_release(yyjson_val_pool *pool, bool unsafe_yyjson_str_pool_grow(yyjson_str_pool *pool, const yyjson_alc *alc, usize len) { yyjson_str_chunk *chunk; - usize size = len + sizeof(yyjson_str_chunk); + usize size, max_len; + + /* create a new chunk */ + max_len = USIZE_MAX - sizeof(yyjson_str_chunk); + if (unlikely(len > max_len)) return false; + size = len + sizeof(yyjson_str_chunk); size = 
yyjson_max(pool->chunk_size, size); chunk = (yyjson_str_chunk *)alc->malloc(alc->ctx, size); - if (yyjson_unlikely(!chunk)) return false; - + if (unlikely(!chunk)) return false; + + /* insert the new chunk as the head of the linked list */ chunk->next = pool->chunks; + chunk->chunk_size = size; pool->chunks = chunk; pool->cur = (char *)chunk + sizeof(yyjson_str_chunk); pool->end = (char *)chunk + size; - + + /* the next chunk is twice the size of the current one */ size = yyjson_min(pool->chunk_size * 2, pool->chunk_size_max); + if (size < pool->chunk_size) size = pool->chunk_size_max; /* overflow */ pool->chunk_size = size; return true; } @@ -1185,28 +1371,48 @@ bool unsafe_yyjson_str_pool_grow(yyjson_str_pool *pool, bool unsafe_yyjson_val_pool_grow(yyjson_val_pool *pool, const yyjson_alc *alc, usize count) { yyjson_val_chunk *chunk; - usize size; - - if (count >= USIZE_MAX / sizeof(yyjson_mut_val) - 16) return false; + usize size, max_count; + + /* create a new chunk */ + max_count = USIZE_MAX / sizeof(yyjson_mut_val) - 1; + if (unlikely(count > max_count)) return false; size = (count + 1) * sizeof(yyjson_mut_val); size = yyjson_max(pool->chunk_size, size); chunk = (yyjson_val_chunk *)alc->malloc(alc->ctx, size); - if (yyjson_unlikely(!chunk)) return false; - + if (unlikely(!chunk)) return false; + + /* insert the new chunk as the head of the linked list */ chunk->next = pool->chunks; + chunk->chunk_size = size; pool->chunks = chunk; - pool->cur = (yyjson_mut_val *)(void *)((u8 *)chunk - + sizeof(yyjson_mut_val)); + pool->cur = (yyjson_mut_val *)(void *)((u8 *)chunk) + 1; pool->end = (yyjson_mut_val *)(void *)((u8 *)chunk + size); - + + /* the next chunk is twice the size of the current one */ size = yyjson_min(pool->chunk_size * 2, pool->chunk_size_max); + if (size < pool->chunk_size) size = pool->chunk_size_max; /* overflow */ pool->chunk_size = size; return true; } +bool yyjson_mut_doc_set_str_pool_size(yyjson_mut_doc *doc, size_t len) { + usize max_size = 
USIZE_MAX - sizeof(yyjson_str_chunk); + if (!doc || !len || len > max_size) return false; + doc->str_pool.chunk_size = len + sizeof(yyjson_str_chunk); + return true; +} + +bool yyjson_mut_doc_set_val_pool_size(yyjson_mut_doc *doc, size_t count) { + usize max_count = USIZE_MAX / sizeof(yyjson_mut_val) - 1; + if (!doc || !count || count > max_count) return false; + doc->val_pool.chunk_size = (count + 1) * sizeof(yyjson_mut_val); + return true; +} + void yyjson_mut_doc_free(yyjson_mut_doc *doc) { if (doc) { yyjson_alc alc = doc->alc; + memset(&doc->alc, 0, sizeof(alc)); unsafe_yyjson_str_pool_release(&doc->str_pool, &alc); unsafe_yyjson_val_pool_release(&doc->val_pool, &alc); alc.free(alc.ctx, doc); @@ -1219,20 +1425,19 @@ yyjson_mut_doc *yyjson_mut_doc_new(const yyjson_alc *alc) { doc = (yyjson_mut_doc *)alc->malloc(alc->ctx, sizeof(yyjson_mut_doc)); if (!doc) return NULL; memset(doc, 0, sizeof(yyjson_mut_doc)); - + doc->alc = *alc; - doc->str_pool.chunk_size = 0x100; - doc->str_pool.chunk_size_max = 0x10000000; - doc->val_pool.chunk_size = 0x10 * sizeof(yyjson_mut_val); - doc->val_pool.chunk_size_max = 0x1000000 * sizeof(yyjson_mut_val); + doc->str_pool.chunk_size = YYJSON_MUT_DOC_STR_POOL_INIT_SIZE; + doc->str_pool.chunk_size_max = YYJSON_MUT_DOC_STR_POOL_MAX_SIZE; + doc->val_pool.chunk_size = YYJSON_MUT_DOC_VAL_POOL_INIT_SIZE; + doc->val_pool.chunk_size_max = YYJSON_MUT_DOC_VAL_POOL_MAX_SIZE; return doc; } -yyjson_api yyjson_mut_doc *yyjson_doc_mut_copy(yyjson_doc *doc, - const yyjson_alc *alc) { +yyjson_mut_doc *yyjson_doc_mut_copy(yyjson_doc *doc, const yyjson_alc *alc) { yyjson_mut_doc *m_doc; yyjson_mut_val *m_val; - + if (!doc || !doc->root) return NULL; m_doc = yyjson_mut_doc_new(alc); if (!m_doc) return NULL; @@ -1245,12 +1450,14 @@ yyjson_api yyjson_mut_doc *yyjson_doc_mut_copy(yyjson_doc *doc, return m_doc; } -yyjson_api yyjson_mut_doc *yyjson_mut_doc_mut_copy(yyjson_mut_doc *doc, - const yyjson_alc *alc) { +yyjson_mut_doc 
*yyjson_mut_doc_mut_copy(yyjson_mut_doc *doc, + const yyjson_alc *alc) { yyjson_mut_doc *m_doc; yyjson_mut_val *m_val; - - if (!doc || !doc->root) return NULL; + + if (!doc) return NULL; + if (!doc->root) return yyjson_mut_doc_new(alc); + m_doc = yyjson_mut_doc_new(alc); if (!m_doc) return NULL; m_val = yyjson_mut_val_mut_copy(m_doc, doc->root); @@ -1262,18 +1469,17 @@ yyjson_api yyjson_mut_doc *yyjson_mut_doc_mut_copy(yyjson_mut_doc *doc, return m_doc; } -yyjson_api yyjson_mut_val *yyjson_val_mut_copy(yyjson_mut_doc *m_doc, - yyjson_val *i_vals) { +yyjson_mut_val *yyjson_val_mut_copy(yyjson_mut_doc *m_doc, + yyjson_val *i_vals) { /* The immutable object or array stores all sub-values in a contiguous memory, We copy them to another contiguous memory as mutable values, then reconnect the mutable values with the original relationship. */ - usize i_vals_len; yyjson_mut_val *m_vals, *m_val; yyjson_val *i_val, *i_end; - + if (!m_doc || !i_vals) return NULL; i_end = unsafe_yyjson_get_next(i_vals); i_vals_len = (usize)(unsafe_yyjson_get_next(i_vals) - i_vals); @@ -1281,7 +1487,7 @@ yyjson_api yyjson_mut_val *yyjson_val_mut_copy(yyjson_mut_doc *m_doc, if (!m_vals) return NULL; i_val = i_vals; m_val = m_vals; - + for (; i_val < i_end; i_val++, m_val++) { yyjson_type type = unsafe_yyjson_get_type(i_val); m_val->tag = i_val->tag; @@ -1326,7 +1532,7 @@ yyjson_api yyjson_mut_val *yyjson_val_mut_copy(yyjson_mut_doc *m_doc, } } } - + return m_vals; } @@ -1339,11 +1545,10 @@ static yyjson_mut_val *unsafe_yyjson_mut_val_mut_copy(yyjson_mut_doc *m_doc, second to last item, which needs to be linked to the last item to close the circle. 
*/ - yyjson_mut_val *m_val = unsafe_yyjson_mut_val(m_doc, 1); if (unlikely(!m_val)) return NULL; m_val->tag = m_vals->tag; - + switch (unsafe_yyjson_get_type(m_vals)) { case YYJSON_TYPE_OBJ: case YYJSON_TYPE_ARR: @@ -1362,7 +1567,7 @@ static yyjson_mut_val *unsafe_yyjson_mut_val_mut_copy(yyjson_mut_doc *m_doc, prev->next = (yyjson_mut_val *)m_val->uni.ptr; } break; - + case YYJSON_TYPE_RAW: case YYJSON_TYPE_STR: { const char *str = m_vals->uni.str; @@ -1371,17 +1576,17 @@ static yyjson_mut_val *unsafe_yyjson_mut_val_mut_copy(yyjson_mut_doc *m_doc, if (!m_val->uni.str) return NULL; break; } - + default: m_val->uni = m_vals->uni; break; } - + return m_val; } -yyjson_api yyjson_mut_val *yyjson_mut_val_mut_copy(yyjson_mut_doc *doc, - yyjson_mut_val *val) { +yyjson_mut_val *yyjson_mut_val_mut_copy(yyjson_mut_doc *doc, + yyjson_mut_val *val) { if (doc && val) return unsafe_yyjson_mut_val_mut_copy(doc, val); return NULL; } @@ -1437,7 +1642,7 @@ static usize yyjson_imut_copy(yyjson_val **val_ptr, char **buf_ptr, } else if (type == YYJSON_TYPE_STR || type == YYJSON_TYPE_RAW) { char *buf = *buf_ptr; usize len = unsafe_yyjson_get_len(mval); - memcpy((void *)buf, (void *)mval->uni.str, len); + memcpy((void *)buf, (const void *)mval->uni.str, len); buf[len] = '\0'; val->tag = mval->tag; val->uni.str = buf; @@ -1452,37 +1657,37 @@ static usize yyjson_imut_copy(yyjson_val **val_ptr, char **buf_ptr, } } -yyjson_api yyjson_doc *yyjson_mut_doc_imut_copy(yyjson_mut_doc *mdoc, - const yyjson_alc *alc) { +yyjson_doc *yyjson_mut_doc_imut_copy(yyjson_mut_doc *mdoc, + const yyjson_alc *alc) { if (!mdoc) return NULL; return yyjson_mut_val_imut_copy(mdoc->root, alc); } -yyjson_api yyjson_doc *yyjson_mut_val_imut_copy(yyjson_mut_val *mval, - const yyjson_alc *alc) { +yyjson_doc *yyjson_mut_val_imut_copy(yyjson_mut_val *mval, + const yyjson_alc *alc) { usize val_num = 0, str_sum = 0, hdr_size, buf_size; yyjson_doc *doc = NULL; yyjson_val *val_hdr = NULL; - + /* This value should be NULL here. 
Setting a non-null value suppresses warning from the clang analyzer. */ char *str_hdr = (char *)(void *)&str_sum; if (!mval) return NULL; if (!alc) alc = &YYJSON_DEFAULT_ALC; - + /* traverse the input value to get pool size */ yyjson_mut_stat(mval, &val_num, &str_sum); - + /* create doc and val pool */ hdr_size = size_align_up(sizeof(yyjson_doc), sizeof(yyjson_val)); buf_size = hdr_size + val_num * sizeof(yyjson_val); doc = (yyjson_doc *)alc->malloc(alc->ctx, buf_size); if (!doc) return NULL; memset(doc, 0, sizeof(yyjson_doc)); - val_hdr = (yyjson_val *)((char *)(void *)doc + hdr_size); + val_hdr = (yyjson_val *)(void *)((char *)(void *)doc + hdr_size); doc->root = val_hdr; doc->alc = *alc; - + /* create str pool */ if (str_sum > 0) { str_hdr = (char *)alc->malloc(alc->ctx, str_sum); @@ -1492,7 +1697,7 @@ yyjson_api yyjson_doc *yyjson_mut_val_imut_copy(yyjson_mut_val *mval, return NULL; } } - + /* copy vals and strs */ doc->val_read = yyjson_imut_copy(&val_hdr, &str_hdr, mval); doc->dat_read = str_sum + 1; @@ -1504,27 +1709,27 @@ static_inline bool unsafe_yyjson_num_equals(void *lhs, void *rhs) { yyjson_val_uni *runi = &((yyjson_val *)rhs)->uni; yyjson_subtype lt = unsafe_yyjson_get_subtype(lhs); yyjson_subtype rt = unsafe_yyjson_get_subtype(rhs); - if (lt == rt) - return luni->u64 == runi->u64; - if (lt == YYJSON_SUBTYPE_SINT && rt == YYJSON_SUBTYPE_UINT) + if (lt == rt) return luni->u64 == runi->u64; + if (lt == YYJSON_SUBTYPE_SINT && rt == YYJSON_SUBTYPE_UINT) { return luni->i64 >= 0 && luni->u64 == runi->u64; - if (lt == YYJSON_SUBTYPE_UINT && rt == YYJSON_SUBTYPE_SINT) + } + if (lt == YYJSON_SUBTYPE_UINT && rt == YYJSON_SUBTYPE_SINT) { return runi->i64 >= 0 && luni->u64 == runi->u64; + } return false; } static_inline bool unsafe_yyjson_str_equals(void *lhs, void *rhs) { usize len = unsafe_yyjson_get_len(lhs); if (len != unsafe_yyjson_get_len(rhs)) return false; - return 0 == len || - 0 == memcmp(unsafe_yyjson_get_str(lhs), - unsafe_yyjson_get_str(rhs), len); + 
return !memcmp(unsafe_yyjson_get_str(lhs), + unsafe_yyjson_get_str(rhs), len); } -yyjson_api bool unsafe_yyjson_equals(yyjson_val *lhs, yyjson_val *rhs) { +bool unsafe_yyjson_equals(yyjson_val *lhs, yyjson_val *rhs) { yyjson_type type = unsafe_yyjson_get_type(lhs); if (type != unsafe_yyjson_get_type(rhs)) return false; - + switch (type) { case YYJSON_TYPE_OBJ: { usize len = unsafe_yyjson_get_len(lhs); @@ -1536,15 +1741,15 @@ yyjson_api bool unsafe_yyjson_equals(yyjson_val *lhs, yyjson_val *rhs) { while (len-- > 0) { rhs = yyjson_obj_iter_getn(&iter, lhs->uni.str, unsafe_yyjson_get_len(lhs)); - if (!rhs || !unsafe_yyjson_equals(lhs + 1, rhs)) - return false; + if (!rhs) return false; + if (!unsafe_yyjson_equals(lhs + 1, rhs)) return false; lhs = unsafe_yyjson_get_next(lhs + 1); } } /* yyjson allows duplicate keys, so the check may be inaccurate */ return true; } - + case YYJSON_TYPE_ARR: { usize len = unsafe_yyjson_get_len(lhs); if (len != unsafe_yyjson_get_len(rhs)) return false; @@ -1552,26 +1757,25 @@ yyjson_api bool unsafe_yyjson_equals(yyjson_val *lhs, yyjson_val *rhs) { lhs = unsafe_yyjson_get_first(lhs); rhs = unsafe_yyjson_get_first(rhs); while (len-- > 0) { - if (!unsafe_yyjson_equals(lhs, rhs)) - return false; + if (!unsafe_yyjson_equals(lhs, rhs)) return false; lhs = unsafe_yyjson_get_next(lhs); rhs = unsafe_yyjson_get_next(rhs); } } return true; } - + case YYJSON_TYPE_NUM: return unsafe_yyjson_num_equals(lhs, rhs); - + case YYJSON_TYPE_RAW: case YYJSON_TYPE_STR: return unsafe_yyjson_str_equals(lhs, rhs); - + case YYJSON_TYPE_NULL: case YYJSON_TYPE_BOOL: return lhs->tag == rhs->tag; - + default: return false; } @@ -1580,7 +1784,7 @@ yyjson_api bool unsafe_yyjson_equals(yyjson_val *lhs, yyjson_val *rhs) { bool unsafe_yyjson_mut_equals(yyjson_mut_val *lhs, yyjson_mut_val *rhs) { yyjson_type type = unsafe_yyjson_get_type(lhs); if (type != unsafe_yyjson_get_type(rhs)) return false; - + switch (type) { case YYJSON_TYPE_OBJ: { usize len = 
unsafe_yyjson_get_len(lhs); @@ -1592,15 +1796,15 @@ bool unsafe_yyjson_mut_equals(yyjson_mut_val *lhs, yyjson_mut_val *rhs) { while (len-- > 0) { rhs = yyjson_mut_obj_iter_getn(&iter, lhs->uni.str, unsafe_yyjson_get_len(lhs)); - if (!rhs || !unsafe_yyjson_mut_equals(lhs->next, rhs)) - return false; + if (!rhs) return false; + if (!unsafe_yyjson_mut_equals(lhs->next, rhs)) return false; lhs = lhs->next->next; } } /* yyjson allows duplicate keys, so the check may be inaccurate */ return true; } - + case YYJSON_TYPE_ARR: { usize len = unsafe_yyjson_get_len(lhs); if (len != unsafe_yyjson_get_len(rhs)) return false; @@ -1608,231 +1812,891 @@ bool unsafe_yyjson_mut_equals(yyjson_mut_val *lhs, yyjson_mut_val *rhs) { lhs = (yyjson_mut_val *)lhs->uni.ptr; rhs = (yyjson_mut_val *)rhs->uni.ptr; while (len-- > 0) { - if (!unsafe_yyjson_mut_equals(lhs, rhs)) - return false; + if (!unsafe_yyjson_mut_equals(lhs, rhs)) return false; lhs = lhs->next; rhs = rhs->next; } } return true; } - - case YYJSON_TYPE_NUM: - return unsafe_yyjson_num_equals(lhs, rhs); - - case YYJSON_TYPE_RAW: - case YYJSON_TYPE_STR: - return unsafe_yyjson_str_equals(lhs, rhs); - - case YYJSON_TYPE_NULL: - case YYJSON_TYPE_BOOL: - return lhs->tag == rhs->tag; - - default: - return false; + + case YYJSON_TYPE_NUM: + return unsafe_yyjson_num_equals(lhs, rhs); + + case YYJSON_TYPE_RAW: + case YYJSON_TYPE_STR: + return unsafe_yyjson_str_equals(lhs, rhs); + + case YYJSON_TYPE_NULL: + case YYJSON_TYPE_BOOL: + return lhs->tag == rhs->tag; + + default: + return false; + } +} + + + +#if !YYJSON_DISABLE_UTILS + +/*============================================================================== + * JSON Pointer API (RFC 6901) + *============================================================================*/ + +/** + Get a token from JSON pointer string. 
+ @param ptr [in,out] + in: string that points to current token prefix `/` + out: string that points to next token prefix `/`, or string end + @param end [in] end of the entire JSON Pointer string + @param len [out] unescaped token length + @param esc [out] number of escaped characters in this token + @return head of the token, or NULL if syntax error + */ +static_inline const char *ptr_next_token(const char **ptr, const char *end, + usize *len, usize *esc) { + const char *hdr = *ptr + 1; + const char *cur = hdr; + /* skip unescaped characters */ + while (cur < end && *cur != '/' && *cur != '~') cur++; + if (likely(cur == end || *cur != '~')) { + /* no escaped characters, return */ + *ptr = cur; + *len = (usize)(cur - hdr); + *esc = 0; + return hdr; + } else { + /* handle escaped characters */ + usize esc_num = 0; + while (cur < end && *cur != '/') { + if (*cur++ == '~') { + if (cur == end || (*cur != '0' && *cur != '1')) { + *ptr = cur - 1; + return NULL; + } + esc_num++; + } + } + *ptr = cur; + *len = (usize)(cur - hdr) - esc_num; + *esc = esc_num; + return hdr; + } +} + +/** + Convert token string to index. + @param cur [in] token head + @param len [in] token length + @param idx [out] the index number, or USIZE_MAX if token is '-' + @return true if token is a valid array index + */ +static_inline bool ptr_token_to_idx(const char *cur, usize len, usize *idx) { + const char *end = cur + len; + usize num = 0, add; + if (unlikely(len == 0 || len > USIZE_SAFE_DIG)) return false; + if (*cur == '0') { + if (unlikely(len > 1)) return false; + *idx = 0; + return true; + } + if (*cur == '-') { + if (unlikely(len > 1)) return false; + *idx = USIZE_MAX; + return true; + } + for (; cur < end && (add = (usize)((u8)*cur - (u8)'0')) <= 9; cur++) { + num = num * 10 + add; + } + if (unlikely(num == 0 || cur < end)) return false; + *idx = num; + return true; +} + +/** + Compare JSON key with token. 
+ @param key a string key (yyjson_val or yyjson_mut_val) + @param token a JSON pointer token + @param len unescaped token length + @param esc number of escaped characters in this token + @return true if `str` is equals to `token` + */ +static_inline bool ptr_token_eq(void *key, + const char *token, usize len, usize esc) { + yyjson_val *val = (yyjson_val *)key; + if (unsafe_yyjson_get_len(val) != len) return false; + if (likely(!esc)) { + return memcmp(val->uni.str, token, len) == 0; + } else { + const char *str = val->uni.str; + for (; len-- > 0; token++, str++) { + if (*token == '~') { + if (*str != (*++token == '0' ? '~' : '/')) return false; + } else { + if (*str != *token) return false; + } + } + return true; + } +} + +/** + Get a value from array by token. + @param arr an array, should not be NULL or non-array type + @param token a JSON pointer token + @param len unescaped token length + @param esc number of escaped characters in this token + @return value at index, or NULL if token is not index or index is out of range + */ +static_inline yyjson_val *ptr_arr_get(yyjson_val *arr, const char *token, + usize len, usize esc) { + yyjson_val *val = unsafe_yyjson_get_first(arr); + usize num = unsafe_yyjson_get_len(arr), idx = 0; + if (unlikely(num == 0)) return NULL; + if (unlikely(!ptr_token_to_idx(token, len, &idx))) return NULL; + if (unlikely(idx >= num)) return NULL; + if (unsafe_yyjson_arr_is_flat(arr)) { + return val + idx; + } else { + while (idx-- > 0) val = unsafe_yyjson_get_next(val); + return val; + } +} + +/** + Get a value from object by token. 
+ @param obj [in] an object, should not be NULL or non-object type + @param token [in] a JSON pointer token + @param len [in] unescaped token length + @param esc [in] number of escaped characters in this token + @return value associated with the token, or NULL if no value + */ +static_inline yyjson_val *ptr_obj_get(yyjson_val *obj, const char *token, + usize len, usize esc) { + yyjson_val *key = unsafe_yyjson_get_first(obj); + usize num = unsafe_yyjson_get_len(obj); + if (unlikely(num == 0)) return NULL; + for (; num > 0; num--, key = unsafe_yyjson_get_next(key + 1)) { + if (ptr_token_eq(key, token, len, esc)) return key + 1; + } + return NULL; +} + +/** + Get a value from array by token. + @param arr [in] an array, should not be NULL or non-array type + @param token [in] a JSON pointer token + @param len [in] unescaped token length + @param esc [in] number of escaped characters in this token + @param pre [out] previous (sibling) value of the returned value + @param last [out] whether index is last + @return value at index, or NULL if token is not index or index is out of range + */ +static_inline yyjson_mut_val *ptr_mut_arr_get(yyjson_mut_val *arr, + const char *token, + usize len, usize esc, + yyjson_mut_val **pre, + bool *last) { + yyjson_mut_val *val = (yyjson_mut_val *)arr->uni.ptr; /* last (tail) */ + usize num = unsafe_yyjson_get_len(arr), idx; + if (last) *last = false; + if (pre) *pre = NULL; + if (unlikely(num == 0)) { + if (last && len == 1 && (*token == '0' || *token == '-')) *last = true; + return NULL; + } + if (unlikely(!ptr_token_to_idx(token, len, &idx))) return NULL; + if (last) *last = (idx == num || idx == USIZE_MAX); + if (unlikely(idx >= num)) return NULL; + while (idx-- > 0) val = val->next; + *pre = val; + return val->next; +} + +/** + Get a value from object by token. 
+ @param obj [in] an object, should not be NULL or non-object type + @param token [in] a JSON pointer token + @param len [in] unescaped token length + @param esc [in] number of escaped characters in this token + @param pre [out] previous (sibling) key of the returned value's key + @return value associated with the token, or NULL if no value + */ +static_inline yyjson_mut_val *ptr_mut_obj_get(yyjson_mut_val *obj, + const char *token, + usize len, usize esc, + yyjson_mut_val **pre) { + yyjson_mut_val *pre_key = (yyjson_mut_val *)obj->uni.ptr, *key; + usize num = unsafe_yyjson_get_len(obj); + if (pre) *pre = NULL; + if (unlikely(num == 0)) return NULL; + for (; num > 0; num--, pre_key = key) { + key = pre_key->next->next; + if (ptr_token_eq(key, token, len, esc)) { + *pre = pre_key; + return key->next; + } + } + return NULL; +} + +/** + Create a string value with JSON pointer token. + @param token [in] a JSON pointer token + @param len [in] unescaped token length + @param esc [in] number of escaped characters in this token + @param doc [in] used for memory allocation when creating value + @return new string value, or NULL if memory allocation failed + */ +static_inline yyjson_mut_val *ptr_new_key(const char *token, + usize len, usize esc, + yyjson_mut_doc *doc) { + const char *src = token; + if (likely(!esc)) { + return yyjson_mut_strncpy(doc, src, len); + } else { + const char *end = src + len + esc; + char *dst = unsafe_yyjson_mut_str_alc(doc, len + esc); + char *str = dst; + if (unlikely(!dst)) return NULL; + for (; src < end; src++, dst++) { + if (*src != '~') *dst = *src; + else *dst = (*++src == '0' ? 
'~' : '/'); + } + *dst = '\0'; + return yyjson_mut_strn(doc, str, len); + } +} + +/* macros for yyjson_ptr */ +#define return_err(_ret, _code, _pos, _msg) do { \ + if (err) { \ + err->code = YYJSON_PTR_ERR_##_code; \ + err->msg = _msg; \ + err->pos = (usize)(_pos); \ + } \ + return _ret; \ +} while (false) + +#define return_err_resolve(_ret, _pos) \ + return_err(_ret, RESOLVE, _pos, "JSON pointer cannot be resolved") +#define return_err_syntax(_ret, _pos) \ + return_err(_ret, SYNTAX, _pos, "invalid escaped character") +#define return_err_alloc(_ret) \ + return_err(_ret, MEMORY_ALLOCATION, 0, "failed to create value") + +yyjson_val *unsafe_yyjson_ptr_getx(yyjson_val *val, + const char *ptr, size_t ptr_len, + yyjson_ptr_err *err) { + + const char *hdr = ptr, *end = ptr + ptr_len, *token; + usize len, esc; + yyjson_type type; + + while (true) { + token = ptr_next_token(&ptr, end, &len, &esc); + if (unlikely(!token)) return_err_syntax(NULL, ptr - hdr); + type = unsafe_yyjson_get_type(val); + if (type == YYJSON_TYPE_OBJ) { + val = ptr_obj_get(val, token, len, esc); + } else if (type == YYJSON_TYPE_ARR) { + val = ptr_arr_get(val, token, len, esc); + } else { + val = NULL; + } + if (!val) return_err_resolve(NULL, token - hdr); + if (ptr == end) return val; + } +} + +yyjson_mut_val *unsafe_yyjson_mut_ptr_getx(yyjson_mut_val *val, + const char *ptr, + size_t ptr_len, + yyjson_ptr_ctx *ctx, + yyjson_ptr_err *err) { + + const char *hdr = ptr, *end = ptr + ptr_len, *token; + usize len, esc; + yyjson_mut_val *ctn, *pre = NULL; + yyjson_type type; + bool idx_is_last = false; + + while (true) { + token = ptr_next_token(&ptr, end, &len, &esc); + if (unlikely(!token)) return_err_syntax(NULL, ptr - hdr); + ctn = val; + type = unsafe_yyjson_get_type(val); + if (type == YYJSON_TYPE_OBJ) { + val = ptr_mut_obj_get(val, token, len, esc, &pre); + } else if (type == YYJSON_TYPE_ARR) { + val = ptr_mut_arr_get(val, token, len, esc, &pre, &idx_is_last); + } else { + val = NULL; + } + if (ctx 
&& (ptr == end)) { + if (type == YYJSON_TYPE_OBJ || + (type == YYJSON_TYPE_ARR && (val || idx_is_last))) { + ctx->ctn = ctn; + ctx->pre = pre; + } + } + if (!val) return_err_resolve(NULL, token - hdr); + if (ptr == end) return val; + } +} + +bool unsafe_yyjson_mut_ptr_putx(yyjson_mut_val *val, + const char *ptr, size_t ptr_len, + yyjson_mut_val *new_val, + yyjson_mut_doc *doc, + bool create_parent, bool insert_new, + yyjson_ptr_ctx *ctx, + yyjson_ptr_err *err) { + + const char *hdr = ptr, *end = ptr + ptr_len, *token; + usize token_len, esc, ctn_len; + yyjson_mut_val *ctn, *key, *pre = NULL; + yyjson_mut_val *sep_ctn = NULL, *sep_key = NULL, *sep_val = NULL; + yyjson_type ctn_type; + bool idx_is_last = false; + + /* skip exist parent nodes */ + while (true) { + token = ptr_next_token(&ptr, end, &token_len, &esc); + if (unlikely(!token)) return_err_syntax(false, ptr - hdr); + ctn = val; + ctn_type = unsafe_yyjson_get_type(ctn); + if (ctn_type == YYJSON_TYPE_OBJ) { + val = ptr_mut_obj_get(ctn, token, token_len, esc, &pre); + } else if (ctn_type == YYJSON_TYPE_ARR) { + val = ptr_mut_arr_get(ctn, token, token_len, esc, &pre, + &idx_is_last); + } else return_err_resolve(false, token - hdr); + if (!val) break; + if (ptr == end) break; /* is last token */ + } + + /* create parent nodes if not exist */ + if (unlikely(ptr != end)) { /* not last token */ + if (!create_parent) return_err_resolve(false, token - hdr); + + /* add value at last index if container is array */ + if (ctn_type == YYJSON_TYPE_ARR) { + if (!idx_is_last || !insert_new) { + return_err_resolve(false, token - hdr); + } + val = yyjson_mut_obj(doc); + if (!val) return_err_alloc(false); + + /* delay attaching until all operations are completed */ + sep_ctn = ctn; + sep_key = NULL; + sep_val = val; + + /* move to next token */ + ctn = val; + val = NULL; + ctn_type = YYJSON_TYPE_OBJ; + token = ptr_next_token(&ptr, end, &token_len, &esc); + if (unlikely(!token)) return_err_resolve(false, token - hdr); + } + + /* 
container is object, create parent nodes */ + while (ptr != end) { /* not last token */ + key = ptr_new_key(token, token_len, esc, doc); + if (!key) return_err_alloc(false); + val = yyjson_mut_obj(doc); + if (!val) return_err_alloc(false); + + /* delay attaching until all operations are completed */ + if (!sep_ctn) { + sep_ctn = ctn; + sep_key = key; + sep_val = val; + } else { + yyjson_mut_obj_add(ctn, key, val); + } + + /* move to next token */ + ctn = val; + val = NULL; + token = ptr_next_token(&ptr, end, &token_len, &esc); + if (unlikely(!token)) return_err_syntax(false, ptr - hdr); + } + } + + /* JSON pointer is resolved, insert or replace target value */ + ctn_len = unsafe_yyjson_get_len(ctn); + if (ctn_type == YYJSON_TYPE_OBJ) { + if (ctx) ctx->ctn = ctn; + if (!val || insert_new) { + /* insert new key-value pair */ + key = ptr_new_key(token, token_len, esc, doc); + if (unlikely(!key)) return_err_alloc(false); + if (ctx) ctx->pre = ctn_len ? (yyjson_mut_val *)ctn->uni.ptr : key; + unsafe_yyjson_mut_obj_add(ctn, key, new_val, ctn_len); + } else { + /* replace exist value */ + key = pre->next->next; + if (ctx) ctx->pre = pre; + if (ctx) ctx->old = val; + yyjson_mut_obj_put(ctn, key, new_val); + } + } else { + /* array */ + if (ctx && (val || idx_is_last)) ctx->ctn = ctn; + if (insert_new) { + /* append new value */ + if (val) { + pre->next = new_val; + new_val->next = val; + if (ctx) ctx->pre = pre; + unsafe_yyjson_set_len(ctn, ctn_len + 1); + } else if (idx_is_last) { + if (ctx) ctx->pre = ctn_len ? 
+ (yyjson_mut_val *)ctn->uni.ptr : new_val; + yyjson_mut_arr_append(ctn, new_val); + } else { + return_err_resolve(false, token - hdr); + } + } else { + /* replace exist value */ + if (!val) return_err_resolve(false, token - hdr); + if (ctn_len > 1) { + new_val->next = val->next; + pre->next = new_val; + if (ctn->uni.ptr == val) ctn->uni.ptr = new_val; + } else { + new_val->next = new_val; + ctn->uni.ptr = new_val; + pre = new_val; + } + if (ctx) ctx->pre = pre; + if (ctx) ctx->old = val; + } + } + + /* all operations are completed, attach the new components to the target */ + if (unlikely(sep_ctn)) { + if (sep_key) yyjson_mut_obj_add(sep_ctn, sep_key, sep_val); + else yyjson_mut_arr_append(sep_ctn, sep_val); + } + return true; +} + +yyjson_mut_val *unsafe_yyjson_mut_ptr_replacex( + yyjson_mut_val *val, const char *ptr, size_t len, yyjson_mut_val *new_val, + yyjson_ptr_ctx *ctx, yyjson_ptr_err *err) { + + yyjson_mut_val *cur_val; + yyjson_ptr_ctx cur_ctx; + memset(&cur_ctx, 0, sizeof(cur_ctx)); + if (!ctx) ctx = &cur_ctx; + cur_val = unsafe_yyjson_mut_ptr_getx(val, ptr, len, ctx, err); + if (!cur_val) return NULL; + + if (yyjson_mut_is_obj(ctx->ctn)) { + yyjson_mut_val *key = ctx->pre->next->next; + yyjson_mut_obj_put(ctx->ctn, key, new_val); + } else { + yyjson_ptr_ctx_replace(ctx, new_val); + } + ctx->old = cur_val; + return cur_val; +} + +yyjson_mut_val *unsafe_yyjson_mut_ptr_removex(yyjson_mut_val *val, + const char *ptr, + size_t len, + yyjson_ptr_ctx *ctx, + yyjson_ptr_err *err) { + yyjson_mut_val *cur_val; + yyjson_ptr_ctx cur_ctx; + memset(&cur_ctx, 0, sizeof(cur_ctx)); + if (!ctx) ctx = &cur_ctx; + cur_val = unsafe_yyjson_mut_ptr_getx(val, ptr, len, ctx, err); + if (cur_val) { + if (yyjson_mut_is_obj(ctx->ctn)) { + yyjson_mut_val *key = ctx->pre->next->next; + yyjson_mut_obj_put(ctx->ctn, key, NULL); + } else { + yyjson_ptr_ctx_remove(ctx); + } + ctx->pre = NULL; + ctx->old = cur_val; } + return cur_val; } +/* macros for yyjson_ptr */ +#undef return_err 
+#undef return_err_resolve +#undef return_err_syntax +#undef return_err_alloc + /*============================================================================== - * JSON Pointer + * JSON Patch API (RFC 6902) *============================================================================*/ -/** - Get value from JSON array with a path segment (array index). - @param ptr Input the segment after `/`, output the end of segment. - @param end The end of entire JSON pointer. - @param arr JSON array (yyjson_val/yyjson_mut_val, based on `mut`). - @param mut Whether `arr` is mutable. - @return The matched value, or NULL if not matched. - */ -static_inline void *pointer_read_arr(const char **ptr, - const char *end, - void *arr, - bool mut) { - const char *hdr = *ptr; - const char *cur = hdr; - yyjson_val *i_arr = (yyjson_val *)arr; - yyjson_mut_val *m_arr = (yyjson_mut_val *)arr; - u64 idx = 0; - u8 add; - - /* start with 0 */ - if (cur < end && *cur == '0') { - *ptr = cur + 1; - return mut - ? (void *)yyjson_mut_arr_get_first(m_arr) - : (void *)yyjson_arr_get_first(i_arr); - } - - /* read whole number */ - if (cur + U64_SAFE_DIG < end) end = cur + U64_SAFE_DIG; - while (cur < end && (add = (u8)((u8)*cur - (u8)'0')) <= 9) { - cur++; - idx = idx * 10 + add; +/* JSON Patch operation */ +typedef enum patch_op { + PATCH_OP_ADD, /* path, value */ + PATCH_OP_REMOVE, /* path */ + PATCH_OP_REPLACE, /* path, value */ + PATCH_OP_MOVE, /* from, path */ + PATCH_OP_COPY, /* from, path */ + PATCH_OP_TEST, /* path, value */ + PATCH_OP_NONE /* invalid */ +} patch_op; + +static patch_op patch_op_get(yyjson_val *op) { + const char *str = op->uni.str; + switch (unsafe_yyjson_get_len(op)) { + case 3: + if (!memcmp(str, "add", 3)) return PATCH_OP_ADD; + return PATCH_OP_NONE; + case 4: + if (!memcmp(str, "move", 4)) return PATCH_OP_MOVE; + if (!memcmp(str, "copy", 4)) return PATCH_OP_COPY; + if (!memcmp(str, "test", 4)) return PATCH_OP_TEST; + return PATCH_OP_NONE; + case 6: + if (!memcmp(str, 
"remove", 6)) return PATCH_OP_REMOVE; + return PATCH_OP_NONE; + case 7: + if (!memcmp(str, "replace", 7)) return PATCH_OP_REPLACE; + return PATCH_OP_NONE; + default: + return PATCH_OP_NONE; } - if (cur == hdr || idx >= (u64)USIZE_MAX) return NULL; - *ptr = cur; - return mut - ? (void *)yyjson_mut_arr_get(m_arr, (usize)idx) - : (void *)yyjson_arr_get(i_arr, (usize)idx); } -/** - Get value from JSON object with a path segment (object key). - @param ptr Input the segment after `/`, output the end of segment. - @param end The end of entire JSON pointer. - @param obj JSON object (yyjson_val/yyjson_mut_val, based on `mut`). - @param mut `obj` is mutable. - @return The matched value, or NULL if not matched. - */ -static_inline void *pointer_read_obj(const char **ptr, - const char *end, - void *obj, - bool mut) { -#define BUF_SIZE 512 -#define is_escaped(cur) ((cur) < end && (*(cur) == '0' || *(cur) == '1')) -#define is_unescaped(cur) ((cur) < end && *(cur) != '/' && *(cur) != '~') -#define is_completed(cur) ((cur) == end || *(cur) == '/') - - const char *hdr = *ptr; - const char *cur = hdr; - yyjson_val *i_obj = (yyjson_val *)obj; - yyjson_mut_val *m_obj = (yyjson_mut_val *)obj; - yyjson_obj_iter i_iter; - yyjson_mut_obj_iter m_iter; - void *key; - - /* skip unescaped characters */ - while (is_unescaped(cur)) cur++; - if (likely(is_completed(cur))) { - usize len = (usize)(cur - hdr); - *ptr = cur; - return mut - ? (void *)yyjson_mut_obj_getn(m_obj, hdr, len) - : (void *)yyjson_obj_getn(i_obj, hdr, len); - } - - /* copy escaped characters to buffer */ - if (likely(end - hdr <= BUF_SIZE)) { - char buf[BUF_SIZE]; - char *dst = buf + (usize)(cur - hdr); - memcpy(buf, hdr, (usize)(cur - hdr)); - while (true) { - if (is_unescaped(cur)) { - *dst++ = *cur++; - } else if (is_completed(cur)) { - usize len = (usize)(dst - buf); - *ptr = cur; - return mut - ? 
(void *)yyjson_mut_obj_getn(m_obj, buf, len) - : (void *)yyjson_obj_getn(i_obj, buf, len); - } else { - cur++; /* skip '~' */ - if (unlikely(!is_escaped(cur))) return NULL; - *dst++ = (char)(*cur++ == '0' ? '~' : '/'); - } +/* macros for yyjson_patch */ +#define return_err(_code, _msg) do { \ + if (err->ptr.code == YYJSON_PTR_ERR_MEMORY_ALLOCATION) { \ + err->code = YYJSON_PATCH_ERROR_MEMORY_ALLOCATION; \ + err->msg = _msg; \ + memset(&err->ptr, 0, sizeof(yyjson_ptr_err)); \ + } else { \ + err->code = YYJSON_PATCH_ERROR_##_code; \ + err->msg = _msg; \ + err->idx = iter.idx ? iter.idx - 1 : 0; \ + } \ + return NULL; \ +} while (false) + +#define return_err_copy() \ + return_err(MEMORY_ALLOCATION, "failed to copy value") +#define return_err_key(_key) \ + return_err(MISSING_KEY, "missing key " _key) +#define return_err_val(_key) \ + return_err(INVALID_MEMBER, "invalid member " _key) + +#define ptr_get(_ptr) yyjson_mut_ptr_getx( \ + root, _ptr->uni.str, _ptr##_len, NULL, &err->ptr) +#define ptr_add(_ptr, _val) yyjson_mut_ptr_addx( \ + root, _ptr->uni.str, _ptr##_len, _val, doc, false, NULL, &err->ptr) +#define ptr_remove(_ptr) yyjson_mut_ptr_removex( \ + root, _ptr->uni.str, _ptr##_len, NULL, &err->ptr) +#define ptr_replace(_ptr, _val)yyjson_mut_ptr_replacex( \ + root, _ptr->uni.str, _ptr##_len, _val, NULL, &err->ptr) + +yyjson_mut_val *yyjson_patch(yyjson_mut_doc *doc, + yyjson_val *orig, + yyjson_val *patch, + yyjson_patch_err *err) { + + yyjson_mut_val *root; + yyjson_val *obj; + yyjson_arr_iter iter; + yyjson_patch_err err_tmp; + if (!err) err = &err_tmp; + memset(err, 0, sizeof(*err)); + memset(&iter, 0, sizeof(iter)); + + if (unlikely(!doc || !orig || !patch)) { + return_err(INVALID_PARAMETER, "input parameter is NULL"); + } + if (unlikely(!yyjson_is_arr(patch))) { + return_err(INVALID_PARAMETER, "input patch is not array"); + } + root = yyjson_val_mut_copy(doc, orig); + if (unlikely(!root)) return_err_copy(); + + /* iterate through the patch array */ + 
yyjson_arr_iter_init(patch, &iter); + while ((obj = yyjson_arr_iter_next(&iter))) { + patch_op op_enum; + yyjson_val *op, *path, *from = NULL, *value; + yyjson_mut_val *val = NULL, *test; + usize path_len, from_len = 0; + if (unlikely(!unsafe_yyjson_is_obj(obj))) { + return_err(INVALID_OPERATION, "JSON patch operation is not object"); } - } - - /* compare byte by byte */ - cur = hdr; - if (!mut) yyjson_obj_iter_init(i_obj, &i_iter); - else yyjson_mut_obj_iter_init(m_obj, &m_iter); - while ((key = mut ? (void *)yyjson_mut_obj_iter_next(&m_iter) - : (void *)yyjson_obj_iter_next(&i_iter))) { - const char *k_str = unsafe_yyjson_get_str(key); - const char *k_end = k_str + unsafe_yyjson_get_len(key); - while (k_str < k_end) { - if (is_unescaped(cur) && *k_str == *cur) { - k_str += 1; - cur += 1; - } else if (cur < end && *cur == '~' && is_escaped(cur + 1) && - *k_str == (*(cur + 1) == '0' ? '~' : '/')) { - k_str += 1; - cur += 2; - } else { + + /* get required member: op */ + op = yyjson_obj_get(obj, "op"); + if (unlikely(!op)) return_err_key("`op`"); + if (unlikely(!yyjson_is_str(op))) return_err_val("`op`"); + op_enum = patch_op_get(op); + + /* get required member: path */ + path = yyjson_obj_get(obj, "path"); + if (unlikely(!path)) return_err_key("`path`"); + if (unlikely(!yyjson_is_str(path))) return_err_val("`path`"); + path_len = unsafe_yyjson_get_len(path); + + /* get required member: value, from */ + switch ((int)op_enum) { + case PATCH_OP_ADD: case PATCH_OP_REPLACE: case PATCH_OP_TEST: + value = yyjson_obj_get(obj, "value"); + if (unlikely(!value)) return_err_key("`value`"); + val = yyjson_val_mut_copy(doc, value); + if (unlikely(!val)) return_err_copy(); + break; + case PATCH_OP_MOVE: case PATCH_OP_COPY: + from = yyjson_obj_get(obj, "from"); + if (unlikely(!from)) return_err_key("`from`"); + if (unlikely(!yyjson_is_str(from))) return_err_val("`from`"); + from_len = unsafe_yyjson_get_len(from); + break; + default: break; - } } - if (k_str == k_end && 
is_completed(cur)) { - *ptr = cur; - return mut - ? (void *)yyjson_mut_obj_iter_get_val((yyjson_mut_val *)key) - : (void *)yyjson_obj_iter_get_val((yyjson_val *)key); + + /* perform an operation */ + switch ((int)op_enum) { + case PATCH_OP_ADD: /* add(path, val) */ + if (unlikely(path_len == 0)) { root = val; break; } + if (unlikely(!ptr_add(path, val))) { + return_err(POINTER, "failed to add `path`"); + } + break; + case PATCH_OP_REMOVE: /* remove(path) */ + if (unlikely(!ptr_remove(path))) { + return_err(POINTER, "failed to remove `path`"); + } + break; + case PATCH_OP_REPLACE: /* replace(path, val) */ + if (unlikely(path_len == 0)) { root = val; break; } + if (unlikely(!ptr_replace(path, val))) { + return_err(POINTER, "failed to replace `path`"); + } + break; + case PATCH_OP_MOVE: /* val = remove(from), add(path, val) */ + if (unlikely(from_len == 0 && path_len == 0)) break; + val = ptr_remove(from); + if (unlikely(!val)) { + return_err(POINTER, "failed to remove `from`"); + } + if (unlikely(path_len == 0)) { root = val; break; } + if (unlikely(!ptr_add(path, val))) { + return_err(POINTER, "failed to add `path`"); + } + break; + case PATCH_OP_COPY: /* val = get(from).copy, add(path, val) */ + val = ptr_get(from); + if (unlikely(!val)) { + return_err(POINTER, "failed to get `from`"); + } + if (unlikely(path_len == 0)) { root = val; break; } + val = yyjson_mut_val_mut_copy(doc, val); + if (unlikely(!val)) return_err_copy(); + if (unlikely(!ptr_add(path, val))) { + return_err(POINTER, "failed to add `path`"); + } + break; + case PATCH_OP_TEST: /* test = get(path), test.eq(val) */ + test = ptr_get(path); + if (unlikely(!test)) { + return_err(POINTER, "failed to get `path`"); + } + if (unlikely(!yyjson_mut_equals(val, test))) { + return_err(EQUAL, "failed to test equal"); + } + break; + default: + return_err(INVALID_MEMBER, "unsupported `op`"); } } - return NULL; - -#undef BUF_SIZE -#undef is_escaped -#undef is_unescaped -#undef is_completed -} - -yyjson_api 
yyjson_val *unsafe_yyjson_get_pointer(yyjson_val *val, - const char *ptr, - usize len) { - const char *end = ptr + len; - ptr++; /* skip '/' */ - while (true) { - if (yyjson_is_obj(val)) { - val = (yyjson_val *)pointer_read_obj(&ptr, end, val, false); - } else if (yyjson_is_arr(val)) { - val = (yyjson_val *)pointer_read_arr(&ptr, end, val, false); - } else { - val = NULL; + return root; +} + +yyjson_mut_val *yyjson_mut_patch(yyjson_mut_doc *doc, + yyjson_mut_val *orig, + yyjson_mut_val *patch, + yyjson_patch_err *err) { + yyjson_mut_val *root, *obj; + yyjson_mut_arr_iter iter; + yyjson_patch_err err_tmp; + if (!err) err = &err_tmp; + memset(err, 0, sizeof(*err)); + memset(&iter, 0, sizeof(iter)); + + if (unlikely(!doc || !orig || !patch)) { + return_err(INVALID_PARAMETER, "input parameter is NULL"); + } + if (unlikely(!yyjson_mut_is_arr(patch))) { + return_err(INVALID_PARAMETER, "input patch is not array"); + } + root = yyjson_mut_val_mut_copy(doc, orig); + if (unlikely(!root)) return_err_copy(); + + /* iterate through the patch array */ + yyjson_mut_arr_iter_init(patch, &iter); + while ((obj = yyjson_mut_arr_iter_next(&iter))) { + patch_op op_enum; + yyjson_mut_val *op, *path, *from = NULL, *value; + yyjson_mut_val *val = NULL, *test; + usize path_len, from_len = 0; + if (!unsafe_yyjson_is_obj(obj)) { + return_err(INVALID_OPERATION, "JSON patch operation is not object"); } - if (!val || ptr == end) return val; - if (*ptr++ != '/') return NULL; - } -} -yyjson_api yyjson_mut_val *unsafe_yyjson_mut_get_pointer(yyjson_mut_val *val, - const char *ptr, - usize len) { - const char *end = ptr + len; - ptr++; /* skip '/' */ - while (true) { - if (yyjson_mut_is_obj(val)) { - val = (yyjson_mut_val *)pointer_read_obj(&ptr, end, val, true); - } else if (yyjson_mut_is_arr(val)) { - val = (yyjson_mut_val *)pointer_read_arr(&ptr, end, val, true); - } else { - val = NULL; + /* get required member: op */ + op = yyjson_mut_obj_get(obj, "op"); + if (unlikely(!op)) 
return_err_key("`op`"); + if (unlikely(!yyjson_mut_is_str(op))) return_err_val("`op`"); + op_enum = patch_op_get((yyjson_val *)(void *)op); + + /* get required member: path */ + path = yyjson_mut_obj_get(obj, "path"); + if (unlikely(!path)) return_err_key("`path`"); + if (unlikely(!yyjson_mut_is_str(path))) return_err_val("`path`"); + path_len = unsafe_yyjson_get_len(path); + + /* get required member: value, from */ + switch ((int)op_enum) { + case PATCH_OP_ADD: case PATCH_OP_REPLACE: case PATCH_OP_TEST: + value = yyjson_mut_obj_get(obj, "value"); + if (unlikely(!value)) return_err_key("`value`"); + val = yyjson_mut_val_mut_copy(doc, value); + if (unlikely(!val)) return_err_copy(); + break; + case PATCH_OP_MOVE: case PATCH_OP_COPY: + from = yyjson_mut_obj_get(obj, "from"); + if (unlikely(!from)) return_err_key("`from`"); + if (unlikely(!yyjson_mut_is_str(from))) { + return_err_val("`from`"); + } + from_len = unsafe_yyjson_get_len(from); + break; + default: + break; + } + + /* perform an operation */ + switch ((int)op_enum) { + case PATCH_OP_ADD: /* add(path, val) */ + if (unlikely(path_len == 0)) { root = val; break; } + if (unlikely(!ptr_add(path, val))) { + return_err(POINTER, "failed to add `path`"); + } + break; + case PATCH_OP_REMOVE: /* remove(path) */ + if (unlikely(!ptr_remove(path))) { + return_err(POINTER, "failed to remove `path`"); + } + break; + case PATCH_OP_REPLACE: /* replace(path, val) */ + if (unlikely(path_len == 0)) { root = val; break; } + if (unlikely(!ptr_replace(path, val))) { + return_err(POINTER, "failed to replace `path`"); + } + break; + case PATCH_OP_MOVE: /* val = remove(from), add(path, val) */ + if (unlikely(from_len == 0 && path_len == 0)) break; + val = ptr_remove(from); + if (unlikely(!val)) { + return_err(POINTER, "failed to remove `from`"); + } + if (unlikely(path_len == 0)) { root = val; break; } + if (unlikely(!ptr_add(path, val))) { + return_err(POINTER, "failed to add `path`"); + } + break; + case PATCH_OP_COPY: /* val = 
get(from).copy, add(path, val) */ + val = ptr_get(from); + if (unlikely(!val)) { + return_err(POINTER, "failed to get `from`"); + } + if (unlikely(path_len == 0)) { root = val; break; } + val = yyjson_mut_val_mut_copy(doc, val); + if (unlikely(!val)) return_err_copy(); + if (unlikely(!ptr_add(path, val))) { + return_err(POINTER, "failed to add `path`"); + } + break; + case PATCH_OP_TEST: /* test = get(path), test.eq(val) */ + test = ptr_get(path); + if (unlikely(!test)) { + return_err(POINTER, "failed to get `path`"); + } + if (unlikely(!yyjson_mut_equals(val, test))) { + return_err(EQUAL, "failed to test equal"); + } + break; + default: + return_err(INVALID_MEMBER, "unsupported `op`"); } - if (!val || ptr == end) return val; - if (*ptr++ != '/') return NULL; } + return root; } +/* macros for yyjson_patch */ +#undef return_err +#undef return_err_copy +#undef return_err_key +#undef return_err_val +#undef ptr_get +#undef ptr_add +#undef ptr_remove +#undef ptr_replace + /*============================================================================== - * JSON Merge-Patch + * JSON Merge-Patch API (RFC 7386) *============================================================================*/ -yyjson_api yyjson_mut_val *yyjson_merge_patch(yyjson_mut_doc *doc, - yyjson_val *orig, - yyjson_val *patch) { +yyjson_mut_val *yyjson_merge_patch(yyjson_mut_doc *doc, + yyjson_val *orig, + yyjson_val *patch) { usize idx, max; yyjson_val *key, *orig_val, *patch_val, local_orig; yyjson_mut_val *builder, *mut_key, *mut_val, *merged_val; - + if (unlikely(!yyjson_is_obj(patch))) { return yyjson_val_mut_copy(doc, patch); } - + builder = yyjson_mut_obj(doc); if (unlikely(!builder)) return NULL; - + + memset(&local_orig, 0, sizeof(local_orig)); if (!yyjson_is_obj(orig)) { orig = &local_orig; orig->tag = builder->tag; orig->uni = builder->uni; } - + + /* If orig is contributing, copy any items not modified by the patch */ + if (orig != &local_orig) { + yyjson_obj_foreach(orig, idx, max, key, 
orig_val) { + patch_val = yyjson_obj_getn(patch, + unsafe_yyjson_get_str(key), + unsafe_yyjson_get_len(key)); + if (!patch_val) { + mut_key = yyjson_val_mut_copy(doc, key); + mut_val = yyjson_val_mut_copy(doc, orig_val); + if (!yyjson_mut_obj_add(builder, mut_key, mut_val)) return NULL; + } + } + } + /* Merge items modified by the patch. */ yyjson_obj_foreach(patch, idx, max, key, patch_val) { /* null indicates the field is removed. */ @@ -1846,47 +2710,45 @@ yyjson_api yyjson_mut_val *yyjson_merge_patch(yyjson_mut_doc *doc, merged_val = yyjson_merge_patch(doc, orig_val, patch_val); if (!yyjson_mut_obj_add(builder, mut_key, merged_val)) return NULL; } - - /* Exit early, if orig is not contributing to the final result. */ - if (orig == &local_orig) { - return builder; - } - - /* Copy over any items that weren't modified by the patch. */ - yyjson_obj_foreach(orig, idx, max, key, orig_val) { - patch_val = yyjson_obj_getn(patch, - unsafe_yyjson_get_str(key), - unsafe_yyjson_get_len(key)); - if (!patch_val) { - mut_key = yyjson_val_mut_copy(doc, key); - mut_val = yyjson_val_mut_copy(doc, orig_val); - if (!yyjson_mut_obj_add(builder, mut_key, mut_val)) return NULL; - } - } - + return builder; } -yyjson_api yyjson_mut_val *yyjson_mut_merge_patch(yyjson_mut_doc *doc, - yyjson_mut_val *orig, - yyjson_mut_val *patch) { +yyjson_mut_val *yyjson_mut_merge_patch(yyjson_mut_doc *doc, + yyjson_mut_val *orig, + yyjson_mut_val *patch) { usize idx, max; yyjson_mut_val *key, *orig_val, *patch_val, local_orig; yyjson_mut_val *builder, *mut_key, *mut_val, *merged_val; - + if (unlikely(!yyjson_mut_is_obj(patch))) { return yyjson_mut_val_mut_copy(doc, patch); } - + builder = yyjson_mut_obj(doc); if (unlikely(!builder)) return NULL; - + + memset(&local_orig, 0, sizeof(local_orig)); if (!yyjson_mut_is_obj(orig)) { orig = &local_orig; orig->tag = builder->tag; orig->uni = builder->uni; } - + + /* If orig is contributing, copy any items not modified by the patch */ + if (orig != &local_orig) 
{ + yyjson_mut_obj_foreach(orig, idx, max, key, orig_val) { + patch_val = yyjson_mut_obj_getn(patch, + unsafe_yyjson_get_str(key), + unsafe_yyjson_get_len(key)); + if (!patch_val) { + mut_key = yyjson_mut_val_mut_copy(doc, key); + mut_val = yyjson_mut_val_mut_copy(doc, orig_val); + if (!yyjson_mut_obj_add(builder, mut_key, mut_val)) return NULL; + } + } + } + /* Merge items modified by the patch. */ yyjson_mut_obj_foreach(patch, idx, max, key, patch_val) { /* null indicates the field is removed. */ @@ -1900,27 +2762,12 @@ yyjson_api yyjson_mut_val *yyjson_mut_merge_patch(yyjson_mut_doc *doc, merged_val = yyjson_mut_merge_patch(doc, orig_val, patch_val); if (!yyjson_mut_obj_add(builder, mut_key, merged_val)) return NULL; } - - /* Exit early, if orig is not contributing to the final result. */ - if (orig == &local_orig) { - return builder; - } - - /* Copy over any items that weren't modified by the patch. */ - yyjson_mut_obj_foreach(orig, idx, max, key, orig_val) { - patch_val = yyjson_mut_obj_getn(patch, - unsafe_yyjson_get_str(key), - unsafe_yyjson_get_len(key)); - if (!patch_val) { - mut_key = yyjson_mut_val_mut_copy(doc, key); - mut_val = yyjson_mut_val_mut_copy(doc, orig_val); - if (!yyjson_mut_obj_add(builder, mut_key, mut_val)) return NULL; - } - } - + return builder; } +#endif /* YYJSON_DISABLE_UTILS */ + /*============================================================================== @@ -2643,8 +3490,6 @@ static_inline void pow10_table_get_exp(i32 exp10, i32 *exp2) { -#if !YYJSON_DISABLE_READER - /*============================================================================== * JSON Character Matcher *============================================================================*/ @@ -2670,9 +3515,12 @@ static const char_type CHAR_TYPE_CONTAINER = 1 << 4; /** Comment character: '/'. */ static const char_type CHAR_TYPE_COMMENT = 1 << 5; -/** Line end character '\\n', '\\r', '\0'. */ +/** Line end character: '\\n', '\\r', '\0'. 
*/ static const char_type CHAR_TYPE_LINE_END = 1 << 6; +/** Hexadecimal numeric character: [0-9a-fA-F]. */ +static const char_type CHAR_TYPE_HEX = 1 << 7; + /** Character type table (generate with misc/make_tables.c) */ static const char_type char_table[256] = { 0x44, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, @@ -2681,13 +3529,13 @@ static const char_type char_table[256] = { 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x01, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x20, - 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, - 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x82, 0x82, 0x82, 0x82, 0x82, 0x82, 0x82, 0x82, + 0x82, 0x82, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x04, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, @@ -2734,17 +3582,22 @@ static_inline bool char_is_container(u8 c) { return char_is_type(c, (char_type)CHAR_TYPE_CONTAINER); } -/** Match a stop character in ASCII string: '"', '\', [0x00-0x1F], [0x80-0xFF]*/ +/** Match a stop character in ASCII string: '"', '\', [0x00-0x1F,0x80-0xFF]. */ static_inline bool char_is_ascii_stop(u8 c) { return char_is_type(c, (char_type)(CHAR_TYPE_ESC_ASCII | CHAR_TYPE_NON_ASCII)); } -/** Match a line end character: '\\n', '\\r', '\0'*/ +/** Match a line end character: '\\n', '\\r', '\0'. */ static_inline bool char_is_line_end(u8 c) { return char_is_type(c, (char_type)CHAR_TYPE_LINE_END); } +/** Match a hexadecimal numeric character: [0-9a-fA-F]. 
*/ +static_inline bool char_is_hex(u8 c) { + return char_is_type(c, (char_type)CHAR_TYPE_HEX); +} + /*============================================================================== @@ -2830,6 +3683,8 @@ static_inline bool digi_is_digit_or_fp(u8 d) { +#if !YYJSON_DISABLE_READER + /*============================================================================== * Hex Character Reader * This function is used by JSON reader to read escaped characters. @@ -2879,7 +3734,7 @@ static const u8 hex_conv_table[256] = { /** Scans an escaped character sequence as a UTF-16 code unit (branchless). e.g. "\\u005C" should pass "005C" as `cur`. - + This requires the string has 4-byte zero padding. */ static_inline bool read_hex_u16(const u8 *cur, u16 *val) { @@ -2939,7 +3794,6 @@ static_inline bool read_null(u8 **ptr, yyjson_val *val) { /** Read 'Inf' or 'Infinity' literal (ignoring case). */ static_inline bool read_inf(bool sign, u8 **ptr, u8 **pre, yyjson_val *val) { -#if !YYJSON_DISABLE_NON_STANDARD u8 *hdr = *ptr - sign; u8 *cur = *ptr; u8 **end = ptr; @@ -2968,13 +3822,11 @@ static_inline bool read_inf(bool sign, u8 **ptr, u8 **pre, yyjson_val *val) { } return true; } -#endif return false; } /** Read 'NaN' literal (ignoring case). */ static_inline bool read_nan(bool sign, u8 **ptr, u8 **pre, yyjson_val *val) { -#if !YYJSON_DISABLE_NON_STANDARD u8 *hdr = *ptr - sign; u8 *cur = *ptr; u8 **end = ptr; @@ -2995,7 +3847,6 @@ static_inline bool read_nan(bool sign, u8 **ptr, u8 **pre, yyjson_val *val) { } return true; } -#endif return false; } @@ -3010,40 +3861,40 @@ static_inline bool read_inf_or_nan(bool sign, u8 **ptr, u8 **pre, /** Read a JSON number as raw string. 
*/ static_noinline bool read_number_raw(u8 **ptr, u8 **pre, - bool ext, + yyjson_read_flag flg, yyjson_val *val, const char **msg) { - + #define return_err(_pos, _msg) do { \ *msg = _msg; \ *end = _pos; \ return false; \ } while (false) - + #define return_raw() do { \ val->tag = ((u64)(cur - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_RAW; \ val->uni.str = (const char *)hdr; \ *pre = cur; *end = cur; return true; \ } while (false) - + u8 *hdr = *ptr; u8 *cur = *ptr; u8 **end = ptr; - + /* add null-terminator for previous raw string */ if (*pre) **pre = '\0'; - + /* skip sign */ cur += (*cur == '-'); - + /* read first digit, check leading zero */ if (unlikely(!digi_is_digit(*cur))) { - if (unlikely(ext)) { + if (has_read_flag(ALLOW_INF_AND_NAN)) { if (read_inf_or_nan(*hdr == '-', &cur, pre, val)) return_raw(); } return_err(cur, "no digit after minus sign"); } - + /* read integral part */ if (*cur == '0') { cur++; @@ -3055,7 +3906,7 @@ static_noinline bool read_number_raw(u8 **ptr, while (digi_is_digit(*cur)) cur++; if (!digi_is_fp(*cur)) return_raw(); } - + /* read fraction part */ if (*cur == '.') { cur++; @@ -3064,7 +3915,7 @@ static_noinline bool read_number_raw(u8 **ptr, } while (digi_is_digit(*cur)) cur++; } - + /* read exponent part */ if (digi_is_exp(*cur)) { cur += 1 + digi_is_sign(cur[1]); @@ -3073,16 +3924,16 @@ static_noinline bool read_number_raw(u8 **ptr, } while (digi_is_digit(*cur)) cur++; } - + return_raw(); - + #undef return_err #undef return_raw } /** Skips spaces and comments as many as possible. - + It will return false in these cases: 1. No character is skipped. The 'end' pointer is set as input cursor. 2. A multiline comment is not closed. The 'end' pointer is set as the head @@ -3125,6 +3976,115 @@ static_noinline bool skip_spaces_and_comments(u8 **ptr) { return hdr != cur; } +/** + Check truncated string. + Returns true if `cur` match `str` but is truncated. 
+ */ +static_inline bool is_truncated_str(u8 *cur, u8 *end, + const char *str, + bool case_sensitive) { + usize len = strlen(str); + if (cur + len <= end || end <= cur) return false; + if (case_sensitive) { + return memcmp(cur, str, (usize)(end - cur)) == 0; + } + for (; cur < end; cur++, str++) { + if ((*cur != (u8)*str) && (*cur != (u8)*str - 'a' + 'A')) { + return false; + } + } + return true; +} + +/** + Check truncated JSON on parsing errors. + Returns true if the input is valid but truncated. + */ +static_noinline bool is_truncated_end(u8 *hdr, u8 *cur, u8 *end, + yyjson_read_code code, + yyjson_read_flag flg) { + if (cur >= end) return true; + if (code == YYJSON_READ_ERROR_LITERAL) { + if (is_truncated_str(cur, end, "true", true) || + is_truncated_str(cur, end, "false", true) || + is_truncated_str(cur, end, "null", true)) { + return true; + } + } + if (code == YYJSON_READ_ERROR_UNEXPECTED_CHARACTER || + code == YYJSON_READ_ERROR_INVALID_NUMBER || + code == YYJSON_READ_ERROR_LITERAL) { + if (has_read_flag(ALLOW_INF_AND_NAN)) { + if (*cur == '-') cur++; + if (is_truncated_str(cur, end, "infinity", false) || + is_truncated_str(cur, end, "nan", false)) { + return true; + } + } + } + if (code == YYJSON_READ_ERROR_UNEXPECTED_CONTENT) { + if (has_read_flag(ALLOW_INF_AND_NAN)) { + if (hdr + 3 <= cur && + is_truncated_str(cur - 3, end, "infinity", false)) { + return true; /* e.g. infin would be read as inf + in */ + } + } + } + if (code == YYJSON_READ_ERROR_INVALID_STRING) { + usize len = (usize)(end - cur); + + /* unicode escape sequence */ + if (*cur == '\\') { + if (len == 1) return true; + if (len <= 5) { + if (*++cur != 'u') return false; + for (++cur; cur < end; cur++) { + if (!char_is_hex(*cur)) return false; + } + return true; + } + return false; + } + + /* 2 to 4 bytes UTF-8, see `read_string()` for details. 
*/ + if (*cur & 0x80) { + u8 c0 = cur[0], c1 = cur[1], c2 = cur[2]; + if (len == 1) { + /* 2 bytes UTF-8, truncated */ + if ((c0 & 0xE0) == 0xC0 && (c0 & 0x1E) != 0x00) return true; + /* 3 bytes UTF-8, truncated */ + if ((c0 & 0xF0) == 0xE0) return true; + /* 4 bytes UTF-8, truncated */ + if ((c0 & 0xF8) == 0xF0 && (c0 & 0x07) <= 0x04) return true; + } + if (len == 2) { + /* 3 bytes UTF-8, truncated */ + if ((c0 & 0xF0) == 0xE0 && + (c1 & 0xC0) == 0x80) { + u8 pat = (u8)(((c0 & 0x0F) << 1) | ((c1 & 0x20) >> 5)); + return 0x01 <= pat && pat != 0x1B; + } + /* 4 bytes UTF-8, truncated */ + if ((c0 & 0xF8) == 0xF0 && + (c1 & 0xC0) == 0x80) { + u8 pat = (u8)(((c0 & 0x07) << 2) | ((c1 & 0x30) >> 4)); + return 0x01 <= pat && pat <= 0x10; + } + } + if (len == 3) { + /* 4 bytes UTF-8, truncated */ + if ((c0 & 0xF8) == 0xF0 && + (c1 & 0xC0) == 0x80 && + (c2 & 0xC0) == 0x80) { + u8 pat = (u8)(((c0 & 0x07) << 2) | ((c1 & 0x30) >> 4)); + return 0x01 <= pat && pat <= 0x10; + } + } + } + } + return false; +} + #if YYJSON_HAS_IEEE_754 && !YYJSON_DISABLE_FAST_FP_CONV /* FP_READER */ @@ -3275,12 +4235,12 @@ static_inline void bigint_set_u64(bigint *big, u64 val) { /** Set a bigint with floating point number string. 
*/ static_noinline void bigint_set_buf(bigint *big, u64 sig, i32 *exp, u8 *sig_cut, u8 *sig_end, u8 *dot_pos) { - + if (unlikely(!sig_cut)) { /* no digit cut, set significant part only */ bigint_set_u64(big, sig); return; - + } else { /* some digits were cut, read them from 'sig_cut' to 'sig_end' */ u8 *hdr = sig_cut; @@ -3290,7 +4250,7 @@ static_noinline void bigint_set_buf(bigint *big, u64 sig, i32 *exp, bool dig_big_cut = false; bool has_dot = (hdr < dot_pos) & (dot_pos < sig_end); u32 dig_len_total = U64_SAFE_DIG + (u32)(sig_end - hdr) - has_dot; - + sig -= (*sig_cut >= '5'); /* sig was rounded before */ if (dig_len_total > F64_MAX_DEC_DIG) { dig_big_cut = true; @@ -3299,7 +4259,7 @@ static_noinline void bigint_set_buf(bigint *big, u64 sig, i32 *exp, dig_len_total = (F64_MAX_DEC_DIG + 1); } *exp -= (i32)dig_len_total - U64_SAFE_DIG; - + big->used = 1; big->bits[0] = sig; while (cur < sig_end) { @@ -3363,14 +4323,14 @@ static_inline u64 diy_fp_to_ieee_raw(diy_fp fp) { i32 exp = fp.exp; u32 lz_bits; if (unlikely(fp.sig == 0)) return 0; - + lz_bits = u64_lz_bits(sig); sig <<= lz_bits; sig >>= F64_BITS - F64_SIG_FULL_BITS; exp -= (i32)lz_bits; exp += F64_BITS - F64_SIG_FULL_BITS; exp += F64_SIG_BITS; - + if (unlikely(exp >= F64_MAX_BIN_EXP)) { /* overflow */ return F64_RAW_INF; @@ -3404,7 +4364,7 @@ static const f64 f64_pow10_table[] = { /** Read a JSON number. - + 1. This function assume that the floating-point number is in IEEE-754 format. 2. This function support uint64/int64/double number. If an integer number cannot fit in uint64/int64, it will returns as a double number. 
If a double @@ -3413,69 +4373,83 @@ static const f64 f64_pow10_table[] = { */ static_inline bool read_number(u8 **ptr, u8 **pre, - bool ext, + yyjson_read_flag flg, yyjson_val *val, const char **msg) { - + #define return_err(_pos, _msg) do { \ *msg = _msg; \ *end = _pos; \ return false; \ } while (false) - + +#define return_0() do { \ + val->tag = YYJSON_TYPE_NUM | (u8)((u8)sign << 3); \ + val->uni.u64 = 0; \ + *end = cur; return true; \ +} while (false) + #define return_i64(_v) do { \ val->tag = YYJSON_TYPE_NUM | (u8)((u8)sign << 3); \ val->uni.u64 = (u64)(sign ? (u64)(~(_v) + 1) : (u64)(_v)); \ *end = cur; return true; \ } while (false) - + #define return_f64(_v) do { \ val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL; \ val->uni.f64 = sign ? -(f64)(_v) : (f64)(_v); \ *end = cur; return true; \ } while (false) - -#define return_f64_raw(_v) do { \ + +#define return_f64_bin(_v) do { \ val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL; \ val->uni.u64 = ((u64)sign << 63) | (u64)(_v); \ *end = cur; return true; \ } while (false) - + #define return_inf() do { \ - if (unlikely(ext)) return_f64_raw(F64_RAW_INF); \ + if (has_read_flag(BIGNUM_AS_RAW)) return_raw(); \ + if (has_read_flag(ALLOW_INF_AND_NAN)) return_f64_bin(F64_RAW_INF); \ else return_err(hdr, "number is infinity when parsed as double"); \ } while (false) - + +#define return_raw() do { \ + if (*pre) **pre = '\0'; /* add null-terminator for previous raw string */ \ + val->tag = ((u64)(cur - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_RAW; \ + val->uni.str = (const char *)hdr; \ + *pre = cur; *end = cur; return true; \ +} while (false) + u8 *sig_cut = NULL; /* significant part cutting position for long number */ u8 *sig_end = NULL; /* significant part ending position */ u8 *dot_pos = NULL; /* decimal point position */ - + u64 sig = 0; /* significant part of the number */ i32 exp = 0; /* exponent part of the number */ - + bool exp_sign; /* temporary exponent sign from literal part */ i64 exp_sig = 0; /* temporary 
exponent number from significant part */ i64 exp_lit = 0; /* temporary exponent number from exponent literal part */ u64 num; /* temporary number for reading */ u8 *tmp; /* temporary cursor for reading */ - + u8 *hdr = *ptr; u8 *cur = *ptr; u8 **end = ptr; bool sign; - - /* read number as raw string if has flag */ - if (unlikely(pre)) { - return read_number_raw(ptr, pre, ext, val, msg); + + /* read number as raw string if has `YYJSON_READ_NUMBER_AS_RAW` flag */ + if (unlikely(pre && !has_read_flag(BIGNUM_AS_RAW))) { + return read_number_raw(ptr, pre, flg, val, msg); } - + sign = (*hdr == '-'); cur += sign; - + /* begin with a leading zero or non-digit */ if (unlikely(!digi_is_nonzero(*cur))) { /* 0 or non-digit char */ if (unlikely(*cur != '0')) { /* non-digit char */ - if (unlikely(ext)) { + if (has_read_flag(ALLOW_INF_AND_NAN)) { if (read_inf_or_nan(sign, &cur, pre, val)) { *end = cur; return true; @@ -3484,7 +4458,7 @@ static_inline bool read_number(u8 **ptr, return_err(cur, "no digit after minus sign"); } /* begin with 0 */ - if (likely(!digi_is_digit_or_fp(*++cur))) return_i64(0); + if (likely(!digi_is_digit_or_fp(*++cur))) return_0(); if (likely(*cur == '.')) { dot_pos = cur++; if (unlikely(!digi_is_digit(*cur))) { @@ -3508,15 +4482,15 @@ static_inline bool read_number(u8 **ptr, } while (digi_is_digit(*++cur)); } - return_f64_raw(0); + return_f64_bin(0); } - + /* begin with non-zero digit */ sig = (u64)(*cur - '0'); - + /* Read integral part, same as the following code. 
- + for (int i = 1; i <= 18; i++) { num = cur[i] - '0'; if (num <= 9) sig = num + sig * 10; @@ -3526,21 +4500,22 @@ static_inline bool read_number(u8 **ptr, #define expr_intg(i) \ if (likely((num = (u64)(cur[i] - (u8)'0')) <= 9)) sig = num + sig * 10; \ else { goto digi_sepr_##i; } - repeat_in_1_18(expr_intg); + repeat_in_1_18(expr_intg) #undef expr_intg - - + + cur += 19; /* skip continuous 19 digits */ if (!digi_is_digit_or_fp(*cur)) { /* this number is an integer consisting of 19 digits */ if (sign && (sig > ((u64)1 << 63))) { /* overflow */ + if (has_read_flag(BIGNUM_AS_RAW)) return_raw(); return_f64(normalized_u64_to_f64(sig)); } return_i64(sig); } goto digi_intg_more; /* read more digits in integral part */ - - + + /* process first non-digit character */ #define expr_sepr(i) \ digi_sepr_##i: \ @@ -3550,8 +4525,8 @@ static_inline bool read_number(u8 **ptr, cur += i; sig_end = cur; goto digi_exp_more; repeat_in_1_18(expr_sepr) #undef expr_sepr - - + + /* read fraction part */ #define expr_frac(i) \ digi_frac_##i: \ @@ -3560,12 +4535,12 @@ static_inline bool read_number(u8 **ptr, else { goto digi_stop_##i; } repeat_in_1_18(expr_frac) #undef expr_frac - + cur += 20; /* skip 19 digits and 1 decimal point */ if (!digi_is_digit(*cur)) goto digi_frac_end; /* fraction part end */ goto digi_frac_more; /* read more digits in fraction part */ - - + + /* significant part end */ #define expr_stop(i) \ digi_stop_##i: \ @@ -3573,8 +4548,8 @@ static_inline bool read_number(u8 **ptr, goto digi_frac_end; repeat_in_1_18(expr_stop) #undef expr_stop - - + + /* read more digits in integral part */ digi_intg_more: if (digi_is_digit(*cur)) { @@ -3586,31 +4561,37 @@ static_inline bool read_number(u8 **ptr, sig = num + sig * 10; cur++; /* convert to double if overflow */ - if (sign) return_f64(normalized_u64_to_f64(sig)); + if (sign) { + if (has_read_flag(BIGNUM_AS_RAW)) return_raw(); + return_f64(normalized_u64_to_f64(sig)); + } return_i64(sig); } } } - + if (digi_is_exp(*cur)) { 
dot_pos = cur; goto digi_exp_more; } - + if (*cur == '.') { dot_pos = cur++; if (!digi_is_digit(*cur)) { return_err(cur, "no digit after decimal point"); } } - - + + /* read more digits in fraction part */ digi_frac_more: sig_cut = cur; /* too large to fit in u64, excess digits need to be cut */ sig += (*cur >= '5'); /* round */ while (digi_is_digit(*++cur)); if (!dot_pos) { + if (!digi_is_fp(*cur) && has_read_flag(BIGNUM_AS_RAW)) { + return_raw(); /* it's a large integer */ + } dot_pos = cur; if (*cur == '.') { if (!digi_is_digit(*++cur)) { @@ -3621,7 +4602,7 @@ static_inline bool read_number(u8 **ptr, } exp_sig = (i64)(dot_pos - sig_cut); exp_sig += (dot_pos < sig_cut); - + /* ignore trailing zeros */ tmp = cur - 1; while (*tmp == '0' || *tmp == '.') tmp--; @@ -3630,11 +4611,11 @@ static_inline bool read_number(u8 **ptr, } else { sig_end = cur; } - + if (digi_is_exp(*cur)) goto digi_exp_more; goto digi_exp_finish; - - + + /* fraction part end */ digi_frac_end: if (unlikely(dot_pos + 1 == cur)) { @@ -3644,15 +4625,15 @@ static_inline bool read_number(u8 **ptr, exp_sig = -(i64)((u64)(cur - dot_pos) - 1); if (likely(!digi_is_exp(*cur))) { if (unlikely(exp_sig < F64_MIN_DEC_EXP - 19)) { - return_f64_raw(0); /* underflow */ + return_f64_bin(0); /* underflow */ } exp = (i32)exp_sig; goto digi_finish; } else { goto digi_exp_more; } - - + + /* read exponent part */ digi_exp_more: exp_sign = (*++cur == '-'); @@ -3661,7 +4642,7 @@ static_inline bool read_number(u8 **ptr, return_err(cur, "no digit after exponent sign"); } while (*cur == '0') cur++; - + /* read exponent literal */ tmp = cur; while (digi_is_digit(*cur)) { @@ -3669,37 +4650,37 @@ static_inline bool read_number(u8 **ptr, } if (unlikely(cur - tmp >= U64_SAFE_DIG)) { if (exp_sign) { - return_f64_raw(0); /* underflow */ + return_f64_bin(0); /* underflow */ } else { return_inf(); /* overflow */ } } exp_sig += exp_sign ? 
-exp_lit : exp_lit; - - + + /* validate exponent value */ digi_exp_finish: if (unlikely(exp_sig < F64_MIN_DEC_EXP - 19)) { - return_f64_raw(0); /* underflow */ + return_f64_bin(0); /* underflow */ } if (unlikely(exp_sig > F64_MAX_DEC_EXP)) { return_inf(); /* overflow */ } exp = (i32)exp_sig; - - + + /* all digit read finished */ digi_finish: - + /* Fast path 1: - + 1. The floating-point number calculation should be accurate, see the comments of macro `YYJSON_DOUBLE_MATH_CORRECT`. 2. Correct rounding should be performed (fegetround() == FE_TONEAREST). 3. The input of floating point number calculation does not lose precision, which means: 64 - leading_zero(input) - trailing_zero(input) < 53. - + We don't check all available inputs here, because that would make the code more complicated, and not friendly to branch predictor. */ @@ -3716,10 +4697,10 @@ static_inline bool read_number(u8 **ptr, return_f64(dbl); } #endif - + /* Fast path 2: - + To keep it simple, we only accept normal number here, let the slow path to handle subnormal and infinity number. */ @@ -3729,16 +4710,16 @@ static_inline bool read_number(u8 **ptr, /* The result value is exactly equal to (sig * 10^exp), the exponent part (10^exp) can be converted to (sig2 * 2^exp2). - + The sig2 can be an infinite length number, only the highest 128 bits is cached in the pow10_sig_table. - + Now we have these bits: sig1 (normalized 64bit) : aaaaaaaa sig2 (higher 64bit) : bbbbbbbb sig2_ext (lower 64bit) : cccccccc sig2_cut (extra unknown bits) : dddddddddddd.... - + And the calculation process is: ---------------------------------------- aaaaaaaa * @@ -3752,17 +4733,17 @@ static_inline bool read_number(u8 **ptr, [hi2___][lo2___] + [unknown___________....] ---------------------------------------- - + The addition with carry may affect higher bits, but if there is a 0 in higher bits, the bits higher than 0 will not be affected. 
- + `lo2` + `unknown` may get a carry bit and may affect `hi2`, the max value of `hi2` is 0xFFFFFFFFFFFFFFFE, so `hi2` will not overflow. - + `lo` + `hi2` may also get a carry bit and may affect `hi`, but only the highest significant 53 bits of `hi` is needed. If there is a 0 in the lower bits of `hi`, then all the following bits can be dropped. - + To convert the result to IEEE-754 double number, we need to perform correct rounding: 1. if bit 54 is 0, round down, @@ -3770,30 +4751,30 @@ static_inline bool read_number(u8 **ptr, 3. if bit 54 is 1 and all bits beyond bit 54 are 0, round to even, as the extra bits is unknown, this case will not be handled here. */ - + u64 raw; u64 sig1, sig2, sig2_ext, hi, lo, hi2, lo2, add, bits; i32 exp2; u32 lz; bool exact = false, carry, round_up; - + /* convert (10^exp) to (sig2 * 2^exp2) */ pow10_table_get_sig(exp, &sig2, &sig2_ext); pow10_table_get_exp(exp, &exp2); - + /* normalize and multiply */ lz = u64_lz_bits(sig); sig1 = sig << lz; exp2 -= (i32)lz; u128_mul(sig1, sig2, &hi, &lo); - + /* The `hi` is in range [0x4000000000000000, 0xFFFFFFFFFFFFFFFE], To get normalized value, `hi` should be shifted to the left by 0 or 1. - + The highest significant 53 bits is used by IEEE-754 double number, and the bit 54 is used to detect rounding direction. - + The lowest (64 - 54 - 1) bits is used to check whether it contains 0. */ bits = hi & (((u64)1 << (64 - 54 - 1)) - 1); @@ -3805,7 +4786,7 @@ static_inline bool read_number(u8 **ptr, after `0`. */ exact = true; - + } else { /* (bits == 0 || bits == 0x1FF) @@ -3813,7 +4794,7 @@ static_inline bool read_number(u8 **ptr, lower bits with another 64-bit multiplication. 
*/ u128_mul(sig1, sig2_ext, &hi2, &lo2); - + add = lo + hi2; if (add + 1 > (u64)1) { /* @@ -3828,38 +4809,38 @@ static_inline bool read_number(u8 **ptr, exact = true; } } - + if (exact) { /* normalize */ lz = hi < ((u64)1 << 63); hi <<= lz; exp2 -= (i32)lz; exp2 += 64; - + /* test the bit 54 and get rounding direction */ round_up = (hi & ((u64)1 << (64 - 54))) > (u64)0; hi += (round_up ? ((u64)1 << (64 - 54)) : (u64)0); - + /* test overflow */ if (hi < ((u64)1 << (64 - 54))) { hi = ((u64)1 << 63); exp2 += 1; } - + /* This is a normal number, convert it to IEEE-754 format. */ hi >>= F64_BITS - F64_SIG_FULL_BITS; exp2 += F64_BITS - F64_SIG_FULL_BITS + F64_SIG_BITS; exp2 += F64_EXP_BIAS; raw = ((u64)exp2 << F64_SIG_BITS) | (hi & F64_SIG_MASK); - return_f64_raw(raw); + return_f64_bin(raw); } } - + /* Slow path: read double number exactly with diyfp. 1. Use cached diyfp to get an approximation value. 2. Use bigcomp to check the approximation value if needed. - + This algorithm refers to google's double-conversion project: https://github.com/google/double-conversion */ @@ -3871,7 +4852,7 @@ static_inline bool read_number(u8 **ptr, const i32 DIY_SIG_BITS = 64; const i32 EXP_BIAS = F64_EXP_BIAS + F64_SIG_BITS; const i32 EXP_SUBNORMAL = -EXP_BIAS + 1; - + u64 fp_err; u32 bits; i32 order_of_magnitude; @@ -3879,32 +4860,32 @@ static_inline bool read_number(u8 **ptr, i32 precision_digits_count; u64 precision_bits; u64 half_way; - + u64 raw; diy_fp fp, fp_upper; bigint big_full, big_comp; i32 cmp; - + fp.sig = sig; fp.exp = 0; fp_err = sig_cut ? 
(u64)(ERR_ULP / 2) : (u64)0; - + /* normalize */ bits = u64_lz_bits(fp.sig); fp.sig <<= bits; fp.exp -= (i32)bits; fp_err <<= bits; - + /* multiply and add error */ fp = diy_fp_mul(fp, diy_fp_get_cached_pow10(exp)); fp_err += (u64)ERR_CACHED_POW + (fp_err != 0) + (u64)ERR_MUL_FIXED; - + /* normalize */ bits = u64_lz_bits(fp.sig); fp.sig <<= bits; fp.exp -= (i32)bits; fp_err <<= bits; - + /* effective significand */ order_of_magnitude = DIY_SIG_BITS + fp.exp; if (likely(order_of_magnitude >= EXP_SUBNORMAL + F64_SIG_FULL_BITS)) { @@ -3914,7 +4895,7 @@ static_inline bool read_number(u8 **ptr, } else { effective_significand_size = order_of_magnitude - EXP_SUBNORMAL; } - + /* precision digits count */ precision_digits_count = DIY_SIG_BITS - effective_significand_size; if (unlikely(precision_digits_count + ERR_ULP_LOG >= DIY_SIG_BITS)) { @@ -3924,27 +4905,27 @@ static_inline bool read_number(u8 **ptr, fp_err = (fp_err >> shr) + 1 + (u32)ERR_ULP; precision_digits_count -= shr; } - + /* half way */ precision_bits = fp.sig & (((u64)1 << precision_digits_count) - 1); precision_bits *= (u32)ERR_ULP; half_way = (u64)1 << (precision_digits_count - 1); half_way *= (u32)ERR_ULP; - + /* rounding */ fp.sig >>= precision_digits_count; fp.sig += (precision_bits >= half_way + fp_err); fp.exp += precision_digits_count; - + /* get IEEE double raw value */ raw = diy_fp_to_ieee_raw(fp); if (unlikely(raw == F64_RAW_INF)) return_inf(); if (likely(precision_bits <= half_way - fp_err || precision_bits >= half_way + fp_err)) { - return_f64_raw(raw); /* number is accurate */ + return_f64_bin(raw); /* number is accurate */ } /* now the number is the correct value, or the next lower value */ - + /* upper boundary */ if (raw & F64_EXP_MASK) { fp_upper.sig = (raw & F64_SIG_MASK) + ((u64)1 << F64_SIG_BITS); @@ -3957,7 +4938,7 @@ static_inline bool read_number(u8 **ptr, fp_upper.sig <<= 1; fp_upper.exp -= 1; fp_upper.sig += 1; /* add half ulp */ - + /* compare with bigint */ bigint_set_buf(&big_full, 
sig, &exp, sig_cut, sig_end, dot_pos); bigint_set_u64(&big_comp, fp_upper.sig); @@ -3979,17 +4960,18 @@ static_inline bool read_number(u8 **ptr, /* falls midway, round to even */ raw += (raw & 1); } - + if (unlikely(raw == F64_RAW_INF)) return_inf(); - return_f64_raw(raw); + return_f64_bin(raw); } - -#undef has_flag + #undef return_err #undef return_inf +#undef return_0 #undef return_i64 #undef return_f64 -#undef return_f64_raw +#undef return_f64_bin +#undef return_raw } @@ -4001,36 +4983,55 @@ static_inline bool read_number(u8 **ptr, This is a fallback function if the custom number reader is disabled. This function use libc's strtod() to read floating-point number. */ -static_noinline bool read_number(u8 **ptr, - u8 **pre, - bool ext, - yyjson_val *val, - const char **msg) { - +static_inline bool read_number(u8 **ptr, + u8 **pre, + yyjson_read_flag flg, + yyjson_val *val, + const char **msg) { + #define return_err(_pos, _msg) do { \ *msg = _msg; \ *end = _pos; \ return false; \ } while (false) - + +#define return_0() do { \ + val->tag = YYJSON_TYPE_NUM | (u64)((u8)sign << 3); \ + val->uni.u64 = 0; \ + *end = cur; return true; \ +} while (false) + #define return_i64(_v) do { \ val->tag = YYJSON_TYPE_NUM | (u64)((u8)sign << 3); \ val->uni.u64 = (u64)(sign ? (u64)(~(_v) + 1) : (u64)(_v)); \ *end = cur; return true; \ } while (false) - + #define return_f64(_v) do { \ val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL; \ val->uni.f64 = sign ? 
-(f64)(_v) : (f64)(_v); \ *end = cur; return true; \ } while (false) - -#define return_f64_raw(_v) do { \ + +#define return_f64_bin(_v) do { \ val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL; \ val->uni.u64 = ((u64)sign << 63) | (u64)(_v); \ *end = cur; return true; \ } while (false) - + +#define return_inf() do { \ + if (has_read_flag(BIGNUM_AS_RAW)) return_raw(); \ + if (has_read_flag(ALLOW_INF_AND_NAN)) return_f64_bin(F64_RAW_INF); \ + else return_err(hdr, "number is infinity when parsed as double"); \ +} while (false) + +#define return_raw() do { \ + if (*pre) **pre = '\0'; /* add null-terminator for previous raw string */ \ + val->tag = ((u64)(cur - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_RAW; \ + val->uni.str = (const char *)hdr; \ + *pre = cur; *end = cur; return true; \ +} while (false) + u64 sig, num; u8 *hdr = *ptr; u8 *cur = *ptr; @@ -4038,19 +5039,19 @@ static_noinline bool read_number(u8 **ptr, u8 *dot = NULL; u8 *f64_end = NULL; bool sign; - - /* read number as raw string if has flag */ - if (unlikely(pre)) { - return read_number_raw(ptr, pre, ext, val, msg); + + /* read number as raw string if has `YYJSON_READ_NUMBER_AS_RAW` flag */ + if (unlikely(pre && !has_read_flag(BIGNUM_AS_RAW))) { + return read_number_raw(ptr, pre, flg, val, msg); } - + sign = (*hdr == '-'); cur += sign; sig = (u8)(*cur - '0'); - + /* read first digit, check leading zero */ if (unlikely(!digi_is_digit(*cur))) { - if (unlikely(ext)) { + if (has_read_flag(ALLOW_INF_AND_NAN)) { if (read_inf_or_nan(sign, &cur, pre, val)) { *end = cur; return true; @@ -4063,17 +5064,17 @@ static_noinline bool read_number(u8 **ptr, if (unlikely(digi_is_digit(*cur))) { return_err(cur - 1, "number with leading zero is not allowed"); } - if (!digi_is_fp(*cur)) return_i64(0); + if (!digi_is_fp(*cur)) return_0(); goto read_double; } - + /* read continuous digits, up to 19 characters */ #define expr_intg(i) \ if (likely((num = (u64)(cur[i] - (u8)'0')) <= 9)) sig = num + sig * 10; \ else { cur += i; goto 
intg_end; } - repeat_in_1_18(expr_intg); + repeat_in_1_18(expr_intg) #undef expr_intg - + /* here are 19 continuous digits, skip them */ cur += 19; if (digi_is_digit(cur[0]) && !digi_is_digit_or_fp(cur[1])) { @@ -4083,24 +5084,31 @@ static_noinline bool read_number(u8 **ptr, (sig == (U64_MAX / 10) && num <= (U64_MAX % 10))) { sig = num + sig * 10; cur++; - if (sign) return_f64(normalized_u64_to_f64(sig)); + if (sign) { + if (has_read_flag(BIGNUM_AS_RAW)) return_raw(); + return_f64(normalized_u64_to_f64(sig)); + } return_i64(sig); } } - + intg_end: /* continuous digits ended */ if (!digi_is_digit_or_fp(*cur)) { /* this number is an integer consisting of 1 to 19 digits */ if (sign && (sig > ((u64)1 << 63))) { + if (has_read_flag(BIGNUM_AS_RAW)) return_raw(); return_f64(normalized_u64_to_f64(sig)); } return_i64(sig); } - + read_double: /* this number should be read as double */ while (digi_is_digit(*cur)) cur++; + if (!digi_is_fp(*cur) && has_read_flag(BIGNUM_AS_RAW)) { + return_raw(); /* it's a large integer */ + } if (*cur == '.') { /* skip fraction part */ dot = cur; @@ -4120,43 +5128,47 @@ static_noinline bool read_number(u8 **ptr, cur++; while (digi_is_digit(*cur)) cur++; } - + /* libc's strtod() is used to parse the floating-point number. - + Note that the decimal point character used by strtod() is locale-dependent, and the rounding direction may affected by fesetround(). - + For currently known locales, (en, zh, ja, ko, am, he, hi) use '.' as the decimal point, while other locales use ',' as the decimal point. - + Here strtod() is called twice for different locales, but if another thread happens calls setlocale() between two strtod(), parsing may still fail. */ val->uni.f64 = strtod((const char *)hdr, (char **)&f64_end); if (unlikely(f64_end != cur)) { + /* replace '.' 
with ',' for locale */ bool cut = (*cur == ','); - if (dot) *dot = ','; if (cut) *cur = ' '; + if (dot) *dot = ','; val->uni.f64 = strtod((const char *)hdr, (char **)&f64_end); + /* restore ',' to '.' */ if (cut) *cur = ','; + if (dot) *dot = '.'; if (unlikely(f64_end != cur)) { return_err(hdr, "strtod() failed to parse the number"); } } - if (unlikely(val->uni.f64 == HUGE_VAL || val->uni.f64 == -HUGE_VAL)) { - if (!ext) { - return_err(hdr, "number is infinity when parsed as double"); - } + if (unlikely(val->uni.f64 >= HUGE_VAL || val->uni.f64 <= -HUGE_VAL)) { + return_inf(); } val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL; *end = cur; return true; - -#undef has_flag + #undef return_err +#undef return_0 #undef return_i64 #undef return_f64 +#undef return_f64_bin +#undef return_inf +#undef return_raw } #endif /* FP_READER */ @@ -4255,6 +5267,7 @@ static_inline bool read_string(u8 **ptr, const u32 b4_err0 = 0x00000004UL; const u32 b4_err1 = 0x00003003UL; #else + /* this should be evaluated at compile-time */ v32_uni b1_mask_uni = {{ 0x80, 0x00, 0x00, 0x00 }}; v32_uni b1_patt_uni = {{ 0x00, 0x00, 0x00, 0x00 }}; v32_uni b2_mask_uni = {{ 0xE0, 0xC0, 0x00, 0x00 }}; @@ -4284,7 +5297,7 @@ static_inline bool read_string(u8 **ptr, u32 b4_err0 = b4_err0_uni.u; u32 b4_err1 = b4_err1_uni.u; #endif - + #define is_valid_seq_1(uni) ( \ ((uni & b1_mask) == b1_patt) \ ) @@ -4293,84 +5306,85 @@ static_inline bool read_string(u8 **ptr, ((uni & b2_mask) == b2_patt) && \ ((uni & b2_requ)) \ ) - + #define is_valid_seq_3(uni) ( \ ((uni & b3_mask) == b3_patt) && \ ((tmp = (uni & b3_requ))) && \ ((tmp != b3_erro)) \ ) - + #define is_valid_seq_4(uni) ( \ ((uni & b4_mask) == b4_patt) && \ ((tmp = (uni & b4_requ))) && \ ((tmp & b4_err0) == 0 || (tmp & b4_err1) == 0) \ ) - + #define return_err(_end, _msg) do { \ *msg = _msg; \ *end = _end; \ return false; \ } while (false) - + u8 *cur = *ptr; u8 **end = ptr; u8 *src = ++cur, *dst, *pos; u16 hi, lo; u32 uni, tmp; - + skip_ascii: /* Most 
strings have no escaped characters, so we can jump them quickly. */ - + skip_ascii_begin: /* We want to make loop unrolling, as shown in the following code. Some compiler may not generate instructions as expected, so we rewrite it with explicit goto statements. We hope the compiler can generate instructions like this: https://godbolt.org/z/8vjsYq - + while (true) repeat16({ if (likely(!(char_is_ascii_stop(*src)))) src++; else break; - }); + }) */ #define expr_jump(i) \ if (likely(!char_is_ascii_stop(src[i]))) {} \ else goto skip_ascii_stop##i; - + #define expr_stop(i) \ skip_ascii_stop##i: \ src += i; \ goto skip_ascii_end; - - repeat16_incr(expr_jump); + + repeat16_incr(expr_jump) src += 16; goto skip_ascii_begin; - repeat16_incr(expr_stop); - + repeat16_incr(expr_stop) + #undef expr_jump #undef expr_stop - + skip_ascii_end: - + /* GCC may store src[i] in a register at each line of expr_jump(i) above. These instructions are useless and will degrade performance. This inline asm is a hint for gcc: "the memory has been modified, do not cache it". - + MSVC, Clang, ICC can generate expected instructions without this hint. */ #if YYJSON_IS_REAL_GCC __asm__ volatile("":"=m"(*src)); #endif if (likely(*src == '"')) { - val->tag = ((u64)(src - cur) << YYJSON_TAG_BIT) | YYJSON_TYPE_STR; + val->tag = ((u64)(src - cur) << YYJSON_TAG_BIT) | + (u64)(YYJSON_TYPE_STR | YYJSON_SUBTYPE_NOESC); val->uni.str = (const char *)cur; *src = '\0'; *end = src + 1; return true; } - + skip_utf8: if (*src & 0x80) { /* non-ASCII character */ /* @@ -4381,6 +5395,21 @@ static_inline bool read_string(u8 **ptr, loop, which is more friendly to branch prediction. 
*/ pos = src; +#if YYJSON_DISABLE_UTF8_VALIDATION + while (true) repeat8({ + if (likely((*src & 0xF0) == 0xE0)) src += 3; + else break; + }) + if (*src < 0x80) goto skip_ascii; + while (true) repeat8({ + if (likely((*src & 0xE0) == 0xC0)) src += 2; + else break; + }) + while (true) repeat8({ + if (likely((*src & 0xF8) == 0xF0)) src += 4; + else break; + }) +#else uni = byte_load_4(src); while (is_valid_seq_3(uni)) { src += 3; @@ -4395,13 +5424,14 @@ static_inline bool read_string(u8 **ptr, src += 4; uni = byte_load_4(src); } +#endif if (unlikely(pos == src)) { if (!inv) return_err(src, "invalid UTF-8 encoding in string"); ++src; } goto skip_ascii; } - + /* The escape character appears, we need to copy it. */ dst = src; copy_escape: @@ -4417,7 +5447,7 @@ static_inline bool read_string(u8 **ptr, case 't': *dst++ = '\t'; src++; break; case 'u': if (unlikely(!read_hex_u16(++src, &hi))) { - return_err(src - 2, "invalid escaped unicode in string"); + return_err(src - 2, "invalid escaped sequence in string"); } src += 4; if (likely((hi & 0xF800) != 0xD800)) { @@ -4437,9 +5467,11 @@ static_inline bool read_string(u8 **ptr, if (unlikely((hi & 0xFC00) != 0xD800)) { return_err(src - 6, "invalid high surrogate in string"); } - if (unlikely(!byte_match_2(src, "\\u")) || - unlikely(!read_hex_u16(src + 2, &lo))) { - return_err(src, "no matched low surrogate in string"); + if (unlikely(!byte_match_2(src, "\\u"))) { + return_err(src, "no low surrogate in string"); + } + if (unlikely(!read_hex_u16(src + 2, &lo))) { + return_err(src, "invalid escaped sequence in string"); } if (unlikely((lo & 0xFC00) != 0xDC00)) { return_err(src, "invalid low surrogate in string"); @@ -4466,15 +5498,15 @@ static_inline bool read_string(u8 **ptr, if (src >= lst) return_err(src, "unclosed string"); *dst++ = *src++; } - + copy_ascii: /* Copy continuous ASCII, loop unrolling, same as the following code: - + while (true) repeat16({ if (unlikely(char_is_ascii_stop(*src))) break; *dst++ = *src++; - }); + }) 
*/ #if YYJSON_IS_REAL_GCC # define expr_jump(i) \ @@ -4485,14 +5517,18 @@ static_inline bool read_string(u8 **ptr, if (likely(!(char_is_ascii_stop(src[i])))) {} \ else { goto copy_ascii_stop_##i; } #endif - repeat16_incr(expr_jump); + repeat16_incr(expr_jump) #undef expr_jump - + byte_move_16(dst, src); src += 16; dst += 16; goto copy_ascii; - + + /* + The memory will be moved forward by at least 1 byte. So the `byte_move` + can be one byte more than needed to reduce the number of instructions. + */ copy_ascii_stop_0: goto copy_utf8; copy_ascii_stop_1: @@ -4580,30 +5616,58 @@ static_inline bool read_string(u8 **ptr, src += 15; dst += 15; goto copy_utf8; - + copy_utf8: if (*src & 0x80) { /* non-ASCII character */ pos = src; uni = byte_load_4(src); +#if YYJSON_DISABLE_UTF8_VALIDATION + while (true) repeat4({ + if ((uni & b3_mask) == b3_patt) { + byte_copy_4(dst, &uni); + dst += 3; + src += 3; + uni = byte_load_4(src); + } else break; + }) + if ((uni & b1_mask) == b1_patt) goto copy_ascii; + while (true) repeat4({ + if ((uni & b2_mask) == b2_patt) { + byte_copy_2(dst, &uni); + dst += 2; + src += 2; + uni = byte_load_4(src); + } else break; + }) + while (true) repeat4({ + if ((uni & b4_mask) == b4_patt) { + byte_copy_4(dst, &uni); + dst += 4; + src += 4; + uni = byte_load_4(src); + } else break; + }) +#else while (is_valid_seq_3(uni)) { - byte_move_4(dst, &uni); + byte_copy_4(dst, &uni); dst += 3; src += 3; uni = byte_load_4(src); } if (is_valid_seq_1(uni)) goto copy_ascii; while (is_valid_seq_2(uni)) { - byte_move_2(dst, &uni); + byte_copy_2(dst, &uni); dst += 2; src += 2; uni = byte_load_4(src); } while (is_valid_seq_4(uni)) { - byte_move_4(dst, &uni); + byte_copy_4(dst, &uni); dst += 4; src += 4; uni = byte_load_4(src); } +#endif if (unlikely(pos == src)) { if (!inv) return_err(src, "invalid UTF-8 encoding in string"); goto copy_ascii_stop_1; @@ -4611,7 +5675,7 @@ static_inline bool read_string(u8 **ptr, goto copy_ascii; } goto copy_escape; - + #undef return_err 
#undef is_valid_seq_1 #undef is_valid_seq_2 @@ -4636,11 +5700,9 @@ static_noinline yyjson_doc *read_root_single(u8 *hdr, yyjson_alc alc, yyjson_read_flag flg, yyjson_read_err *err) { - -#define has_flag(_flag) unlikely((flg & YYJSON_READ_##_flag) != 0) - + #define return_err(_pos, _code, _msg) do { \ - if (_pos >= end) { \ + if (is_truncated_end(hdr, _pos, end, YYJSON_READ_ERROR_##_code, flg)) { \ err->pos = (usize)(end - hdr); \ err->code = YYJSON_READ_ERROR_UNEXPECTED_END; \ err->msg = "unexpected end of data"; \ @@ -4652,35 +5714,33 @@ static_noinline yyjson_doc *read_root_single(u8 *hdr, if (val_hdr) alc.free(alc.ctx, (void *)val_hdr); \ return NULL; \ } while (false) - + usize hdr_len; /* value count used by doc */ usize alc_num; /* value count capacity */ yyjson_val *val_hdr; /* the head of allocated values */ yyjson_val *val; /* current value */ yyjson_doc *doc; /* the JSON document, equals to val_hdr */ const char *msg; /* error message */ - + bool raw; /* read number as raw */ - bool ext; /* allow inf and nan */ bool inv; /* allow invalid unicode */ u8 *raw_end; /* raw end for null-terminator */ u8 **pre; /* previous raw end pointer */ - + hdr_len = sizeof(yyjson_doc) / sizeof(yyjson_val); hdr_len += (sizeof(yyjson_doc) % sizeof(yyjson_val)) > 0; alc_num = hdr_len + 1; /* single value */ - + val_hdr = (yyjson_val *)alc.malloc(alc.ctx, alc_num * sizeof(yyjson_val)); if (unlikely(!val_hdr)) goto fail_alloc; val = val_hdr + hdr_len; - raw = (flg & YYJSON_READ_NUMBER_AS_RAW) != 0; - ext = (flg & YYJSON_READ_ALLOW_INF_AND_NAN) != 0; - inv = (flg & YYJSON_READ_ALLOW_INVALID_UNICODE) != 0; + raw = has_read_flag(NUMBER_AS_RAW) || has_read_flag(BIGNUM_AS_RAW); + inv = has_read_flag(ALLOW_INVALID_UNICODE) != 0; raw_end = NULL; pre = raw ? 
&raw_end : NULL; - + if (char_is_number(*cur)) { - if (likely(read_number(&cur, pre, ext, val, &msg))) goto doc_end; + if (likely(read_number(&cur, pre, flg, val, &msg))) goto doc_end; goto fail_number; } if (*cur == '"') { @@ -4697,20 +5757,20 @@ static_noinline yyjson_doc *read_root_single(u8 *hdr, } if (*cur == 'n') { if (likely(read_null(&cur, val))) goto doc_end; - if (unlikely(ext)) { + if (has_read_flag(ALLOW_INF_AND_NAN)) { if (read_nan(false, &cur, pre, val)) goto doc_end; } goto fail_literal; } - if (unlikely(ext)) { + if (has_read_flag(ALLOW_INF_AND_NAN)) { if (read_inf_or_nan(false, &cur, pre, val)) goto doc_end; } goto fail_character; - + doc_end: /* check invalid contents after json document */ - if (unlikely(cur < end) && !has_flag(STOP_WHEN_DONE)) { - if (has_flag(ALLOW_COMMENTS)) { + if (unlikely(cur < end) && !has_read_flag(STOP_WHEN_DONE)) { + if (has_read_flag(ALLOW_COMMENTS)) { if (!skip_spaces_and_comments(&cur)) { if (byte_match_2(cur, "/*")) goto fail_comment; } @@ -4719,16 +5779,16 @@ static_noinline yyjson_doc *read_root_single(u8 *hdr, } if (unlikely(cur < end)) goto fail_garbage; } - + if (pre && *pre) **pre = '\0'; doc = (yyjson_doc *)val_hdr; doc->root = val_hdr + hdr_len; doc->alc = alc; doc->dat_read = (usize)(cur - hdr); doc->val_read = 1; - doc->str_pool = has_flag(INSITU) ? NULL : (char *)hdr; + doc->str_pool = has_read_flag(INSITU) ? 
NULL : (char *)hdr; return doc; - + fail_string: return_err(cur, INVALID_STRING, msg); fail_number: @@ -4743,8 +5803,7 @@ static_noinline yyjson_doc *read_root_single(u8 *hdr, return_err(cur, UNEXPECTED_CHARACTER, "unexpected character"); fail_garbage: return_err(cur, UNEXPECTED_CONTENT, "unexpected content after document"); - -#undef has_flag + #undef return_err } @@ -4755,11 +5814,9 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr, yyjson_alc alc, yyjson_read_flag flg, yyjson_read_err *err) { - -#define has_flag(_flag) unlikely((flg & YYJSON_READ_##_flag) != 0) - + #define return_err(_pos, _code, _msg) do { \ - if (_pos >= end) { \ + if (is_truncated_end(hdr, _pos, end, YYJSON_READ_ERROR_##_code, flg)) { \ err->pos = (usize)(end - hdr); \ err->code = YYJSON_READ_ERROR_UNEXPECTED_END; \ err->msg = "unexpected end of data"; \ @@ -4771,13 +5828,13 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr, if (val_hdr) alc.free(alc.ctx, (void *)val_hdr); \ return NULL; \ } while (false) - + #define val_incr() do { \ val++; \ if (unlikely(val >= val_end)) { \ usize alc_old = alc_len; \ alc_len += alc_len / 2; \ - if ((alc_len >= alc_max)) goto fail_alloc; \ + if ((sizeof(usize) < 8) && (alc_len >= alc_max)) goto fail_alloc; \ val_tmp = (yyjson_val *)alc.realloc(alc.ctx, (void *)val_hdr, \ alc_old * sizeof(yyjson_val), \ alc_len * sizeof(yyjson_val)); \ @@ -4788,7 +5845,7 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr, val_end = val_tmp + (alc_len - 2); \ } \ } while (false) - + usize dat_len; /* data length in bytes, hint for allocator */ usize hdr_len; /* value count used by yyjson_doc */ usize alc_len; /* value count allocated */ @@ -4802,32 +5859,30 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr, yyjson_val *ctn_parent; /* parent of current container */ yyjson_doc *doc; /* the JSON document, equals to val_hdr */ const char *msg; /* error message */ - + bool raw; /* read number as raw */ - bool ext; /* allow inf and nan */ bool inv; /* allow invalid 
unicode */ u8 *raw_end; /* raw end for null-terminator */ u8 **pre; /* previous raw end pointer */ - - dat_len = has_flag(STOP_WHEN_DONE) ? 256 : (usize)(end - cur); + + dat_len = has_read_flag(STOP_WHEN_DONE) ? 256 : (usize)(end - cur); hdr_len = sizeof(yyjson_doc) / sizeof(yyjson_val); hdr_len += (sizeof(yyjson_doc) % sizeof(yyjson_val)) > 0; alc_max = USIZE_MAX / sizeof(yyjson_val); alc_len = hdr_len + (dat_len / YYJSON_READER_ESTIMATED_MINIFY_RATIO) + 4; alc_len = yyjson_min(alc_len, alc_max); - + val_hdr = (yyjson_val *)alc.malloc(alc.ctx, alc_len * sizeof(yyjson_val)); if (unlikely(!val_hdr)) goto fail_alloc; val_end = val_hdr + (alc_len - 2); /* padding for key-value pair reading */ val = val_hdr + hdr_len; ctn = val; ctn_len = 0; - raw = (flg & YYJSON_READ_NUMBER_AS_RAW) != 0; - ext = (flg & YYJSON_READ_ALLOW_INF_AND_NAN) != 0; - inv = (flg & YYJSON_READ_ALLOW_INVALID_UNICODE) != 0; + raw = has_read_flag(NUMBER_AS_RAW) || has_read_flag(BIGNUM_AS_RAW); + inv = has_read_flag(ALLOW_INVALID_UNICODE) != 0; raw_end = NULL; pre = raw ? 
&raw_end : NULL; - + if (*cur++ == '{') { ctn->tag = YYJSON_TYPE_OBJ; ctn->uni.ofs = 0; @@ -4837,21 +5892,21 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr, ctn->uni.ofs = 0; goto arr_val_begin; } - + arr_begin: /* save current container */ ctn->tag = (((u64)ctn_len + 1) << YYJSON_TAG_BIT) | (ctn->tag & YYJSON_TAG_MASK); - + /* create a new array value, save parent container offset */ val_incr(); val->tag = YYJSON_TYPE_ARR; val->uni.ofs = (usize)((u8 *)val - (u8 *)ctn); - + /* push the new array value as current container */ ctn = val; ctn_len = 0; - + arr_val_begin: if (*cur == '{') { cur++; @@ -4864,7 +5919,7 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr, if (char_is_number(*cur)) { val_incr(); ctn_len++; - if (likely(read_number(&cur, pre, ext, val, &msg))) goto arr_val_end; + if (likely(read_number(&cur, pre, flg, val, &msg))) goto arr_val_end; goto fail_number; } if (*cur == '"') { @@ -4889,7 +5944,7 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr, val_incr(); ctn_len++; if (likely(read_null(&cur, val))) goto arr_val_end; - if (unlikely(ext)) { + if (has_read_flag(ALLOW_INF_AND_NAN)) { if (read_nan(false, &cur, pre, val)) goto arr_val_end; } goto fail_literal; @@ -4897,26 +5952,27 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr, if (*cur == ']') { cur++; if (likely(ctn_len == 0)) goto arr_end; - if (has_flag(ALLOW_TRAILING_COMMAS)) goto arr_end; - cur--; + if (has_read_flag(ALLOW_TRAILING_COMMAS)) goto arr_end; + while (*cur != ',') cur--; goto fail_trailing_comma; } if (char_is_space(*cur)) { while (char_is_space(*++cur)); goto arr_val_begin; } - if (unlikely(ext) && (*cur == 'i' || *cur == 'I' || *cur == 'N')) { + if (has_read_flag(ALLOW_INF_AND_NAN) && + (*cur == 'i' || *cur == 'I' || *cur == 'N')) { val_incr(); ctn_len++; if (read_inf_or_nan(false, &cur, pre, val)) goto arr_val_end; goto fail_character; } - if (has_flag(ALLOW_COMMENTS)) { + if (has_read_flag(ALLOW_COMMENTS)) { if (skip_spaces_and_comments(&cur)) goto 
arr_val_begin; if (byte_match_2(cur, "/*")) goto fail_comment; } goto fail_character; - + arr_val_end: if (*cur == ',') { cur++; @@ -4930,21 +5986,21 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr, while (char_is_space(*++cur)); goto arr_val_end; } - if (has_flag(ALLOW_COMMENTS)) { + if (has_read_flag(ALLOW_COMMENTS)) { if (skip_spaces_and_comments(&cur)) goto arr_val_end; if (byte_match_2(cur, "/*")) goto fail_comment; } goto fail_character; - + arr_end: /* get parent container */ ctn_parent = (yyjson_val *)(void *)((u8 *)ctn - ctn->uni.ofs); - + /* save the next sibling value offset */ ctn->uni.ofs = (usize)((u8 *)val - (u8 *)ctn) + sizeof(yyjson_val); ctn->tag = ((ctn_len) << YYJSON_TAG_BIT) | YYJSON_TYPE_ARR; if (unlikely(ctn == ctn_parent)) goto doc_end; - + /* pop parent as current container */ ctn = ctn_parent; ctn_len = (usize)(ctn->tag >> YYJSON_TAG_BIT); @@ -4953,7 +6009,7 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr, } else { goto arr_val_end; } - + obj_begin: /* push container */ ctn->tag = (((u64)ctn_len + 1) << YYJSON_TAG_BIT) | @@ -4964,7 +6020,7 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr, val->uni.ofs = (usize)((u8 *)val - (u8 *)ctn); ctn = val; ctn_len = 0; - + obj_key_begin: if (likely(*cur == '"')) { val_incr(); @@ -4975,20 +6031,20 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr, if (likely(*cur == '}')) { cur++; if (likely(ctn_len == 0)) goto obj_end; - if (has_flag(ALLOW_TRAILING_COMMAS)) goto obj_end; - cur--; + if (has_read_flag(ALLOW_TRAILING_COMMAS)) goto obj_end; + while (*cur != ',') cur--; goto fail_trailing_comma; } if (char_is_space(*cur)) { while (char_is_space(*++cur)); goto obj_key_begin; } - if (has_flag(ALLOW_COMMENTS)) { + if (has_read_flag(ALLOW_COMMENTS)) { if (skip_spaces_and_comments(&cur)) goto obj_key_begin; if (byte_match_2(cur, "/*")) goto fail_comment; } goto fail_character; - + obj_key_end: if (*cur == ':') { cur++; @@ -4998,12 +6054,12 @@ static_inline yyjson_doc *read_root_minify(u8 
*hdr, while (char_is_space(*++cur)); goto obj_key_end; } - if (has_flag(ALLOW_COMMENTS)) { + if (has_read_flag(ALLOW_COMMENTS)) { if (skip_spaces_and_comments(&cur)) goto obj_key_end; if (byte_match_2(cur, "/*")) goto fail_comment; } goto fail_character; - + obj_val_begin: if (*cur == '"') { val++; @@ -5014,7 +6070,7 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr, if (char_is_number(*cur)) { val++; ctn_len++; - if (likely(read_number(&cur, pre, ext, val, &msg))) goto obj_val_end; + if (likely(read_number(&cur, pre, flg, val, &msg))) goto obj_val_end; goto fail_number; } if (*cur == '{') { @@ -5041,7 +6097,7 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr, val++; ctn_len++; if (likely(read_null(&cur, val))) goto obj_val_end; - if (unlikely(ext)) { + if (has_read_flag(ALLOW_INF_AND_NAN)) { if (read_nan(false, &cur, pre, val)) goto obj_val_end; } goto fail_literal; @@ -5050,18 +6106,19 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr, while (char_is_space(*++cur)); goto obj_val_begin; } - if (unlikely(ext) && (*cur == 'i' || *cur == 'I' || *cur == 'N')) { + if (has_read_flag(ALLOW_INF_AND_NAN) && + (*cur == 'i' || *cur == 'I' || *cur == 'N')) { val++; ctn_len++; if (read_inf_or_nan(false, &cur, pre, val)) goto obj_val_end; goto fail_character; } - if (has_flag(ALLOW_COMMENTS)) { + if (has_read_flag(ALLOW_COMMENTS)) { if (skip_spaces_and_comments(&cur)) goto obj_val_begin; if (byte_match_2(cur, "/*")) goto fail_comment; } goto fail_character; - + obj_val_end: if (likely(*cur == ',')) { cur++; @@ -5075,12 +6132,12 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr, while (char_is_space(*++cur)); goto obj_val_end; } - if (has_flag(ALLOW_COMMENTS)) { + if (has_read_flag(ALLOW_COMMENTS)) { if (skip_spaces_and_comments(&cur)) goto obj_val_end; if (byte_match_2(cur, "/*")) goto fail_comment; } goto fail_character; - + obj_end: /* pop container */ ctn_parent = (yyjson_val *)(void *)((u8 *)ctn - ctn->uni.ofs); @@ -5095,27 +6152,28 @@ static_inline 
yyjson_doc *read_root_minify(u8 *hdr, } else { goto arr_val_end; } - + doc_end: /* check invalid contents after json document */ - if (unlikely(cur < end) && !has_flag(STOP_WHEN_DONE)) { - if (has_flag(ALLOW_COMMENTS)) { + if (unlikely(cur < end) && !has_read_flag(STOP_WHEN_DONE)) { + if (has_read_flag(ALLOW_COMMENTS)) { skip_spaces_and_comments(&cur); if (byte_match_2(cur, "/*")) goto fail_comment; + } else { + while (char_is_space(*cur)) cur++; } - else while (char_is_space(*cur)) cur++; if (unlikely(cur < end)) goto fail_garbage; } - + if (pre && *pre) **pre = '\0'; doc = (yyjson_doc *)val_hdr; doc->root = val_hdr + hdr_len; doc->alc = alc; doc->dat_read = (usize)(cur - hdr); doc->val_read = (usize)((val - doc->root) + 1); - doc->str_pool = has_flag(INSITU) ? NULL : (char *)hdr; + doc->str_pool = has_read_flag(INSITU) ? NULL : (char *)hdr; return doc; - + fail_string: return_err(cur, INVALID_STRING, msg); fail_number: @@ -5132,8 +6190,7 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr, return_err(cur, UNEXPECTED_CHARACTER, "unexpected character"); fail_garbage: return_err(cur, UNEXPECTED_CONTENT, "unexpected content after document"); - -#undef has_flag + #undef val_incr #undef return_err } @@ -5145,11 +6202,9 @@ static_inline yyjson_doc *read_root_pretty(u8 *hdr, yyjson_alc alc, yyjson_read_flag flg, yyjson_read_err *err) { - -#define has_flag(_flag) unlikely((flg & YYJSON_READ_##_flag) != 0) - + #define return_err(_pos, _code, _msg) do { \ - if (_pos >= end) { \ + if (is_truncated_end(hdr, _pos, end, YYJSON_READ_ERROR_##_code, flg)) { \ err->pos = (usize)(end - hdr); \ err->code = YYJSON_READ_ERROR_UNEXPECTED_END; \ err->msg = "unexpected end of data"; \ @@ -5161,13 +6216,13 @@ static_inline yyjson_doc *read_root_pretty(u8 *hdr, if (val_hdr) alc.free(alc.ctx, (void *)val_hdr); \ return NULL; \ } while (false) - + #define val_incr() do { \ val++; \ if (unlikely(val >= val_end)) { \ usize alc_old = alc_len; \ alc_len += alc_len / 2; \ - if ((alc_len >= 
alc_max)) goto fail_alloc; \ + if ((sizeof(usize) < 8) && (alc_len >= alc_max)) goto fail_alloc; \ val_tmp = (yyjson_val *)alc.realloc(alc.ctx, (void *)val_hdr, \ alc_old * sizeof(yyjson_val), \ alc_len * sizeof(yyjson_val)); \ @@ -5178,7 +6233,7 @@ static_inline yyjson_doc *read_root_pretty(u8 *hdr, val_end = val_tmp + (alc_len - 2); \ } \ } while (false) - + usize dat_len; /* data length in bytes, hint for allocator */ usize hdr_len; /* value count used by yyjson_doc */ usize alc_len; /* value count allocated */ @@ -5192,32 +6247,30 @@ static_inline yyjson_doc *read_root_pretty(u8 *hdr, yyjson_val *ctn_parent; /* parent of current container */ yyjson_doc *doc; /* the JSON document, equals to val_hdr */ const char *msg; /* error message */ - + bool raw; /* read number as raw */ - bool ext; /* allow inf and nan */ bool inv; /* allow invalid unicode */ u8 *raw_end; /* raw end for null-terminator */ u8 **pre; /* previous raw end pointer */ - - dat_len = has_flag(STOP_WHEN_DONE) ? 256 : (usize)(end - cur); + + dat_len = has_read_flag(STOP_WHEN_DONE) ? 256 : (usize)(end - cur); hdr_len = sizeof(yyjson_doc) / sizeof(yyjson_val); hdr_len += (sizeof(yyjson_doc) % sizeof(yyjson_val)) > 0; alc_max = USIZE_MAX / sizeof(yyjson_val); alc_len = hdr_len + (dat_len / YYJSON_READER_ESTIMATED_PRETTY_RATIO) + 4; alc_len = yyjson_min(alc_len, alc_max); - + val_hdr = (yyjson_val *)alc.malloc(alc.ctx, alc_len * sizeof(yyjson_val)); if (unlikely(!val_hdr)) goto fail_alloc; val_end = val_hdr + (alc_len - 2); /* padding for key-value pair reading */ val = val_hdr + hdr_len; ctn = val; ctn_len = 0; - raw = (flg & YYJSON_READ_NUMBER_AS_RAW) != 0; - ext = (flg & YYJSON_READ_ALLOW_INF_AND_NAN) != 0; - inv = (flg & YYJSON_READ_ALLOW_INVALID_UNICODE) != 0; + raw = has_read_flag(NUMBER_AS_RAW) || has_read_flag(BIGNUM_AS_RAW); + inv = has_read_flag(ALLOW_INVALID_UNICODE) != 0; raw_end = NULL; pre = raw ? 
&raw_end : NULL; - + if (*cur++ == '{') { ctn->tag = YYJSON_TYPE_OBJ; ctn->uni.ofs = 0; @@ -5229,35 +6282,35 @@ static_inline yyjson_doc *read_root_pretty(u8 *hdr, if (*cur == '\n') cur++; goto arr_val_begin; } - + arr_begin: /* save current container */ ctn->tag = (((u64)ctn_len + 1) << YYJSON_TAG_BIT) | (ctn->tag & YYJSON_TAG_MASK); - + /* create a new array value, save parent container offset */ val_incr(); val->tag = YYJSON_TYPE_ARR; val->uni.ofs = (usize)((u8 *)val - (u8 *)ctn); - + /* push the new array value as current container */ ctn = val; ctn_len = 0; if (*cur == '\n') cur++; - + arr_val_begin: #if YYJSON_IS_REAL_GCC while (true) repeat16({ if (byte_match_2(cur, " ")) cur += 2; else break; - }); + }) #else while (true) repeat16({ if (likely(byte_match_2(cur, " "))) cur += 2; else break; - }); + }) #endif - + if (*cur == '{') { cur++; goto obj_begin; @@ -5269,7 +6322,7 @@ static_inline yyjson_doc *read_root_pretty(u8 *hdr, if (char_is_number(*cur)) { val_incr(); ctn_len++; - if (likely(read_number(&cur, pre, ext, val, &msg))) goto arr_val_end; + if (likely(read_number(&cur, pre, flg, val, &msg))) goto arr_val_end; goto fail_number; } if (*cur == '"') { @@ -5294,7 +6347,7 @@ static_inline yyjson_doc *read_root_pretty(u8 *hdr, val_incr(); ctn_len++; if (likely(read_null(&cur, val))) goto arr_val_end; - if (unlikely(ext)) { + if (has_read_flag(ALLOW_INF_AND_NAN)) { if (read_nan(false, &cur, pre, val)) goto arr_val_end; } goto fail_literal; @@ -5302,26 +6355,27 @@ static_inline yyjson_doc *read_root_pretty(u8 *hdr, if (*cur == ']') { cur++; if (likely(ctn_len == 0)) goto arr_end; - if (has_flag(ALLOW_TRAILING_COMMAS)) goto arr_end; - cur--; + if (has_read_flag(ALLOW_TRAILING_COMMAS)) goto arr_end; + while (*cur != ',') cur--; goto fail_trailing_comma; } if (char_is_space(*cur)) { while (char_is_space(*++cur)); goto arr_val_begin; } - if (unlikely(ext) && (*cur == 'i' || *cur == 'I' || *cur == 'N')) { + if (has_read_flag(ALLOW_INF_AND_NAN) && + (*cur == 'i' || 
*cur == 'I' || *cur == 'N')) { val_incr(); ctn_len++; if (read_inf_or_nan(false, &cur, pre, val)) goto arr_val_end; goto fail_character; } - if (has_flag(ALLOW_COMMENTS)) { + if (has_read_flag(ALLOW_COMMENTS)) { if (skip_spaces_and_comments(&cur)) goto arr_val_begin; if (byte_match_2(cur, "/*")) goto fail_comment; } goto fail_character; - + arr_val_end: if (byte_match_2(cur, ",\n")) { cur += 2; @@ -5339,21 +6393,21 @@ static_inline yyjson_doc *read_root_pretty(u8 *hdr, while (char_is_space(*++cur)); goto arr_val_end; } - if (has_flag(ALLOW_COMMENTS)) { + if (has_read_flag(ALLOW_COMMENTS)) { if (skip_spaces_and_comments(&cur)) goto arr_val_end; if (byte_match_2(cur, "/*")) goto fail_comment; } goto fail_character; - + arr_end: /* get parent container */ ctn_parent = (yyjson_val *)(void *)((u8 *)ctn - ctn->uni.ofs); - + /* save the next sibling value offset */ ctn->uni.ofs = (usize)((u8 *)val - (u8 *)ctn) + sizeof(yyjson_val); ctn->tag = ((ctn_len) << YYJSON_TAG_BIT) | YYJSON_TYPE_ARR; if (unlikely(ctn == ctn_parent)) goto doc_end; - + /* pop parent as current container */ ctn = ctn_parent; ctn_len = (usize)(ctn->tag >> YYJSON_TAG_BIT); @@ -5363,7 +6417,7 @@ static_inline yyjson_doc *read_root_pretty(u8 *hdr, } else { goto arr_val_end; } - + obj_begin: /* push container */ ctn->tag = (((u64)ctn_len + 1) << YYJSON_TAG_BIT) | @@ -5375,18 +6429,18 @@ static_inline yyjson_doc *read_root_pretty(u8 *hdr, ctn = val; ctn_len = 0; if (*cur == '\n') cur++; - + obj_key_begin: #if YYJSON_IS_REAL_GCC while (true) repeat16({ if (byte_match_2(cur, " ")) cur += 2; else break; - }); + }) #else while (true) repeat16({ if (likely(byte_match_2(cur, " "))) cur += 2; else break; - }); + }) #endif if (likely(*cur == '"')) { val_incr(); @@ -5397,20 +6451,20 @@ static_inline yyjson_doc *read_root_pretty(u8 *hdr, if (likely(*cur == '}')) { cur++; if (likely(ctn_len == 0)) goto obj_end; - if (has_flag(ALLOW_TRAILING_COMMAS)) goto obj_end; - cur--; + if (has_read_flag(ALLOW_TRAILING_COMMAS)) 
goto obj_end; + while (*cur != ',') cur--; goto fail_trailing_comma; } if (char_is_space(*cur)) { while (char_is_space(*++cur)); goto obj_key_begin; } - if (has_flag(ALLOW_COMMENTS)) { + if (has_read_flag(ALLOW_COMMENTS)) { if (skip_spaces_and_comments(&cur)) goto obj_key_begin; if (byte_match_2(cur, "/*")) goto fail_comment; } goto fail_character; - + obj_key_end: if (byte_match_2(cur, ": ")) { cur += 2; @@ -5424,12 +6478,12 @@ static_inline yyjson_doc *read_root_pretty(u8 *hdr, while (char_is_space(*++cur)); goto obj_key_end; } - if (has_flag(ALLOW_COMMENTS)) { + if (has_read_flag(ALLOW_COMMENTS)) { if (skip_spaces_and_comments(&cur)) goto obj_key_end; if (byte_match_2(cur, "/*")) goto fail_comment; } goto fail_character; - + obj_val_begin: if (*cur == '"') { val++; @@ -5440,7 +6494,7 @@ static_inline yyjson_doc *read_root_pretty(u8 *hdr, if (char_is_number(*cur)) { val++; ctn_len++; - if (likely(read_number(&cur, pre, ext, val, &msg))) goto obj_val_end; + if (likely(read_number(&cur, pre, flg, val, &msg))) goto obj_val_end; goto fail_number; } if (*cur == '{') { @@ -5467,7 +6521,7 @@ static_inline yyjson_doc *read_root_pretty(u8 *hdr, val++; ctn_len++; if (likely(read_null(&cur, val))) goto obj_val_end; - if (unlikely(ext)) { + if (has_read_flag(ALLOW_INF_AND_NAN)) { if (read_nan(false, &cur, pre, val)) goto obj_val_end; } goto fail_literal; @@ -5476,18 +6530,19 @@ static_inline yyjson_doc *read_root_pretty(u8 *hdr, while (char_is_space(*++cur)); goto obj_val_begin; } - if (unlikely(ext) && (*cur == 'i' || *cur == 'I' || *cur == 'N')) { + if (has_read_flag(ALLOW_INF_AND_NAN) && + (*cur == 'i' || *cur == 'I' || *cur == 'N')) { val++; ctn_len++; if (read_inf_or_nan(false, &cur, pre, val)) goto obj_val_end; goto fail_character; } - if (has_flag(ALLOW_COMMENTS)) { + if (has_read_flag(ALLOW_COMMENTS)) { if (skip_spaces_and_comments(&cur)) goto obj_val_begin; if (byte_match_2(cur, "/*")) goto fail_comment; } goto fail_character; - + obj_val_end: if (byte_match_2(cur, 
",\n")) { cur += 2; @@ -5505,12 +6560,12 @@ static_inline yyjson_doc *read_root_pretty(u8 *hdr, while (char_is_space(*++cur)); goto obj_val_end; } - if (has_flag(ALLOW_COMMENTS)) { + if (has_read_flag(ALLOW_COMMENTS)) { if (skip_spaces_and_comments(&cur)) goto obj_val_end; if (byte_match_2(cur, "/*")) goto fail_comment; } goto fail_character; - + obj_end: /* pop container */ ctn_parent = (yyjson_val *)(void *)((u8 *)ctn - ctn->uni.ofs); @@ -5526,27 +6581,28 @@ static_inline yyjson_doc *read_root_pretty(u8 *hdr, } else { goto arr_val_end; } - + doc_end: /* check invalid contents after json document */ - if (unlikely(cur < end) && !has_flag(STOP_WHEN_DONE)) { - if (has_flag(ALLOW_COMMENTS)) { + if (unlikely(cur < end) && !has_read_flag(STOP_WHEN_DONE)) { + if (has_read_flag(ALLOW_COMMENTS)) { skip_spaces_and_comments(&cur); if (byte_match_2(cur, "/*")) goto fail_comment; + } else { + while (char_is_space(*cur)) cur++; } - else while (char_is_space(*cur)) cur++; if (unlikely(cur < end)) goto fail_garbage; } - + if (pre && *pre) **pre = '\0'; doc = (yyjson_doc *)val_hdr; doc->root = val_hdr + hdr_len; doc->alc = alc; doc->dat_read = (usize)(cur - hdr); doc->val_read = (usize)((val - val_hdr)) - hdr_len + 1; - doc->str_pool = has_flag(INSITU) ? NULL : (char *)hdr; + doc->str_pool = has_read_flag(INSITU) ? 
NULL : (char *)hdr; return doc; - + fail_string: return_err(cur, INVALID_STRING, msg); fail_number: @@ -5563,8 +6619,7 @@ static_inline yyjson_doc *read_root_pretty(u8 *hdr, return_err(cur, UNEXPECTED_CHARACTER, "unexpected character"); fail_garbage: return_err(cur, UNEXPECTED_CONTENT, "unexpected content after document"); - -#undef has_flag + #undef val_incr #undef return_err } @@ -5580,29 +6635,20 @@ yyjson_doc *yyjson_read_opts(char *dat, yyjson_read_flag flg, const yyjson_alc *alc_ptr, yyjson_read_err *err) { - -#define has_flag(_flag) unlikely((flg & YYJSON_READ_##_flag) != 0) - + #define return_err(_pos, _code, _msg) do { \ err->pos = (usize)(_pos); \ err->msg = _msg; \ err->code = YYJSON_READ_ERROR_##_code; \ - if (!has_flag(INSITU) && hdr) alc.free(alc.ctx, (void *)hdr); \ + if (!has_read_flag(INSITU) && hdr) alc.free(alc.ctx, (void *)hdr); \ return NULL; \ } while (false) - + yyjson_read_err dummy_err; yyjson_alc alc; yyjson_doc *doc; u8 *hdr = NULL, *end, *cur; - -#if YYJSON_DISABLE_NON_STANDARD - flg &= ~YYJSON_READ_ALLOW_TRAILING_COMMAS; - flg &= ~YYJSON_READ_ALLOW_COMMENTS; - flg &= ~YYJSON_READ_ALLOW_INF_AND_NAN; - flg &= ~YYJSON_READ_ALLOW_INVALID_UNICODE; -#endif - + /* validate input parameters */ if (!err) err = &dummy_err; if (likely(!alc_ptr)) { @@ -5616,9 +6662,9 @@ yyjson_doc *yyjson_read_opts(char *dat, if (unlikely(!len)) { return_err(0, INVALID_PARAMETER, "input length is 0"); } - + /* add 4-byte zero padding for input data if necessary */ - if (has_flag(INSITU)) { + if (has_read_flag(INSITU)) { hdr = (u8 *)dat; end = (u8 *)dat + len; cur = (u8 *)dat; @@ -5635,10 +6681,10 @@ yyjson_doc *yyjson_read_opts(char *dat, memcpy(hdr, dat, len); memset(end, 0, YYJSON_PADDING_SIZE); } - + /* skip empty contents before json document */ if (unlikely(char_is_space_or_comment(*cur))) { - if (has_flag(ALLOW_COMMENTS)) { + if (has_read_flag(ALLOW_COMMENTS)) { if (!skip_spaces_and_comments(&cur)) { return_err(cur - hdr, INVALID_COMMENT, "unclosed multiline 
comment"); @@ -5652,7 +6698,7 @@ yyjson_doc *yyjson_read_opts(char *dat, return_err(0, EMPTY_CONTENT, "input data is empty"); } } - + /* read json document */ if (likely(char_is_container(*cur))) { if (char_is_space(cur[1]) && char_is_space(cur[2])) { @@ -5663,7 +6709,7 @@ yyjson_doc *yyjson_read_opts(char *dat, } else { doc = read_root_single(hdr, cur, end, alc, flg, err); } - + /* check result */ if (likely(doc)) { memset(err, 0, sizeof(yyjson_read_err)); @@ -5684,11 +6730,10 @@ yyjson_doc *yyjson_read_opts(char *dat, err->msg = "UTF-16 encoding is not supported"; } } - if (!has_flag(INSITU)) alc.free(alc.ctx, (void *)hdr); + if (!has_read_flag(INSITU)) alc.free(alc.ctx, (void *)hdr); } return doc; - -#undef has_flag + #undef return_err } @@ -5696,37 +6741,65 @@ yyjson_doc *yyjson_read_file(const char *path, yyjson_read_flag flg, const yyjson_alc *alc_ptr, yyjson_read_err *err) { - #define return_err(_code, _msg) do { \ err->pos = 0; \ err->msg = _msg; \ err->code = YYJSON_READ_ERROR_##_code; \ - if (file) fclose(file); \ + return NULL; \ +} while (false) + + yyjson_read_err dummy_err; + yyjson_doc *doc; + FILE *file; + + if (!err) err = &dummy_err; + if (unlikely(!path)) return_err(INVALID_PARAMETER, "input path is NULL"); + + file = fopen_readonly(path); + if (unlikely(!file)) return_err(FILE_OPEN, "file opening failed"); + + doc = yyjson_read_fp(file, flg, alc_ptr, err); + fclose(file); + return doc; + +#undef return_err +} + +yyjson_doc *yyjson_read_fp(FILE *file, + yyjson_read_flag flg, + const yyjson_alc *alc_ptr, + yyjson_read_err *err) { +#define return_err(_code, _msg) do { \ + err->pos = 0; \ + err->msg = _msg; \ + err->code = YYJSON_READ_ERROR_##_code; \ if (buf) alc.free(alc.ctx, buf); \ return NULL; \ } while (false) - + yyjson_read_err dummy_err; yyjson_alc alc = alc_ptr ? 
*alc_ptr : YYJSON_DEFAULT_ALC; yyjson_doc *doc; - - FILE *file = NULL; - long file_size = 0; + + long file_size = 0, file_pos; void *buf = NULL; usize buf_size = 0; - + /* validate input parameters */ if (!err) err = &dummy_err; - if (unlikely(!path)) return_err(INVALID_PARAMETER, "input path is NULL"); - - /* open file */ - file = fopen_readonly(path); - if (file == NULL) return_err(FILE_OPEN, "file opening failed"); - - /* get file size */ - if (fseek(file, 0, SEEK_END) == 0) file_size = ftell(file); - rewind(file); - + if (unlikely(!file)) return_err(INVALID_PARAMETER, "input file is NULL"); + + /* get current position */ + file_pos = ftell(file); + if (file_pos != -1) { + /* get total file size, may fail */ + if (fseek(file, 0, SEEK_END) == 0) file_size = ftell(file); + /* reset to original position, may fail */ + if (fseek(file, file_pos, SEEK_SET) != 0) file_size = 0; + /* get file size from current postion to end */ + if (file_size > 0) file_size -= file_pos; + } + /* read file */ if (file_size > 0) { /* read the entire file in one call */ @@ -5745,7 +6818,7 @@ yyjson_doc *yyjson_read_file(const char *path, usize chunk_now = chunk_min; usize read_size; void *tmp; - + buf_size = YYJSON_PADDING_SIZE; while (true) { if (buf_size + chunk_now < buf_size) { /* overflow */ @@ -5764,13 +6837,12 @@ yyjson_doc *yyjson_read_file(const char *path, read_size = fread_safe(tmp, chunk_now, file); file_size += (long)read_size; if (read_size != chunk_now) break; - + chunk_now *= 2; if (chunk_now > chunk_max) chunk_now = chunk_max; } } - fclose(file); - + /* read JSON */ memset((u8 *)buf + file_size, 0, YYJSON_PADDING_SIZE); flg |= YYJSON_READ_INSITU; @@ -5782,7 +6854,7 @@ yyjson_doc *yyjson_read_file(const char *path, alc.free(alc.ctx, buf); return NULL; } - + #undef return_err } @@ -5797,20 +6869,19 @@ const char *yyjson_read_number(const char *dat, err->code = YYJSON_READ_ERROR_##_code; \ return NULL; \ } while (false) - - u8 *hdr = (u8 *)dat, *cur = hdr; + + u8 *hdr = 
constcast(u8 *)dat, *cur = hdr; bool raw; /* read number as raw */ - bool ext; /* allow inf and nan */ u8 *raw_end; /* raw end for null-terminator */ u8 **pre; /* previous raw end pointer */ const char *msg; yyjson_read_err dummy_err; - + #if !YYJSON_HAS_IEEE_754 || YYJSON_DISABLE_FAST_FP_CONV u8 buf[128]; usize dat_len; #endif - + if (!err) err = &dummy_err; if (unlikely(!dat)) { return_err(cur, INVALID_PARAMETER, "input data is NULL"); @@ -5818,7 +6889,7 @@ const char *yyjson_read_number(const char *dat, if (unlikely(!val)) { return_err(cur, INVALID_PARAMETER, "output value is NULL"); } - + #if !YYJSON_HAS_IEEE_754 || YYJSON_DISABLE_FAST_FP_CONV if (!alc) alc = &YYJSON_DEFAULT_ALC; dat_len = strlen(dat); @@ -5834,33 +6905,28 @@ const char *yyjson_read_number(const char *dat, } memcpy(hdr, dat, dat_len + 1); } + hdr[dat_len] = 0; #endif - -#if YYJSON_DISABLE_NON_STANDARD - ext = false; -#else - ext = (flg & YYJSON_READ_ALLOW_INF_AND_NAN) != 0; -#endif - - raw = (flg & YYJSON_READ_NUMBER_AS_RAW) != 0; + + raw = (flg & (YYJSON_READ_NUMBER_AS_RAW | YYJSON_READ_BIGNUM_AS_RAW)) != 0; raw_end = NULL; pre = raw ? 
&raw_end : NULL; - + #if !YYJSON_HAS_IEEE_754 || YYJSON_DISABLE_FAST_FP_CONV - if (!read_number(&cur, pre, ext, val, &msg)) { + if (!read_number(&cur, pre, flg, val, &msg)) { if (dat_len >= sizeof(buf)) alc->free(alc->ctx, hdr); return_err(cur, INVALID_NUMBER, msg); } if (dat_len >= sizeof(buf)) alc->free(alc->ctx, hdr); - if (raw) val->uni.str = dat; + if (yyjson_is_raw(val)) val->uni.str = dat; return dat + (cur - hdr); #else - if (!read_number(&cur, pre, ext, val, &msg)) { + if (!read_number(&cur, pre, flg, val, &msg)) { return_err(cur, INVALID_NUMBER, msg); } return (const char *)cur; #endif - + #undef return_err } @@ -5920,10 +6986,10 @@ static_inline u8 *write_u32_len_8(u32 val, u8 *buf) { cc = (ccdd * 5243) >> 19; /* (ccdd / 100) */ bb = aabb - aa * 100; /* (aabb % 100) */ dd = ccdd - cc * 100; /* (ccdd % 100) */ - ((v16 *)buf)[0] = ((const v16 *)digit_table)[aa]; - ((v16 *)buf)[1] = ((const v16 *)digit_table)[bb]; - ((v16 *)buf)[2] = ((const v16 *)digit_table)[cc]; - ((v16 *)buf)[3] = ((const v16 *)digit_table)[dd]; + byte_copy_2(buf + 0, digit_table + aa * 2); + byte_copy_2(buf + 2, digit_table + bb * 2); + byte_copy_2(buf + 4, digit_table + cc * 2); + byte_copy_2(buf + 6, digit_table + dd * 2); return buf + 8; } @@ -5931,41 +6997,41 @@ static_inline u8 *write_u32_len_4(u32 val, u8 *buf) { u32 aa, bb; /* 4 digits: aabb */ aa = (val * 5243) >> 19; /* (val / 100) */ bb = val - aa * 100; /* (val % 100) */ - ((v16 *)buf)[0] = ((const v16 *)digit_table)[aa]; - ((v16 *)buf)[1] = ((const v16 *)digit_table)[bb]; + byte_copy_2(buf + 0, digit_table + aa * 2); + byte_copy_2(buf + 2, digit_table + bb * 2); return buf + 4; } static_inline u8 *write_u32_len_1_8(u32 val, u8 *buf) { u32 aa, bb, cc, dd, aabb, bbcc, ccdd, lz; - + if (val < 100) { /* 1-2 digits: aa */ lz = val < 10; /* leading zero: 0 or 1 */ - ((v16 *)buf)[0] = *(const v16 *)(digit_table + (val * 2 + lz)); + byte_copy_2(buf + 0, digit_table + val * 2 + lz); buf -= lz; return buf + 2; - + } else if (val < 
10000) { /* 3-4 digits: aabb */ aa = (val * 5243) >> 19; /* (val / 100) */ bb = val - aa * 100; /* (val % 100) */ lz = aa < 10; /* leading zero: 0 or 1 */ - ((v16 *)buf)[0] = *(const v16 *)(digit_table + (aa * 2 + lz)); + byte_copy_2(buf + 0, digit_table + aa * 2 + lz); buf -= lz; - ((v16 *)buf)[1] = ((const v16 *)digit_table)[bb]; + byte_copy_2(buf + 2, digit_table + bb * 2); return buf + 4; - + } else if (val < 1000000) { /* 5-6 digits: aabbcc */ aa = (u32)(((u64)val * 429497) >> 32); /* (val / 10000) */ bbcc = val - aa * 10000; /* (val % 10000) */ bb = (bbcc * 5243) >> 19; /* (bbcc / 100) */ cc = bbcc - bb * 100; /* (bbcc % 100) */ lz = aa < 10; /* leading zero: 0 or 1 */ - ((v16 *)buf)[0] = *(const v16 *)(digit_table + (aa * 2 + lz)); + byte_copy_2(buf + 0, digit_table + aa * 2 + lz); buf -= lz; - ((v16 *)buf)[1] = ((const v16 *)digit_table)[bb]; - ((v16 *)buf)[2] = ((const v16 *)digit_table)[cc]; + byte_copy_2(buf + 2, digit_table + bb * 2); + byte_copy_2(buf + 4, digit_table + cc * 2); return buf + 6; - + } else { /* 7-8 digits: aabbccdd */ aabb = (u32)(((u64)val * 109951163) >> 40); /* (val / 10000) */ ccdd = val - aabb * 10000; /* (val % 10000) */ @@ -5974,30 +7040,30 @@ static_inline u8 *write_u32_len_1_8(u32 val, u8 *buf) { bb = aabb - aa * 100; /* (aabb % 100) */ dd = ccdd - cc * 100; /* (ccdd % 100) */ lz = aa < 10; /* leading zero: 0 or 1 */ - ((v16 *)buf)[0] = *(const v16 *)(digit_table + (aa * 2 + lz)); + byte_copy_2(buf + 0, digit_table + aa * 2 + lz); buf -= lz; - ((v16 *)buf)[1] = ((const v16 *)digit_table)[bb]; - ((v16 *)buf)[2] = ((const v16 *)digit_table)[cc]; - ((v16 *)buf)[3] = ((const v16 *)digit_table)[dd]; + byte_copy_2(buf + 2, digit_table + bb * 2); + byte_copy_2(buf + 4, digit_table + cc * 2); + byte_copy_2(buf + 6, digit_table + dd * 2); return buf + 8; } } static_inline u8 *write_u64_len_5_8(u32 val, u8 *buf) { u32 aa, bb, cc, dd, aabb, bbcc, ccdd, lz; - + if (val < 1000000) { /* 5-6 digits: aabbcc */ aa = (u32)(((u64)val * 429497) >> 
32); /* (val / 10000) */ bbcc = val - aa * 10000; /* (val % 10000) */ bb = (bbcc * 5243) >> 19; /* (bbcc / 100) */ cc = bbcc - bb * 100; /* (bbcc % 100) */ lz = aa < 10; /* leading zero: 0 or 1 */ - ((v16 *)buf)[0] = *(const v16 *)(digit_table + (aa * 2 + lz)); + byte_copy_2(buf + 0, digit_table + aa * 2 + lz); buf -= lz; - ((v16 *)buf)[1] = ((const v16 *)digit_table)[bb]; - ((v16 *)buf)[2] = ((const v16 *)digit_table)[cc]; + byte_copy_2(buf + 2, digit_table + bb * 2); + byte_copy_2(buf + 4, digit_table + cc * 2); return buf + 6; - + } else { /* 7-8 digits: aabbccdd */ aabb = (u32)(((u64)val * 109951163) >> 40); /* (val / 10000) */ ccdd = val - aabb * 10000; /* (val % 10000) */ @@ -6006,11 +7072,11 @@ static_inline u8 *write_u64_len_5_8(u32 val, u8 *buf) { bb = aabb - aa * 100; /* (aabb % 100) */ dd = ccdd - cc * 100; /* (ccdd % 100) */ lz = aa < 10; /* leading zero: 0 or 1 */ - ((v16 *)buf)[0] = *(const v16 *)(digit_table + (aa * 2 + lz)); + byte_copy_2(buf + 0, digit_table + aa * 2 + lz); buf -= lz; - ((v16 *)buf)[1] = ((const v16 *)digit_table)[bb]; - ((v16 *)buf)[2] = ((const v16 *)digit_table)[cc]; - ((v16 *)buf)[3] = ((const v16 *)digit_table)[dd]; + byte_copy_2(buf + 2, digit_table + bb * 2); + byte_copy_2(buf + 4, digit_table + cc * 2); + byte_copy_2(buf + 6, digit_table + dd * 2); return buf + 8; } } @@ -6018,18 +7084,18 @@ static_inline u8 *write_u64_len_5_8(u32 val, u8 *buf) { static_inline u8 *write_u64(u64 val, u8 *buf) { u64 tmp, hgh; u32 mid, low; - + if (val < 100000000) { /* 1-8 digits */ buf = write_u32_len_1_8((u32)val, buf); return buf; - + } else if (val < (u64)100000000 * 100000000) { /* 9-16 digits */ hgh = val / 100000000; /* (val / 100000000) */ low = (u32)(val - hgh * 100000000); /* (val % 100000000) */ buf = write_u32_len_1_8((u32)hgh, buf); buf = write_u32_len_8(low, buf); return buf; - + } else { /* 17-20 digits */ tmp = val / 100000000; /* (val / 100000000) */ low = (u32)(val - tmp * 100000000); /* (val % 100000000) */ @@ -6115,7 
+7181,7 @@ static_inline u8 *write_u64_len_1_to_17(u64 val, u8 *buf) { static_inline u8 *write_u64_len_15_to_17_trim(u8 *buf, u64 sig) { bool lz; /* leading zero */ u32 tz1, tz2, tz; /* trailing zero */ - + u32 abbccddee = (u32)(sig / 100000000); u32 ffgghhii = (u32)(sig - (u64)abbccddee * 100000000); u32 abbcc = abbccddee / 10000; /* (abbccddee / 10000) */ @@ -6124,15 +7190,15 @@ static_inline u8 *write_u64_len_15_to_17_trim(u8 *buf, u64 sig) { u32 a = (abb * 41) >> 12; /* (abb / 100) */ u32 bb = abb - a * 100; /* (abb % 100) */ u32 cc = abbcc - abb * 100; /* (abbcc % 100) */ - + /* write abbcc */ buf[0] = (u8)(a + '0'); buf += a > 0; lz = bb < 10 && a == 0; - ((v16 *)buf)[0] = *(const v16 *)(digit_table + (bb * 2 + lz)); + byte_copy_2(buf + 0, digit_table + bb * 2 + lz); buf -= lz; - ((v16 *)buf)[1] = ((const v16 *)digit_table)[cc]; - + byte_copy_2(buf + 2, digit_table + cc * 2); + if (ffgghhii) { u32 dd = (ddee * 5243) >> 19; /* (ddee / 100) */ u32 ee = ddee - dd * 100; /* (ddee % 100) */ @@ -6140,15 +7206,15 @@ static_inline u8 *write_u64_len_15_to_17_trim(u8 *buf, u64 sig) { u32 hhii = ffgghhii - ffgg * 10000; /* (val % 10000) */ u32 ff = (ffgg * 5243) >> 19; /* (aabb / 100) */ u32 gg = ffgg - ff * 100; /* (aabb % 100) */ - ((v16 *)buf)[2] = ((const v16 *)digit_table)[dd]; - ((v16 *)buf)[3] = ((const v16 *)digit_table)[ee]; - ((v16 *)buf)[4] = ((const v16 *)digit_table)[ff]; - ((v16 *)buf)[5] = ((const v16 *)digit_table)[gg]; + byte_copy_2(buf + 4, digit_table + dd * 2); + byte_copy_2(buf + 6, digit_table + ee * 2); + byte_copy_2(buf + 8, digit_table + ff * 2); + byte_copy_2(buf + 10, digit_table + gg * 2); if (hhii) { u32 hh = (hhii * 5243) >> 19; /* (ccdd / 100) */ u32 ii = hhii - hh * 100; /* (ccdd % 100) */ - ((v16 *)buf)[6] = ((const v16 *)digit_table)[hh]; - ((v16 *)buf)[7] = ((const v16 *)digit_table)[ii]; + byte_copy_2(buf + 12, digit_table + hh * 2); + byte_copy_2(buf + 14, digit_table + ii * 2); tz1 = dec_trailing_zero_table[hh]; tz2 = 
dec_trailing_zero_table[ii]; tz = ii ? tz2 : (tz1 + 2); @@ -6165,8 +7231,8 @@ static_inline u8 *write_u64_len_15_to_17_trim(u8 *buf, u64 sig) { if (ddee) { u32 dd = (ddee * 5243) >> 19; /* (ddee / 100) */ u32 ee = ddee - dd * 100; /* (ddee % 100) */ - ((v16 *)buf)[2] = ((const v16 *)digit_table)[dd]; - ((v16 *)buf)[3] = ((const v16 *)digit_table)[ee]; + byte_copy_2(buf + 4, digit_table + dd * 2); + byte_copy_2(buf + 6, digit_table + ee * 2); tz1 = dec_trailing_zero_table[dd]; tz2 = dec_trailing_zero_table[ee]; tz = ee ? tz2 : (tz1 + 2); @@ -6189,13 +7255,13 @@ static_inline u8 *write_f64_exp(i32 exp, u8 *buf) { exp = exp < 0 ? -exp : exp; if (exp < 100) { u32 lz = exp < 10; - *(v16 *)&buf[0] = *(const v16 *)(digit_table + ((u32)exp * 2 + lz)); + byte_copy_2(buf + 0, digit_table + (u32)exp * 2 + lz); return buf + 2 - lz; } else { u32 hi = ((u32)exp * 656) >> 16; /* exp / 100 */ u32 lo = (u32)exp - hi * 100; /* exp % 100 */ buf[0] = (u8)((u8)hi + (u8)'0'); - *(v16 *)&buf[1] = *(const v16 *)(digit_table + (lo * 2)); + byte_copy_2(buf + 1, digit_table + lo * 2); return buf + 3; } } @@ -6211,19 +7277,19 @@ static_inline u64 round_to_odd(u64 hi, u64 lo, u64 cp) { /** Convert double number from binary to decimal. The output significand is shortest decimal but may have trailing zeros. - + This function use the Schubfach algorithm: Raffaello Giulietti, The Schubfach way to render doubles (5th version), 2022. https://drive.google.com/file/d/1gp5xv4CAa78SVgCeWfGqqI4FfYYYuNFb https://mail.openjdk.java.net/pipermail/core-libs-dev/2021-November/083536.html https://github.com/openjdk/jdk/pull/3402 (Java implementation) https://github.com/abolz/Drachennest (C++ implementation) - + See also: Dragonbox: A New Floating-Point Binary-to-Decimal Conversion Algorithm, 2022. https://github.com/jk-jeon/dragonbox/blob/master/other_files/Dragonbox.pdf https://github.com/jk-jeon/dragonbox - + @param sig_raw The raw value of significand in IEEE 754 format. 
@param exp_raw The raw value of exponent in IEEE 754 format. @param sig_bin The decoded value of significand in binary. @@ -6235,41 +7301,41 @@ static_inline u64 round_to_odd(u64 hi, u64 lo, u64 cp) { static_inline void f64_bin_to_dec(u64 sig_raw, u32 exp_raw, u64 sig_bin, i32 exp_bin, u64 *sig_dec, i32 *exp_dec) { - + bool is_even, regular_spacing, u_inside, w_inside, round_up; u64 s, sp, cb, cbl, cbr, vb, vbl, vbr, pow10hi, pow10lo, upper, lower, mid; i32 k, h, exp10; - + is_even = !(sig_bin & 1); regular_spacing = (sig_raw == 0 && exp_raw > 1); - + cbl = 4 * sig_bin - 2 + regular_spacing; cb = 4 * sig_bin; cbr = 4 * sig_bin + 2; - + /* exp_bin: [-1074, 971] */ /* k = regular_spacing ? floor(log10(pow(2, exp_bin))) */ /* : floor(log10(pow(2, exp_bin) * 3.0 / 4.0)) */ /* = regular_spacing ? floor(exp_bin * log10(2)) */ /* : floor(exp_bin * log10(2) + log10(3.0 / 4.0)) */ k = (i32)(exp_bin * 315653 - (regular_spacing ? 131237 : 0)) >> 20; - + /* k: [-324, 292] */ /* h = exp_bin + floor(log2(pow(10, e))) */ /* = exp_bin + floor(log2(10) * e) */ exp10 = -k; h = exp_bin + ((exp10 * 217707) >> 16) + 1; - + pow10_table_get_sig(exp10, &pow10hi, &pow10lo); pow10lo += (exp10 < POW10_SIG_TABLE_MIN_EXACT_EXP || exp10 > POW10_SIG_TABLE_MAX_EXACT_EXP); vbl = round_to_odd(pow10hi, pow10lo, cbl << h); vb = round_to_odd(pow10hi, pow10lo, cb << h); vbr = round_to_odd(pow10hi, pow10lo, cbr << h); - + lower = vbl + !is_even; upper = vbr - !is_even; - + s = vb / 4; if (s >= 10) { sp = s / 10; @@ -6281,44 +7347,45 @@ static_inline void f64_bin_to_dec(u64 sig_raw, u32 exp_raw, return; } } - + u_inside = (lower <= 4 * s); w_inside = (upper >= 4 * s + 4); - + mid = 4 * s + 2; round_up = (vb > mid) || (vb == mid && (s & 1) != 0); - + *sig_dec = s + ((u_inside != w_inside) ? w_inside : round_up); *exp_dec = k; } /** Write a double number (requires 32 bytes buffer). - + We follows the ECMAScript specification to print floating point numbers, but with the following changes: 1. 
Keep the negative sign of 0.0 to preserve input information. 2. Keep decimal point to indicate the number is floating point. 3. Remove positive sign of exponent part. */ -static_noinline u8 *write_f64_raw(u8 *buf, u64 raw, yyjson_write_flag flg) { +static_inline u8 *write_f64_raw(u8 *buf, u64 raw, yyjson_write_flag flg) { u64 sig_bin, sig_dec, sig_raw; i32 exp_bin, exp_dec, sig_len, dot_pos, i, max; u32 exp_raw, hi, lo; u8 *hdr, *num_hdr, *num_end, *dot_end; bool sign; - + /* decode raw bytes from IEEE-754 double format. */ sign = (bool)(raw >> (F64_BITS - 1)); sig_raw = raw & F64_SIG_MASK; exp_raw = (u32)((raw & F64_EXP_MASK) >> F64_SIG_BITS); - + /* return inf and nan */ if (unlikely(exp_raw == ((u32)1 << F64_EXP_BITS) - 1)) { - if (flg & YYJSON_WRITE_INF_AND_NAN_AS_NULL) { + if (has_write_flag(INF_AND_NAN_AS_NULL)) { byte_copy_4(buf, "null"); return buf + 4; - } else if (flg & YYJSON_WRITE_ALLOW_INF_AND_NAN) { + } + else if (has_write_flag(ALLOW_INF_AND_NAN)) { if (sig_raw == 0) { buf[0] = '-'; buf += sign; @@ -6329,28 +7396,27 @@ static_noinline u8 *write_f64_raw(u8 *buf, u64 raw, yyjson_write_flag flg) { byte_copy_4(buf, "NaN"); return buf + 3; } - } else { - return NULL; } + return NULL; } - + /* add sign for all finite double value, including 0.0 and inf */ buf[0] = '-'; buf += sign; hdr = buf; - + /* return zero */ if ((raw << 1) == 0) { byte_copy_4(buf, "0.0"); buf += 3; return buf; } - + if (likely(exp_raw != 0)) { /* normal number */ sig_bin = sig_raw | ((u64)1 << F64_SIG_BITS); exp_bin = (i32)exp_raw - F64_EXP_BIAS - F64_SIG_BITS; - + /* fast path for small integer number without fraction */ if (-F64_SIG_BITS <= exp_bin && exp_bin <= 0) { if (u64_tz_bits(sig_bin) >= (u32)-exp_bin) { @@ -6362,18 +7428,18 @@ static_noinline u8 *write_f64_raw(u8 *buf, u64 raw, yyjson_write_flag flg) { return buf; } } - + /* binary to decimal */ f64_bin_to_dec(sig_raw, exp_raw, sig_bin, exp_bin, &sig_dec, &exp_dec); - + /* the sig length is 15 to 17 */ sig_len = 17; sig_len 
-= (sig_dec < (u64)100000000 * 100000000); sig_len -= (sig_dec < (u64)100000000 * 10000000); - + /* the decimal point position relative to the first digit */ dot_pos = sig_len + exp_dec; - + if (-6 < dot_pos && dot_pos <= 21) { /* no need to write exponent part */ if (dot_pos <= 0) { @@ -6412,15 +7478,15 @@ static_noinline u8 *write_f64_raw(u8 *buf, u64 raw, yyjson_write_flag flg) { buf = write_f64_exp(exp_dec, end + 1); return buf; } - + } else { /* subnormal number */ sig_bin = sig_raw; exp_bin = 1 - F64_EXP_BIAS - F64_SIG_BITS; - + /* binary to decimal */ f64_bin_to_dec(sig_raw, exp_raw, sig_bin, exp_bin, &sig_dec, &exp_dec); - + /* write significand part */ buf = write_u64_len_1_to_17(sig_dec, buf + 1); hdr[0] = hdr[1]; @@ -6433,7 +7499,7 @@ static_noinline u8 *write_f64_raw(u8 *buf, u64 raw, yyjson_write_flag flg) { buf += (*buf != '.'); buf[0] = 'e'; buf++; - + /* write exponent part */ buf[0] = '-'; buf++; @@ -6441,7 +7507,7 @@ static_noinline u8 *write_f64_raw(u8 *buf, u64 raw, yyjson_write_flag flg) { hi = ((u32)exp_dec * 656) >> 16; /* exp / 100 */ lo = (u32)exp_dec - hi * 100; /* exp % 100 */ buf[0] = (u8)((u8)hi + (u8)'0'); - *(v16 *)&buf[1] = *(const v16 *)(digit_table + (lo * 2)); + byte_copy_2(buf + 1, digit_table + lo * 2); buf += 3; return buf; } @@ -6450,18 +7516,18 @@ static_noinline u8 *write_f64_raw(u8 *buf, u64 raw, yyjson_write_flag flg) { #else /* FP_WRITER */ /** Write a double number (requires 32 bytes buffer). */ -static_noinline u8 *write_f64_raw(u8 *buf, u64 raw, yyjson_write_flag flg) { +static_inline u8 *write_f64_raw(u8 *buf, u64 raw, yyjson_write_flag flg) { /* For IEEE 754, `DBL_DECIMAL_DIG` is 17 for round-trip. For non-IEEE formats, 17 is used to avoid buffer overflow, round-trip is not guaranteed. */ -#if defined(DBL_DECIMAL_DIG) +#if defined(DBL_DECIMAL_DIG) && DBL_DECIMAL_DIG != 17 int dig = DBL_DECIMAL_DIG > 17 ? 17 : DBL_DECIMAL_DIG; #else int dig = 17; #endif - + /* The snprintf() function is locale-dependent. 
For currently known locales, (en, zh, ja, ko, am, he, hi) use '.' as the decimal point, while other @@ -6476,16 +7542,17 @@ static_noinline u8 *write_f64_raw(u8 *buf, u64 raw, yyjson_write_flag flg) { #else int len = sprintf((char *)buf, "%.*g", dig, val); #endif - + u8 *cur = buf; if (unlikely(len < 1)) return NULL; cur += (*cur == '-'); if (unlikely(!digi_is_digit(*cur))) { /* nan, inf, or bad output */ - if (flg & YYJSON_WRITE_INF_AND_NAN_AS_NULL) { + if (has_write_flag(INF_AND_NAN_AS_NULL)) { byte_copy_4(buf, "null"); return buf + 4; - } else if (flg & YYJSON_WRITE_ALLOW_INF_AND_NAN) { + } + else if (has_write_flag(ALLOW_INF_AND_NAN)) { if (*cur == 'i') { byte_copy_8(cur, "Infinity"); cur += 8; @@ -6774,14 +7841,14 @@ static const u8 esc_single_char_table[512] = { /** Returns the encode table with options. */ static_inline const char_enc_type *get_enc_table_with_flag( yyjson_read_flag flg) { - if (unlikely(flg & YYJSON_WRITE_ESCAPE_UNICODE)) { - if (unlikely(flg & YYJSON_WRITE_ESCAPE_SLASHES)) { + if (has_write_flag(ESCAPE_UNICODE)) { + if (has_write_flag(ESCAPE_SLASHES)) { return enc_table_esc_slash; } else { return enc_table_esc; } } else { - if (unlikely(flg & YYJSON_WRITE_ESCAPE_SLASHES)) { + if (has_write_flag(ESCAPE_SLASHES)) { return enc_table_cpy_slash; } else { return enc_table_cpy; @@ -6795,6 +7862,35 @@ static_inline u8 *write_raw(u8 *cur, const u8 *raw, usize raw_len) { return cur + raw_len; } +/** + Write string no-escape. + @param cur Buffer cursor. + @param str A UTF-8 string, null-terminator is not required. + @param str_len Length of string in bytes. + @return The buffer cursor after string. 
+ */ +static_inline u8 *write_string_noesc(u8 *cur, const u8 *str, usize str_len) { + *cur++ = '"'; + while (str_len >= 16) { + byte_copy_16(cur, str); + cur += 16; + str += 16; + str_len -= 16; + } + while (str_len >= 4) { + byte_copy_4(cur, str); + cur += 4; + str += 4; + str_len -= 4; + } + while (str_len) { + *cur++ = *str++; + str_len -= 1; + } + *cur++ = '"'; + return cur; +} + /** Write UTF-8 string (requires len * 6 + 2 bytes buffer). @param cur Buffer cursor. @@ -6808,8 +7904,8 @@ static_inline u8 *write_raw(u8 *cur, const u8 *raw, usize raw_len) { static_inline u8 *write_string(u8 *cur, bool esc, bool inv, const u8 *str, usize str_len, const char_enc_type *enc_table) { - - /* UTF-8 character mask and pattern, see `read_string` for details. */ + + /* UTF-8 character mask and pattern, see `read_string()` for details. */ #if YYJSON_ENDIAN == YYJSON_BIG_ENDIAN const u16 b2_mask = 0xE0C0UL; const u16 b2_patt = 0xC080UL; @@ -6837,6 +7933,7 @@ static_inline u8 *write_string(u8 *cur, bool esc, bool inv, const u32 b4_err0 = 0x00000004UL; const u32 b4_err1 = 0x00003003UL; #else + /* this should be evaluated at compile-time */ v16_uni b2_mask_uni = {{ 0xE0, 0xC0 }}; v16_uni b2_patt_uni = {{ 0xC0, 0x80 }}; v16_uni b2_requ_uni = {{ 0x1E, 0x00 }}; @@ -6862,36 +7959,36 @@ static_inline u8 *write_string(u8 *cur, bool esc, bool inv, u32 b4_err0 = b4_err0_uni.u; u32 b4_err1 = b4_err1_uni.u; #endif - + #define is_valid_seq_2(uni) ( \ ((uni & b2_mask) == b2_patt) && \ ((uni & b2_requ)) \ ) - + #define is_valid_seq_3(uni) ( \ ((uni & b3_mask) == b3_patt) && \ ((tmp = (uni & b3_requ))) && \ ((tmp != b3_erro)) \ ) - + #define is_valid_seq_4(uni) ( \ ((uni & b4_mask) == b4_patt) && \ ((tmp = (uni & b4_requ))) && \ ((tmp & b4_err0) == 0 || (tmp & b4_err1) == 0) \ ) - + /* The replacement character U+FFFD, used to indicate invalid character. 
*/ - const v32 rep = { 'F', 'F', 'F', 'D' }; - const v32 pre = { '\\', 'u', '0', '0' }; - + const v32 rep = {{ 'F', 'F', 'F', 'D' }}; + const v32 pre = {{ '\\', 'u', '0', '0' }}; + const u8 *src = str; const u8 *end = str + str_len; *cur++ = '"'; - + copy_ascii: /* Copy continuous ASCII, loop unrolling, same as the following code: - + while (end > src) ( if (unlikely(enc_table[*src])) break; *cur++ = *src++; @@ -6899,37 +7996,37 @@ static_inline u8 *write_string(u8 *cur, bool esc, bool inv, */ #define expr_jump(i) \ if (unlikely(enc_table[src[i]])) goto stop_char_##i; - + #define expr_stop(i) \ stop_char_##i: \ memcpy(cur, src, i); \ cur += i; src += i; goto copy_utf8; - + while (end - src >= 16) { - repeat16_incr(expr_jump); + repeat16_incr(expr_jump) byte_copy_16(cur, src); cur += 16; src += 16; } - + while (end - src >= 4) { - repeat4_incr(expr_jump); + repeat4_incr(expr_jump) byte_copy_4(cur, src); cur += 4; src += 4; } - + while (end > src) { - expr_jump(0); + expr_jump(0) *cur++ = *src++; } - + *cur++ = '"'; return cur; - - repeat16_incr(expr_stop); - + + repeat16_incr(expr_stop) + #undef expr_jump #undef expr_stop - + copy_utf8: if (unlikely(src + 4 > end)) { if (end == src) goto copy_end; @@ -6942,16 +8039,27 @@ static_inline u8 *write_string(u8 *cur, bool esc, bool inv, } case CHAR_ENC_CPY_2: { u16 v; +#if YYJSON_DISABLE_UTF8_VALIDATION + byte_copy_2(cur, src); +#else v = byte_load_2(src); if (unlikely(!is_valid_seq_2(v))) goto err_cpy; - byte_copy_2(cur, src); +#endif cur += 2; src += 2; goto copy_utf8; } case CHAR_ENC_CPY_3: { u32 v, tmp; +#if YYJSON_DISABLE_UTF8_VALIDATION + if (likely(src + 4 <= end)) { + byte_copy_4(cur, src); + } else { + byte_copy_2(cur, src); + cur[2] = src[2]; + } +#else if (likely(src + 4 <= end)) { v = byte_load_4(src); if (unlikely(!is_valid_seq_3(v))) goto err_cpy; @@ -6961,22 +8069,26 @@ static_inline u8 *write_string(u8 *cur, bool esc, bool inv, if (unlikely(!is_valid_seq_3(v))) goto err_cpy; byte_copy_4(cur, &v); } +#endif 
cur += 3; src += 3; goto copy_utf8; } case CHAR_ENC_CPY_4: { u32 v, tmp; +#if YYJSON_DISABLE_UTF8_VALIDATION + byte_copy_4(cur, src); +#else v = byte_load_4(src); if (unlikely(!is_valid_seq_4(v))) goto err_cpy; - byte_copy_4(cur, src); +#endif cur += 4; src += 4; goto copy_utf8; } case CHAR_ENC_ESC_A: { - byte_move_2(cur, &esc_single_char_table[*src * 2]); + byte_copy_2(cur, &esc_single_char_table[*src * 2]); cur += 2; src += 1; goto copy_utf8; @@ -6990,9 +8102,10 @@ static_inline u8 *write_string(u8 *cur, bool esc, bool inv, } case CHAR_ENC_ESC_2: { u16 u, v; +#if !YYJSON_DISABLE_UTF8_VALIDATION v = byte_load_2(src); if (unlikely(!is_valid_seq_2(v))) goto err_esc; - +#endif u = (u16)(((u16)(src[0] & 0x1F) << 6) | ((u16)(src[1] & 0x3F) << 0)); byte_copy_2(cur + 0, &pre); @@ -7005,9 +8118,10 @@ static_inline u8 *write_string(u8 *cur, bool esc, bool inv, case CHAR_ENC_ESC_3: { u16 u; u32 v, tmp; +#if !YYJSON_DISABLE_UTF8_VALIDATION v = byte_load_3(src); if (unlikely(!is_valid_seq_3(v))) goto err_esc; - +#endif u = (u16)(((u16)(src[0] & 0x0F) << 12) | ((u16)(src[1] & 0x3F) << 6) | ((u16)(src[2] & 0x3F) << 0)); @@ -7020,9 +8134,10 @@ static_inline u8 *write_string(u8 *cur, bool esc, bool inv, } case CHAR_ENC_ESC_4: { u32 hi, lo, u, v, tmp; +#if !YYJSON_DISABLE_UTF8_VALIDATION v = byte_load_4(src); if (unlikely(!is_valid_seq_4(v))) goto err_esc; - +#endif u = ((u32)(src[0] & 0x07) << 18) | ((u32)(src[1] & 0x3F) << 12) | ((u32)(src[2] & 0x3F) << 6) | @@ -7045,20 +8160,20 @@ static_inline u8 *write_string(u8 *cur, bool esc, bool inv, } default: break; } - + copy_end: *cur++ = '"'; return cur; - + err_one: if (esc) goto err_esc; else goto err_cpy; - + err_cpy: if (!inv) return NULL; *cur++ = *src++; goto copy_utf8; - + err_esc: if (!inv) return NULL; byte_copy_2(cur + 0, &pre); @@ -7066,7 +8181,7 @@ static_inline u8 *write_string(u8 *cur, bool esc, bool inv, cur += 6; src += 1; goto copy_utf8; - + #undef is_valid_seq_2 #undef is_valid_seq_3 #undef is_valid_seq_4 @@ 
-7080,15 +8195,15 @@ static_inline u8 *write_string(u8 *cur, bool esc, bool inv, /** Write null (requires 8 bytes buffer). */ static_inline u8 *write_null(u8 *cur) { - v64 v = { 'n', 'u', 'l', 'l', ',', '\n', 0, 0 }; + v64 v = {{ 'n', 'u', 'l', 'l', ',', '\n', 0, 0 }}; byte_copy_8(cur, &v); return cur + 4; } /** Write bool (requires 8 bytes buffer). */ static_inline u8 *write_bool(u8 *cur, bool val) { - v64 v0 = { 'f', 'a', 'l', 's', 'e', ',', '\n', 0 }; - v64 v1 = { 't', 'r', 'u', 'e', ',', '\n', 0, 0 }; + v64 v0 = {{ 'f', 'a', 'l', 's', 'e', ',', '\n', 0 }}; + v64 v1 = {{ 't', 'r', 'u', 'e', ',', '\n', 0, 0 }}; if (val) { byte_copy_8(cur, &v1); } else { @@ -7107,17 +8222,28 @@ static_inline u8 *write_indent(u8 *cur, usize level, usize spaces) { return cur; } +/** Write data to file pointer. */ +static bool write_dat_to_fp(FILE *fp, u8 *dat, usize len, + yyjson_write_err *err) { + if (fwrite(dat, len, 1, fp) != 1) { + err->msg = "file writing failed"; + err->code = YYJSON_WRITE_ERROR_FILE_WRITE; + return false; + } + return true; +} + /** Write data to file. 
*/ static bool write_dat_to_file(const char *path, u8 *dat, usize len, yyjson_write_err *err) { - + #define return_err(_code, _msg) do { \ err->msg = _msg; \ err->code = YYJSON_WRITE_ERROR_##_code; \ if (file) fclose(file); \ return false; \ } while (false) - + FILE *file = fopen_writeonly(path); if (file == NULL) { return_err(FILE_OPEN, "file opening failed"); @@ -7130,7 +8256,7 @@ static bool write_dat_to_file(const char *path, u8 *dat, usize len, return_err(FILE_WRITE, "file closing failed"); } return true; - + #undef return_err } @@ -7162,7 +8288,7 @@ static_inline u8 *yyjson_write_single(yyjson_val *val, yyjson_alc alc, usize *dat_len, yyjson_write_err *err) { - + #define return_err(_code, _msg) do { \ if (hdr) alc.free(alc.ctx, (void *)hdr); \ *dat_len = 0; \ @@ -7170,80 +8296,88 @@ static_inline u8 *yyjson_write_single(yyjson_val *val, err->msg = _msg; \ return NULL; \ } while (false) - + #define incr_len(_len) do { \ hdr = (u8 *)alc.malloc(alc.ctx, _len); \ if (!hdr) goto fail_alloc; \ cur = hdr; \ } while (false) - + #define check_str_len(_len) do { \ - if ((USIZE_MAX < U64_MAX) && (_len >= (USIZE_MAX - 16) / 6)) \ + if ((sizeof(usize) < 8) && (_len >= (USIZE_MAX - 16) / 6)) \ goto fail_alloc; \ } while (false) - + u8 *hdr = NULL, *cur; usize str_len; const u8 *str_ptr; const char_enc_type *enc_table = get_enc_table_with_flag(flg); - bool esc = (flg & YYJSON_WRITE_ESCAPE_UNICODE) != 0; - bool inv = (flg & YYJSON_WRITE_ALLOW_INVALID_UNICODE) != 0; - + bool cpy = (enc_table == enc_table_cpy); + bool esc = has_write_flag(ESCAPE_UNICODE) != 0; + bool inv = has_write_flag(ALLOW_INVALID_UNICODE) != 0; + bool newline = has_write_flag(NEWLINE_AT_END) != 0; + const usize end_len = 2; /* '\n' and '\0' */ + switch (unsafe_yyjson_get_type(val)) { case YYJSON_TYPE_RAW: str_len = unsafe_yyjson_get_len(val); str_ptr = (const u8 *)unsafe_yyjson_get_str(val); check_str_len(str_len); - incr_len(str_len + 1); + incr_len(str_len + end_len); cur = write_raw(cur, str_ptr, 
str_len); break; - + case YYJSON_TYPE_STR: str_len = unsafe_yyjson_get_len(val); str_ptr = (const u8 *)unsafe_yyjson_get_str(val); check_str_len(str_len); - incr_len(str_len * 6 + 4); - cur = write_string(cur, esc, inv, str_ptr, str_len, enc_table); - if (unlikely(!cur)) goto fail_str; + incr_len(str_len * 6 + 2 + end_len); + if (likely(cpy) && unsafe_yyjson_get_subtype(val)) { + cur = write_string_noesc(cur, str_ptr, str_len); + } else { + cur = write_string(cur, esc, inv, str_ptr, str_len, enc_table); + if (unlikely(!cur)) goto fail_str; + } break; - + case YYJSON_TYPE_NUM: - incr_len(32); + incr_len(32 + end_len); cur = write_number(cur, val, flg); if (unlikely(!cur)) goto fail_num; break; - + case YYJSON_TYPE_BOOL: incr_len(8); cur = write_bool(cur, unsafe_yyjson_get_bool(val)); break; - + case YYJSON_TYPE_NULL: incr_len(8); cur = write_null(cur); break; - + case YYJSON_TYPE_ARR: - incr_len(4); + incr_len(2 + end_len); byte_copy_2(cur, "[]"); cur += 2; break; - + case YYJSON_TYPE_OBJ: - incr_len(4); + incr_len(2 + end_len); byte_copy_2(cur, "{}"); cur += 2; break; - + default: goto fail_type; } - + + if (newline) *cur++ = '\n'; *cur = '\0'; *dat_len = (usize)(cur - hdr); memset(err, 0, sizeof(yyjson_write_err)); return hdr; - + fail_alloc: return_err(MEMORY_ALLOCATION, "memory allocation failed"); fail_type: @@ -7252,7 +8386,7 @@ static_inline u8 *yyjson_write_single(yyjson_val *val, return_err(NAN_OR_INF, "nan or inf number is not allowed"); fail_str: return_err(INVALID_STRING, "invalid utf-8 encoding in string"); - + #undef return_err #undef check_str_len #undef incr_len @@ -7265,7 +8399,7 @@ static_inline u8 *yyjson_write_minify(const yyjson_val *root, const yyjson_alc alc, usize *dat_len, yyjson_write_err *err) { - + #define return_err(_code, _msg) do { \ *dat_len = 0; \ err->code = YYJSON_WRITE_ERROR_##_code; \ @@ -7273,13 +8407,14 @@ static_inline u8 *yyjson_write_minify(const yyjson_val *root, if (hdr) alc.free(alc.ctx, hdr); \ return NULL; \ } while 
(false) - + #define incr_len(_len) do { \ ext_len = (usize)(_len); \ if (unlikely((u8 *)(cur + ext_len) >= (u8 *)ctx)) { \ alc_inc = yyjson_max(alc_len / 2, ext_len); \ alc_inc = size_align_up(alc_inc, sizeof(yyjson_write_ctx)); \ - if (size_add_is_overflow(alc_len, alc_inc)) goto fail_alloc; \ + if ((sizeof(usize) < 8) && size_add_is_overflow(alc_len, alc_inc)) \ + goto fail_alloc; \ alc_len += alc_inc; \ tmp = (u8 *)alc.realloc(alc.ctx, hdr, alc_len - alc_inc, alc_len); \ if (unlikely(!tmp)) goto fail_alloc; \ @@ -7292,12 +8427,12 @@ static_inline u8 *yyjson_write_minify(const yyjson_val *root, hdr = tmp; \ } \ } while (false) - + #define check_str_len(_len) do { \ - if ((USIZE_MAX < U64_MAX) && (_len >= (USIZE_MAX - 16) / 6)) \ + if ((sizeof(usize) < 8) && (_len >= (USIZE_MAX - 16) / 6)) \ goto fail_alloc; \ } while (false) - + yyjson_val *val; yyjson_type val_type; usize ctn_len, ctn_len_tmp; @@ -7307,9 +8442,11 @@ static_inline u8 *yyjson_write_minify(const yyjson_val *root, usize alc_len, alc_inc, ctx_len, ext_len, str_len; const u8 *str_ptr; const char_enc_type *enc_table = get_enc_table_with_flag(flg); - bool esc = (flg & YYJSON_WRITE_ESCAPE_UNICODE) != 0; - bool inv = (flg & YYJSON_WRITE_ALLOW_INVALID_UNICODE) != 0; - + bool cpy = (enc_table == enc_table_cpy); + bool esc = has_write_flag(ESCAPE_UNICODE) != 0; + bool inv = has_write_flag(ALLOW_INVALID_UNICODE) != 0; + bool newline = has_write_flag(NEWLINE_AT_END) != 0; + alc_len = root->uni.ofs / sizeof(yyjson_val); alc_len = alc_len * YYJSON_WRITER_ESTIMATED_MINIFY_RATIO + 64; alc_len = size_align_up(alc_len, sizeof(yyjson_write_ctx)); @@ -7318,15 +8455,15 @@ static_inline u8 *yyjson_write_minify(const yyjson_val *root, cur = hdr; end = hdr + alc_len; ctx = (yyjson_write_ctx *)(void *)end; - + doc_begin: - val = (yyjson_val *)root; + val = constcast(yyjson_val *)root; val_type = unsafe_yyjson_get_type(val); ctn_obj = (val_type == YYJSON_TYPE_OBJ); ctn_len = unsafe_yyjson_get_len(val) << (u8)ctn_obj; *cur++ 
= (u8)('[' | ((u8)ctn_obj << 5)); val++; - + val_begin: val_type = unsafe_yyjson_get_type(val); if (val_type == YYJSON_TYPE_STR) { @@ -7335,8 +8472,12 @@ static_inline u8 *yyjson_write_minify(const yyjson_val *root, str_ptr = (const u8 *)unsafe_yyjson_get_str(val); check_str_len(str_len); incr_len(str_len * 6 + 16); - cur = write_string(cur, esc, inv, str_ptr, str_len, enc_table); - if (unlikely(!cur)) goto fail_str; + if (likely(cpy) && unsafe_yyjson_get_subtype(val)) { + cur = write_string_noesc(cur, str_ptr, str_len); + } else { + cur = write_string(cur, esc, inv, str_ptr, str_len, enc_table); + if (unlikely(!cur)) goto fail_str; + } *cur++ = is_key ? ':' : ','; goto val_end; } @@ -7390,13 +8531,13 @@ static_inline u8 *yyjson_write_minify(const yyjson_val *root, goto val_end; } goto fail_type; - + val_end: val++; ctn_len--; if (unlikely(ctn_len == 0)) goto ctn_end; goto val_begin; - + ctn_end: cur--; *cur++ = (u8)(']' | ((u8)ctn_obj << 5)); @@ -7409,13 +8550,18 @@ static_inline u8 *yyjson_write_minify(const yyjson_val *root, } else { goto ctn_end; } - + doc_end: + if (newline) { + incr_len(2); + *(cur - 1) = '\n'; + cur++; + } *--cur = '\0'; *dat_len = (usize)(cur - hdr); memset(err, 0, sizeof(yyjson_write_err)); return hdr; - + fail_alloc: return_err(MEMORY_ALLOCATION, "memory allocation failed"); fail_type: @@ -7424,7 +8570,7 @@ static_inline u8 *yyjson_write_minify(const yyjson_val *root, return_err(NAN_OR_INF, "nan or inf number is not allowed"); fail_str: return_err(INVALID_STRING, "invalid utf-8 encoding in string"); - + #undef return_err #undef incr_len #undef check_str_len @@ -7437,7 +8583,7 @@ static_inline u8 *yyjson_write_pretty(const yyjson_val *root, const yyjson_alc alc, usize *dat_len, yyjson_write_err *err) { - + #define return_err(_code, _msg) do { \ *dat_len = 0; \ err->code = YYJSON_WRITE_ERROR_##_code; \ @@ -7445,13 +8591,14 @@ static_inline u8 *yyjson_write_pretty(const yyjson_val *root, if (hdr) alc.free(alc.ctx, hdr); \ return NULL; \ } 
while (false) - + #define incr_len(_len) do { \ ext_len = (usize)(_len); \ if (unlikely((u8 *)(cur + ext_len) >= (u8 *)ctx)) { \ alc_inc = yyjson_max(alc_len / 2, ext_len); \ alc_inc = size_align_up(alc_inc, sizeof(yyjson_write_ctx)); \ - if (size_add_is_overflow(alc_len, alc_inc)) goto fail_alloc; \ + if ((sizeof(usize) < 8) && size_add_is_overflow(alc_len, alc_inc)) \ + goto fail_alloc; \ alc_len += alc_inc; \ tmp = (u8 *)alc.realloc(alc.ctx, hdr, alc_len - alc_inc, alc_len); \ if (unlikely(!tmp)) goto fail_alloc; \ @@ -7464,12 +8611,12 @@ static_inline u8 *yyjson_write_pretty(const yyjson_val *root, hdr = tmp; \ } \ } while (false) - + #define check_str_len(_len) do { \ - if ((USIZE_MAX < U64_MAX) && (_len >= (USIZE_MAX - 16) / 6)) \ + if ((sizeof(usize) < 8) && (_len >= (USIZE_MAX - 16) / 6)) \ goto fail_alloc; \ } while (false) - + yyjson_val *val; yyjson_type val_type; usize ctn_len, ctn_len_tmp; @@ -7479,10 +8626,12 @@ static_inline u8 *yyjson_write_pretty(const yyjson_val *root, usize alc_len, alc_inc, ctx_len, ext_len, str_len, level; const u8 *str_ptr; const char_enc_type *enc_table = get_enc_table_with_flag(flg); - bool esc = (flg & YYJSON_WRITE_ESCAPE_UNICODE) != 0; - bool inv = (flg & YYJSON_WRITE_ALLOW_INVALID_UNICODE) != 0; - usize spaces = (flg & YYJSON_WRITE_PRETTY_TWO_SPACES) ? 2 : 4; - + bool cpy = (enc_table == enc_table_cpy); + bool esc = has_write_flag(ESCAPE_UNICODE) != 0; + bool inv = has_write_flag(ALLOW_INVALID_UNICODE) != 0; + usize spaces = has_write_flag(PRETTY_TWO_SPACES) ? 
2 : 4; + bool newline = has_write_flag(NEWLINE_AT_END) != 0; + alc_len = root->uni.ofs / sizeof(yyjson_val); alc_len = alc_len * YYJSON_WRITER_ESTIMATED_PRETTY_RATIO + 64; alc_len = size_align_up(alc_len, sizeof(yyjson_write_ctx)); @@ -7491,9 +8640,9 @@ static_inline u8 *yyjson_write_pretty(const yyjson_val *root, cur = hdr; end = hdr + alc_len; ctx = (yyjson_write_ctx *)(void *)end; - + doc_begin: - val = (yyjson_val *)root; + val = constcast(yyjson_val *)root; val_type = unsafe_yyjson_get_type(val); ctn_obj = (val_type == YYJSON_TYPE_OBJ); ctn_len = unsafe_yyjson_get_len(val) << (u8)ctn_obj; @@ -7501,7 +8650,7 @@ static_inline u8 *yyjson_write_pretty(const yyjson_val *root, *cur++ = '\n'; val++; level = 1; - + val_begin: val_type = unsafe_yyjson_get_type(val); if (val_type == YYJSON_TYPE_STR) { @@ -7512,8 +8661,12 @@ static_inline u8 *yyjson_write_pretty(const yyjson_val *root, check_str_len(str_len); incr_len(str_len * 6 + 16 + (no_indent ? 0 : level * 4)); cur = write_indent(cur, no_indent ? 0 : level, spaces); - cur = write_string(cur, esc, inv, str_ptr, str_len, enc_table); - if (unlikely(!cur)) goto fail_str; + if (likely(cpy) && unsafe_yyjson_get_subtype(val)) { + cur = write_string_noesc(cur, str_ptr, str_len); + } else { + cur = write_string(cur, esc, inv, str_ptr, str_len, enc_table); + if (unlikely(!cur)) goto fail_str; + } *cur++ = is_key ? ':' : ','; *cur++ = is_key ? 
' ' : '\n'; goto val_end; @@ -7583,13 +8736,13 @@ static_inline u8 *yyjson_write_pretty(const yyjson_val *root, goto val_end; } goto fail_type; - + val_end: val++; ctn_len--; if (unlikely(ctn_len == 0)) goto ctn_end; goto val_begin; - + ctn_end: cur -= 2; *cur++ = '\n'; @@ -7606,13 +8759,17 @@ static_inline u8 *yyjson_write_pretty(const yyjson_val *root, } else { goto ctn_end; } - + doc_end: + if (newline) { + incr_len(2); + *cur++ = '\n'; + } *cur = '\0'; *dat_len = (usize)(cur - hdr); memset(err, 0, sizeof(yyjson_write_err)); return hdr; - + fail_alloc: return_err(MEMORY_ALLOCATION, "memory allocation failed"); fail_type: @@ -7621,7 +8778,7 @@ static_inline u8 *yyjson_write_pretty(const yyjson_val *root, return_err(NAN_OR_INF, "nan or inf number is not allowed"); fail_str: return_err(INVALID_STRING, "invalid utf-8 encoding in string"); - + #undef return_err #undef incr_len #undef check_str_len @@ -7635,23 +8792,18 @@ char *yyjson_val_write_opts(const yyjson_val *val, yyjson_write_err dummy_err; usize dummy_dat_len; yyjson_alc alc = alc_ptr ? *alc_ptr : YYJSON_DEFAULT_ALC; - yyjson_val *root = (yyjson_val *)val; - + yyjson_val *root = constcast(yyjson_val *)val; + err = err ? err : &dummy_err; dat_len = dat_len ? dat_len : &dummy_dat_len; - -#if YYJSON_DISABLE_NON_STANDARD - flg &= ~YYJSON_WRITE_ALLOW_INF_AND_NAN; - flg &= ~YYJSON_WRITE_ALLOW_INVALID_UNICODE; -#endif - + if (unlikely(!root)) { *dat_len = 0; err->msg = "input JSON is NULL"; err->code = YYJSON_READ_ERROR_INVALID_PARAMETER; return NULL; } - + if (!unsafe_yyjson_is_ctn(root) || unsafe_yyjson_get_len(root) == 0) { return (char *)yyjson_write_single(root, flg, alc, dat_len, err); } else if (flg & (YYJSON_WRITE_PRETTY | YYJSON_WRITE_PRETTY_TWO_SPACES)) { @@ -7678,9 +8830,9 @@ bool yyjson_val_write_file(const char *path, yyjson_write_err dummy_err; u8 *dat; usize dat_len = 0; - yyjson_val *root = (yyjson_val *)val; + yyjson_val *root = constcast(yyjson_val *)val; bool suc; - + alc_ptr = alc_ptr ? 
alc_ptr : &YYJSON_DEFAULT_ALC; err = err ? err : &dummy_err; if (unlikely(!path || !*path)) { @@ -7688,7 +8840,7 @@ bool yyjson_val_write_file(const char *path, err->code = YYJSON_READ_ERROR_INVALID_PARAMETER; return false; } - + dat = (u8 *)yyjson_val_write_opts(root, flg, alc_ptr, &dat_len, err); if (unlikely(!dat)) return false; suc = write_dat_to_file(path, dat, dat_len, err); @@ -7696,6 +8848,32 @@ bool yyjson_val_write_file(const char *path, return suc; } +bool yyjson_val_write_fp(FILE *fp, + const yyjson_val *val, + yyjson_write_flag flg, + const yyjson_alc *alc_ptr, + yyjson_write_err *err) { + yyjson_write_err dummy_err; + u8 *dat; + usize dat_len = 0; + yyjson_val *root = constcast(yyjson_val *)val; + bool suc; + + alc_ptr = alc_ptr ? alc_ptr : &YYJSON_DEFAULT_ALC; + err = err ? err : &dummy_err; + if (unlikely(!fp)) { + err->msg = "input fp is invalid"; + err->code = YYJSON_READ_ERROR_INVALID_PARAMETER; + return false; + } + + dat = (u8 *)yyjson_val_write_opts(root, flg, alc_ptr, &dat_len, err); + if (unlikely(!dat)) return false; + suc = write_dat_to_fp(fp, dat, dat_len, err); + alc_ptr->free(alc_ptr->ctx, dat); + return suc; +} + bool yyjson_write_file(const char *path, const yyjson_doc *doc, yyjson_write_flag flg, @@ -7705,6 +8883,15 @@ bool yyjson_write_file(const char *path, return yyjson_val_write_file(path, root, flg, alc_ptr, err); } +bool yyjson_write_fp(FILE *fp, + const yyjson_doc *doc, + yyjson_write_flag flg, + const yyjson_alc *alc_ptr, + yyjson_write_err *err) { + yyjson_val *root = doc ? doc->root : NULL; + return yyjson_val_write_fp(fp, root, flg, alc_ptr, err); +} + /*============================================================================== @@ -7732,6 +8919,21 @@ static_inline void yyjson_mut_write_ctx_get(yyjson_mut_write_ctx *ctx, *ctn = ctx->ctn; } +/** Get the estimated number of values for the mutable JSON document. 
*/ +static_inline usize yyjson_mut_doc_estimated_val_num( + const yyjson_mut_doc *doc) { + usize sum = 0; + yyjson_val_chunk *chunk = doc->val_pool.chunks; + while (chunk) { + sum += chunk->chunk_size / sizeof(yyjson_mut_val) - 1; + if (chunk == doc->val_pool.chunks) { + sum -= (usize)(doc->val_pool.end - doc->val_pool.cur); + } + chunk = chunk->next; + } + return sum; +} + /** Write single JSON value. */ static_inline u8 *yyjson_mut_write_single(yyjson_mut_val *val, yyjson_write_flag flg, @@ -7744,11 +8946,12 @@ static_inline u8 *yyjson_mut_write_single(yyjson_mut_val *val, /** Write JSON document minify. The root of this document should be a non-empty container. */ static_inline u8 *yyjson_mut_write_minify(const yyjson_mut_val *root, + usize estimated_val_num, yyjson_write_flag flg, yyjson_alc alc, usize *dat_len, yyjson_write_err *err) { - + #define return_err(_code, _msg) do { \ *dat_len = 0; \ err->code = YYJSON_WRITE_ERROR_##_code; \ @@ -7756,13 +8959,14 @@ static_inline u8 *yyjson_mut_write_minify(const yyjson_mut_val *root, if (hdr) alc.free(alc.ctx, hdr); \ return NULL; \ } while (false) - + #define incr_len(_len) do { \ ext_len = (usize)(_len); \ if (unlikely((u8 *)(cur + ext_len) >= (u8 *)ctx)) { \ alc_inc = yyjson_max(alc_len / 2, ext_len); \ alc_inc = size_align_up(alc_inc, sizeof(yyjson_mut_write_ctx)); \ - if (size_add_is_overflow(alc_len, alc_inc)) goto fail_alloc; \ + if ((sizeof(usize) < 8) && size_add_is_overflow(alc_len, alc_inc)) \ + goto fail_alloc; \ alc_len += alc_inc; \ tmp = (u8 *)alc.realloc(alc.ctx, hdr, alc_len - alc_inc, alc_len); \ if (unlikely(!tmp)) goto fail_alloc; \ @@ -7775,12 +8979,12 @@ static_inline u8 *yyjson_mut_write_minify(const yyjson_mut_val *root, hdr = tmp; \ } \ } while (false) - + #define check_str_len(_len) do { \ - if ((USIZE_MAX < U64_MAX) && (_len >= (USIZE_MAX - 16) / 6)) \ + if ((sizeof(usize) < 8) && (_len >= (USIZE_MAX - 16) / 6)) \ goto fail_alloc; \ } while (false) - + yyjson_mut_val *val, *ctn; yyjson_type 
val_type; usize ctn_len, ctn_len_tmp; @@ -7790,19 +8994,21 @@ static_inline u8 *yyjson_mut_write_minify(const yyjson_mut_val *root, usize alc_len, alc_inc, ctx_len, ext_len, str_len; const u8 *str_ptr; const char_enc_type *enc_table = get_enc_table_with_flag(flg); - bool esc = (flg & YYJSON_WRITE_ESCAPE_UNICODE) != 0; - bool inv = (flg & YYJSON_WRITE_ALLOW_INVALID_UNICODE) != 0; - - alc_len = 0 * YYJSON_WRITER_ESTIMATED_MINIFY_RATIO + 64; + bool cpy = (enc_table == enc_table_cpy); + bool esc = has_write_flag(ESCAPE_UNICODE) != 0; + bool inv = has_write_flag(ALLOW_INVALID_UNICODE) != 0; + bool newline = has_write_flag(NEWLINE_AT_END) != 0; + + alc_len = estimated_val_num * YYJSON_WRITER_ESTIMATED_MINIFY_RATIO + 64; alc_len = size_align_up(alc_len, sizeof(yyjson_mut_write_ctx)); hdr = (u8 *)alc.malloc(alc.ctx, alc_len); if (!hdr) goto fail_alloc; cur = hdr; end = hdr + alc_len; ctx = (yyjson_mut_write_ctx *)(void *)end; - + doc_begin: - val = (yyjson_mut_val *)root; + val = constcast(yyjson_mut_val *)root; val_type = unsafe_yyjson_get_type(val); ctn_obj = (val_type == YYJSON_TYPE_OBJ); ctn_len = unsafe_yyjson_get_len(val) << (u8)ctn_obj; @@ -7810,7 +9016,7 @@ static_inline u8 *yyjson_mut_write_minify(const yyjson_mut_val *root, ctn = val; val = (yyjson_mut_val *)val->uni.ptr; /* tail */ val = ctn_obj ? val->next->next : val->next; - + val_begin: val_type = unsafe_yyjson_get_type(val); if (val_type == YYJSON_TYPE_STR) { @@ -7819,8 +9025,12 @@ static_inline u8 *yyjson_mut_write_minify(const yyjson_mut_val *root, str_ptr = (const u8 *)unsafe_yyjson_get_str(val); check_str_len(str_len); incr_len(str_len * 6 + 16); - cur = write_string(cur, esc, inv, str_ptr, str_len, enc_table); - if (unlikely(!cur)) goto fail_str; + if (likely(cpy) && unsafe_yyjson_get_subtype(val)) { + cur = write_string_noesc(cur, str_ptr, str_len); + } else { + cur = write_string(cur, esc, inv, str_ptr, str_len, enc_table); + if (unlikely(!cur)) goto fail_str; + } *cur++ = is_key ? 
':' : ','; goto val_end; } @@ -7876,13 +9086,13 @@ static_inline u8 *yyjson_mut_write_minify(const yyjson_mut_val *root, goto val_end; } goto fail_type; - + val_end: ctn_len--; if (unlikely(ctn_len == 0)) goto ctn_end; val = val->next; goto val_begin; - + ctn_end: cur--; *cur++ = (u8)(']' | ((u8)ctn_obj << 5)); @@ -7896,14 +9106,19 @@ static_inline u8 *yyjson_mut_write_minify(const yyjson_mut_val *root, } else { goto ctn_end; } - + doc_end: + if (newline) { + incr_len(2); + *(cur - 1) = '\n'; + cur++; + } *--cur = '\0'; *dat_len = (usize)(cur - hdr); err->code = YYJSON_WRITE_SUCCESS; err->msg = "success"; return hdr; - + fail_alloc: return_err(MEMORY_ALLOCATION, "memory allocation failed"); fail_type: @@ -7912,7 +9127,7 @@ static_inline u8 *yyjson_mut_write_minify(const yyjson_mut_val *root, return_err(NAN_OR_INF, "nan or inf number is not allowed"); fail_str: return_err(INVALID_STRING, "invalid utf-8 encoding in string"); - + #undef return_err #undef incr_len #undef check_str_len @@ -7921,11 +9136,12 @@ static_inline u8 *yyjson_mut_write_minify(const yyjson_mut_val *root, /** Write JSON document pretty. The root of this document should be a non-empty container. 
*/ static_inline u8 *yyjson_mut_write_pretty(const yyjson_mut_val *root, + usize estimated_val_num, yyjson_write_flag flg, yyjson_alc alc, usize *dat_len, yyjson_write_err *err) { - + #define return_err(_code, _msg) do { \ *dat_len = 0; \ err->code = YYJSON_WRITE_ERROR_##_code; \ @@ -7933,13 +9149,14 @@ static_inline u8 *yyjson_mut_write_pretty(const yyjson_mut_val *root, if (hdr) alc.free(alc.ctx, hdr); \ return NULL; \ } while (false) - + #define incr_len(_len) do { \ ext_len = (usize)(_len); \ if (unlikely((u8 *)(cur + ext_len) >= (u8 *)ctx)) { \ alc_inc = yyjson_max(alc_len / 2, ext_len); \ alc_inc = size_align_up(alc_inc, sizeof(yyjson_mut_write_ctx)); \ - if (size_add_is_overflow(alc_len, alc_inc)) goto fail_alloc; \ + if ((sizeof(usize) < 8) && size_add_is_overflow(alc_len, alc_inc)) \ + goto fail_alloc; \ alc_len += alc_inc; \ tmp = (u8 *)alc.realloc(alc.ctx, hdr, alc_len - alc_inc, alc_len); \ if (unlikely(!tmp)) goto fail_alloc; \ @@ -7952,12 +9169,12 @@ static_inline u8 *yyjson_mut_write_pretty(const yyjson_mut_val *root, hdr = tmp; \ } \ } while (false) - + #define check_str_len(_len) do { \ - if ((USIZE_MAX < U64_MAX) && (_len >= (USIZE_MAX - 16) / 6)) \ + if ((sizeof(usize) < 8) && (_len >= (USIZE_MAX - 16) / 6)) \ goto fail_alloc; \ } while (false) - + yyjson_mut_val *val, *ctn; yyjson_type val_type; usize ctn_len, ctn_len_tmp; @@ -7967,20 +9184,22 @@ static_inline u8 *yyjson_mut_write_pretty(const yyjson_mut_val *root, usize alc_len, alc_inc, ctx_len, ext_len, str_len, level; const u8 *str_ptr; const char_enc_type *enc_table = get_enc_table_with_flag(flg); - bool esc = (flg & YYJSON_WRITE_ESCAPE_UNICODE) != 0; - bool inv = (flg & YYJSON_WRITE_ALLOW_INVALID_UNICODE) != 0; - usize spaces = (flg & YYJSON_WRITE_PRETTY_TWO_SPACES) ? 
2 : 4; - - alc_len = 0 * YYJSON_WRITER_ESTIMATED_PRETTY_RATIO + 64; + bool cpy = (enc_table == enc_table_cpy); + bool esc = has_write_flag(ESCAPE_UNICODE) != 0; + bool inv = has_write_flag(ALLOW_INVALID_UNICODE) != 0; + usize spaces = has_write_flag(PRETTY_TWO_SPACES) ? 2 : 4; + bool newline = has_write_flag(NEWLINE_AT_END) != 0; + + alc_len = estimated_val_num * YYJSON_WRITER_ESTIMATED_PRETTY_RATIO + 64; alc_len = size_align_up(alc_len, sizeof(yyjson_mut_write_ctx)); hdr = (u8 *)alc.malloc(alc.ctx, alc_len); if (!hdr) goto fail_alloc; cur = hdr; end = hdr + alc_len; ctx = (yyjson_mut_write_ctx *)(void *)end; - + doc_begin: - val = (yyjson_mut_val *)root; + val = constcast(yyjson_mut_val *)root; val_type = unsafe_yyjson_get_type(val); ctn_obj = (val_type == YYJSON_TYPE_OBJ); ctn_len = unsafe_yyjson_get_len(val) << (u8)ctn_obj; @@ -7990,7 +9209,7 @@ static_inline u8 *yyjson_mut_write_pretty(const yyjson_mut_val *root, val = (yyjson_mut_val *)val->uni.ptr; /* tail */ val = ctn_obj ? val->next->next : val->next; level = 1; - + val_begin: val_type = unsafe_yyjson_get_type(val); if (val_type == YYJSON_TYPE_STR) { @@ -8001,8 +9220,12 @@ static_inline u8 *yyjson_mut_write_pretty(const yyjson_mut_val *root, check_str_len(str_len); incr_len(str_len * 6 + 16 + (no_indent ? 0 : level * 4)); cur = write_indent(cur, no_indent ? 0 : level, spaces); - cur = write_string(cur, esc, inv, str_ptr, str_len, enc_table); - if (unlikely(!cur)) goto fail_str; + if (likely(cpy) && unsafe_yyjson_get_subtype(val)) { + cur = write_string_noesc(cur, str_ptr, str_len); + } else { + cur = write_string(cur, esc, inv, str_ptr, str_len, enc_table); + if (unlikely(!cur)) goto fail_str; + } *cur++ = is_key ? ':' : ','; *cur++ = is_key ? 
' ' : '\n'; goto val_end; @@ -8074,13 +9297,13 @@ static_inline u8 *yyjson_mut_write_pretty(const yyjson_mut_val *root, goto val_end; } goto fail_type; - + val_end: ctn_len--; if (unlikely(ctn_len == 0)) goto ctn_end; val = val->next; goto val_begin; - + ctn_end: cur -= 2; *cur++ = '\n'; @@ -8098,14 +9321,18 @@ static_inline u8 *yyjson_mut_write_pretty(const yyjson_mut_val *root, } else { goto ctn_end; } - + doc_end: + if (newline) { + incr_len(2); + *cur++ = '\n'; + } *cur = '\0'; *dat_len = (usize)(cur - hdr); err->code = YYJSON_WRITE_SUCCESS; err->msg = "success"; return hdr; - + fail_alloc: return_err(MEMORY_ALLOCATION, "memory allocation failed"); fail_type: @@ -8114,53 +9341,68 @@ static_inline u8 *yyjson_mut_write_pretty(const yyjson_mut_val *root, return_err(NAN_OR_INF, "nan or inf number is not allowed"); fail_str: return_err(INVALID_STRING, "invalid utf-8 encoding in string"); - + #undef return_err #undef incr_len #undef check_str_len } -char *yyjson_mut_val_write_opts(const yyjson_mut_val *val, - yyjson_write_flag flg, - const yyjson_alc *alc_ptr, - usize *dat_len, - yyjson_write_err *err) { +static char *yyjson_mut_write_opts_impl(const yyjson_mut_val *val, + usize estimated_val_num, + yyjson_write_flag flg, + const yyjson_alc *alc_ptr, + usize *dat_len, + yyjson_write_err *err) { yyjson_write_err dummy_err; usize dummy_dat_len; yyjson_alc alc = alc_ptr ? *alc_ptr : YYJSON_DEFAULT_ALC; - yyjson_mut_val *root = (yyjson_mut_val *)val; - + yyjson_mut_val *root = constcast(yyjson_mut_val *)val; + err = err ? err : &dummy_err; dat_len = dat_len ? 
dat_len : &dummy_dat_len; - -#if YYJSON_DISABLE_NON_STANDARD - flg &= ~YYJSON_WRITE_ALLOW_INF_AND_NAN; - flg &= ~YYJSON_WRITE_ALLOW_INVALID_UNICODE; -#endif - + if (unlikely(!root)) { *dat_len = 0; err->msg = "input JSON is NULL"; err->code = YYJSON_WRITE_ERROR_INVALID_PARAMETER; return NULL; } - + if (!unsafe_yyjson_is_ctn(root) || unsafe_yyjson_get_len(root) == 0) { return (char *)yyjson_mut_write_single(root, flg, alc, dat_len, err); } else if (flg & (YYJSON_WRITE_PRETTY | YYJSON_WRITE_PRETTY_TWO_SPACES)) { - return (char *)yyjson_mut_write_pretty(root, flg, alc, dat_len, err); + return (char *)yyjson_mut_write_pretty(root, estimated_val_num, + flg, alc, dat_len, err); } else { - return (char *)yyjson_mut_write_minify(root, flg, alc, dat_len, err); + return (char *)yyjson_mut_write_minify(root, estimated_val_num, + flg, alc, dat_len, err); } } +char *yyjson_mut_val_write_opts(const yyjson_mut_val *val, + yyjson_write_flag flg, + const yyjson_alc *alc_ptr, + usize *dat_len, + yyjson_write_err *err) { + return yyjson_mut_write_opts_impl(val, 0, flg, alc_ptr, dat_len, err); +} + char *yyjson_mut_write_opts(const yyjson_mut_doc *doc, yyjson_write_flag flg, const yyjson_alc *alc_ptr, usize *dat_len, yyjson_write_err *err) { - yyjson_mut_val *root = doc ? doc->root : NULL; - return yyjson_mut_val_write_opts(root, flg, alc_ptr, dat_len, err); + yyjson_mut_val *root; + usize estimated_val_num; + if (likely(doc)) { + root = doc->root; + estimated_val_num = yyjson_mut_doc_estimated_val_num(doc); + } else { + root = NULL; + estimated_val_num = 0; + } + return yyjson_mut_write_opts_impl(root, estimated_val_num, + flg, alc_ptr, dat_len, err); } bool yyjson_mut_val_write_file(const char *path, @@ -8171,9 +9413,9 @@ bool yyjson_mut_val_write_file(const char *path, yyjson_write_err dummy_err; u8 *dat; usize dat_len = 0; - yyjson_mut_val *root = (yyjson_mut_val *)val; + yyjson_mut_val *root = constcast(yyjson_mut_val *)val; bool suc; - + alc_ptr = alc_ptr ? 
alc_ptr : &YYJSON_DEFAULT_ALC; err = err ? err : &dummy_err; if (unlikely(!path || !*path)) { @@ -8181,13 +9423,38 @@ bool yyjson_mut_val_write_file(const char *path, err->code = YYJSON_WRITE_ERROR_INVALID_PARAMETER; return false; } - + dat = (u8 *)yyjson_mut_val_write_opts(root, flg, alc_ptr, &dat_len, err); if (unlikely(!dat)) return false; suc = write_dat_to_file(path, dat, dat_len, err); alc_ptr->free(alc_ptr->ctx, dat); return suc; - +} + +bool yyjson_mut_val_write_fp(FILE *fp, + const yyjson_mut_val *val, + yyjson_write_flag flg, + const yyjson_alc *alc_ptr, + yyjson_write_err *err) { + yyjson_write_err dummy_err; + u8 *dat; + usize dat_len = 0; + yyjson_mut_val *root = constcast(yyjson_mut_val *)val; + bool suc; + + alc_ptr = alc_ptr ? alc_ptr : &YYJSON_DEFAULT_ALC; + err = err ? err : &dummy_err; + if (unlikely(!fp)) { + err->msg = "input fp is invalid"; + err->code = YYJSON_WRITE_ERROR_INVALID_PARAMETER; + return false; + } + + dat = (u8 *)yyjson_mut_val_write_opts(root, flg, alc_ptr, &dat_len, err); + if (unlikely(!dat)) return false; + suc = write_dat_to_fp(fp, dat, dat_len, err); + alc_ptr->free(alc_ptr->ctx, dat); + return suc; } bool yyjson_mut_write_file(const char *path, @@ -8199,13 +9466,14 @@ bool yyjson_mut_write_file(const char *path, return yyjson_mut_val_write_file(path, root, flg, alc_ptr, err); } -#endif /* YYJSON_DISABLE_WRITER */ - - - -/*============================================================================== - * Compiler Hint End - *============================================================================*/ +bool yyjson_mut_write_fp(FILE *fp, + const yyjson_mut_doc *doc, + yyjson_write_flag flg, + const yyjson_alc *alc_ptr, + yyjson_write_err *err) { + yyjson_mut_val *root = doc ? 
doc->root : NULL; + return yyjson_mut_val_write_fp(fp, root, flg, alc_ptr, err); +} #if defined(__clang__) # pragma clang diagnostic pop @@ -8217,4 +9485,6 @@ bool yyjson_mut_write_file(const char *path, # pragma warning(pop) #endif /* warning suppress end */ -} // namespace duckdb_yyjson \ No newline at end of file +#endif /* YYJSON_DISABLE_WRITER */ + +} // namespace duckdb_yyjson