From 53aada1637eafcbe301f160437d1345ff9c285e9 Mon Sep 17 00:00:00 2001 From: Pantelis Antoniou Date: Sun, 19 Feb 2023 12:43:27 +0200 Subject: [PATCH] Add reflection to build Add reflection support to libfyaml. No hooking with the library is done yet, merely it is added to the build. Wip on reflection, more wip, more fixes and wip, prune system, clang fixes, Fix for !libclang, more clang fixes, more clang fixes #2, type include exclude and stuff, eponymous offsets, doc entries, cooking, generics, more wip on generics, generics continue, more cleanup of generics, generics continued, generics continued#3, goind on, destroy, more allocator fun, getting there, allocators, generic_allocator -> allocator, reorg generic, cleanup of generics, indirect document, generic lookups and compares, emitting now, oops, deduping in progress, deduping in progress 2, dedup working, emitter, alias resolution, tag-reorg complete, wip dedup, dedup threshold, mremap arena done, working id, remove schema stuff now, gd -> gdp, ready for some tightening, more streamlining, indirect wip, indirect wip #2, alias wip, tag fun, error reporting and unwinding, tag fun continued, emitter stuff, allocas, ongoing, tag fun continues, alloca, remove anchor from generic document, removed generic destroy, generic copy, decoder, decoder move, encoder in progress, encoder moved, debug, memset encoder, going for carbonite, in progress, wip best allocator, streamline allocators, reset, decode/encode all documents, relocation, caching in the usa, indirects, more indirect, packed indirect, generic merge keys, auto alloc finishing, fix ws, normalize type names Signed-off-by: Pantelis Antoniou --- include/libfyaml.h | 981 ++++++++ libfyaml.pc.in | 2 +- src/Makefile.am | 26 +- src/internal/libfyaml-parser.c | 1088 ++++++++- src/reflection/fy-clang-backend.c | 1182 ++++++++++ src/reflection/fy-clang-backend.h | 27 + src/reflection/fy-packed-backend.c | 1262 ++++++++++ src/reflection/fy-packed-backend.h | 263 +++ 
src/reflection/fy-reflection-private.h | 425 ++++ src/reflection/fy-reflection.c | 3010 ++++++++++++++++++++++++ src/reflection/fy-registry.c | 226 ++ src/tool/fy-tool.c | 2219 +++++++++++++++-- src/util/fy-allocator-auto.c | 337 +++ src/util/fy-allocator-auto.h | 37 + src/util/fy-allocator-dedup.c | 1033 ++++++++ src/util/fy-allocator-dedup.h | 85 + src/util/fy-allocator-linear.c | 330 +++ src/util/fy-allocator-linear.h | 33 + src/util/fy-allocator-malloc.c | 428 ++++ src/util/fy-allocator-malloc.h | 46 + src/util/fy-allocator-mremap.c | 794 +++++++ src/util/fy-allocator-mremap.h | 82 + src/util/fy-allocator.c | 311 +++ src/util/fy-allocator.h | 193 ++ src/util/fy-generic-decoder.c | 894 +++++++ src/util/fy-generic-decoder.h | 81 + src/util/fy-generic-encoder.c | 337 +++ src/util/fy-generic-encoder.h | 35 + src/util/fy-generic.c | 801 +++++++ src/util/fy-generic.h | 1016 ++++++++ src/util/fy-utils.h | 38 +- src/util/fy-vlsize.h | 378 +++ test/Makefile.am | 3 +- 33 files changed, 17763 insertions(+), 240 deletions(-) create mode 100644 src/reflection/fy-clang-backend.c create mode 100644 src/reflection/fy-clang-backend.h create mode 100644 src/reflection/fy-packed-backend.c create mode 100644 src/reflection/fy-packed-backend.h create mode 100644 src/reflection/fy-reflection-private.h create mode 100644 src/reflection/fy-reflection.c create mode 100644 src/reflection/fy-registry.c create mode 100644 src/util/fy-allocator-auto.c create mode 100644 src/util/fy-allocator-auto.h create mode 100644 src/util/fy-allocator-dedup.c create mode 100644 src/util/fy-allocator-dedup.h create mode 100644 src/util/fy-allocator-linear.c create mode 100644 src/util/fy-allocator-linear.h create mode 100644 src/util/fy-allocator-malloc.c create mode 100644 src/util/fy-allocator-malloc.h create mode 100644 src/util/fy-allocator-mremap.c create mode 100644 src/util/fy-allocator-mremap.h create mode 100644 src/util/fy-allocator.c create mode 100644 src/util/fy-allocator.h create mode 
100644 src/util/fy-generic-decoder.c create mode 100644 src/util/fy-generic-decoder.h create mode 100644 src/util/fy-generic-encoder.c create mode 100644 src/util/fy-generic-encoder.h create mode 100644 src/util/fy-generic.c create mode 100644 src/util/fy-generic.h create mode 100644 src/util/fy-vlsize.h diff --git a/include/libfyaml.h b/include/libfyaml.h index 73981298..595db3dd 100644 --- a/include/libfyaml.h +++ b/include/libfyaml.h @@ -7809,6 +7809,987 @@ bool fy_document_iterator_get_error(struct fy_document_iterator *fydi) FY_EXPORT; +/** + * Reflection and schema support + * + * A very limited public interface is provided mainly for use with + * libfyaml, but it should be enough to cover most cases. + * + * Treat the interface provided until version 1.0 as experimental + */ + +struct fy_type_info; +struct fy_field_info; +struct fy_enum_info; + +/* NOTE: order is very important, we rely on ranges for quick computation */ + +/** + * enum fy_type_kind - The types of the reflection plumbing + * + * @FYTK_INVALID: Invalid type + * @FYTK_VOID: The void type + * @FYTK_BOOL: The boolean type + * @FYTK_CHAR: The native char type + * @FYTK_SCHAR: The signed char type + * @FYTK_UCHAR: The unsigned char type + * @FYTK_SHORT: The signed short type + * @FYTK_USHORT: The unsigned short type + * @FYTK_INT: The int type + * @FYTK_UINT: The unsigned int type + * @FYTK_LONG: The long type + * @FYTK_ULONG: The unsigned long type + * @FYTK_LONGLONG: The long long type + * @FYTK_ULONGLONG: The unsigned long long type + * @FYTK_INT128: A signed int 128 bit type (may not be available on all arches) + * @FYTK_UINT128: An unsigned int 128 bit type (may not be available on all arches) + * @FYTK_FLOAT: The float type + * @FYTK_DOUBLE: The double type + * @FYTK_LONGDOUBLE: The long double type + * @FYTK_FLOAT16: A 16 bit float type (may not be available on all arches) + * @FYTK_FLOAT128: A 128 bit float type (may not be available on all arches) + * + * @FYTK_S8: The explicitly sized 
signed 8 bit type + * @FYTK_U8: The explicitly sized unsigned 8 bit type + * @FYTK_S16: The explicitly sized signed 16 bit type + * @FYTK_U16: The explicitly sized unsigned 16 bit type + * @FYTK_S32: The explicitly sized signed 32 bit type + * @FYTK_U32: The explicitly sized unsigned 32 bit type + * @FYTK_S64: The explicitly sized signed 64 bit type + * @FYTK_U64: The explicitly sized unsigned 64 bit type + * @FYTK_S128: The explicitly sized signed 128 bit type (may not be available on all arches) + * @FYTK_U128: The explicitly sized unsigned 128 bit type (may not be available on all arches) + * + * @FYTK_RECORD: A generic record type (not used for C) + * @FYTK_STRUCT: A struct type + * @FYTK_UNION: A union type + * + * @FYTK_ENUM: An enumeration type + * @FYTK_TYPEDEF: A typedef type + * @FYTK_PTR: A pointer type + * @FYTK_CONSTARRAY: A constant array type + * @FYTK_INCOMPLETEARRAY: An incomplete array type + * + * @FYTK_FUNCTION: A function type + * + */ +enum fy_type_kind { + FYTK_INVALID, + + /* built-in C types (without an explicit size) */ + FYTK_VOID, + FYTK_BOOL, + FYTK_CHAR, + FYTK_SCHAR, + FYTK_UCHAR, + FYTK_SHORT, + FYTK_USHORT, + FYTK_INT, + FYTK_UINT, + FYTK_LONG, + FYTK_ULONG, + FYTK_LONGLONG, + FYTK_ULONGLONG, + FYTK_INT128, + FYTK_UINT128, + FYTK_FLOAT, + FYTK_DOUBLE, + FYTK_LONGDOUBLE, + FYTK_FLOAT16, + FYTK_FLOAT128, + + /* explicitly sized types */ + FYTK_S8, + FYTK_U8, + FYTK_S16, + FYTK_U16, + FYTK_S32, + FYTK_U32, + FYTK_S64, + FYTK_U64, + FYTK_S128, + FYTK_U128, + + /* compound */ + FYTK_RECORD, /* generic struct, union, class */ + FYTK_STRUCT, + FYTK_UNION, + + FYTK_ENUM, + FYTK_TYPEDEF, + FYTK_PTR, + FYTK_CONSTARRAY, + FYTK_INCOMPLETEARRAY, + + FYTK_FUNCTION, +}; + +// define FY_HAS_FP16 if __fp16 is available +#if defined(__is_identifier) +#if ! 
__is_identifier(__fp16) +#define FY_HAS_FP16 +#endif +#endif + +// define FY_HAS_FLOAT128 if __float128 is available +#if defined(__SIZEOF_FLOAT128__) +#if __SIZEOF_FLOAT128__ == 16 +#define FY_HAS_FLOAT128 +#endif +#endif + +// define FY_HAS_INT128 if __int128 is available +#if defined(__SIZEOF_INT128__) +#if __SIZEOF_INT128__ == 16 +#define FY_HAS_INT128 +#endif +#endif + +#ifdef UINTPTR_MAX +#if UINTPTR_MAX == 0xffffffff +#define FY_HAS_32BIT_PTR +#elif UINTPTR_MAX == 0xffffffffffffffff +#define FY_HAS_64BIT_PTR +#endif +#endif + +/* The count of types */ +#define FYTK_COUNT (FYTK_FUNCTION + 1) + +/** + * fy_type_kind_is_valid() - Check type kind for validity + * + * Check whether the type kind is valid. + * + * @type_kind: The type_kind to check + * + * Returns: + * true if valid, false otherwise + */ +static inline bool fy_type_kind_is_valid(enum fy_type_kind type_kind) +{ + return type_kind >= FYTK_VOID && type_kind <= FYTK_FUNCTION; +} + +/** + * fy_type_kind_is_primitive() - Check if it's a primitive type kind + * + * Check whether the type kind is for a primitive C type + * + * @type_kind: The type_kind to check + * + * Returns: + * true if primitive, false otherwise + */ +static inline bool fy_type_kind_is_primitive(enum fy_type_kind type_kind) +{ + return type_kind >= FYTK_VOID && type_kind <= FYTK_U128; +} + +/** + * fy_type_kind_is_like_ptr() - Check if it's a pointer like type + * + * Check whether the type kind matches a pointer like use, + * which is pointer, constant array or incomplete array. + * + * @type_kind: The type_kind to check + * + * Returns: + * true if pointer like, false otherwise + */ +static inline bool fy_type_kind_is_like_ptr(enum fy_type_kind type_kind) +{ + return type_kind >= FYTK_PTR && type_kind <= FYTK_INCOMPLETEARRAY; +} + +/** + * fy_type_kind_is_record() - Check if it's a record like type + * + * Check whether the type kind contains other types in a record + * like structure, like a struct or union. 
+ * + * @type_kind: The type_kind to check + * + * Returns: + * true if record, false otherwise + */ +static inline bool fy_type_kind_is_record(enum fy_type_kind type_kind) +{ + return type_kind >= FYTK_RECORD && type_kind <= FYTK_UNION; +} + +/** + * fy_type_kind_is_numeric() - Check if it's a numeric type + * + * Check whether the type kind points to a number, either + * integer or float + * + * @type_kind: The type_kind to check + * + * Returns: + * true if numeric, false otherwise + */ +static inline bool fy_type_kind_is_numeric(enum fy_type_kind type_kind) +{ + return type_kind >= FYTK_BOOL && type_kind <= FYTK_FLOAT128; +} + +/** + * fy_type_kind_is_enum_constant_decl() - Check if it's a type that can be an enum + * + * Check whether the type kind points to something that is a valid enum constant + * declaration. + * For normal cases it's >= int but for weird packed cases can be something smaller. + * + * @type_kind: The type_kind to check + * + * Returns: + * true if it is a type than can be an enum constant declaration, false otherwise + */ +static inline bool fy_type_kind_is_enum_constant_decl(enum fy_type_kind type_kind) +{ + return type_kind >= FYTK_CHAR && type_kind <= FYTK_ULONGLONG; +} + +/** + * fy_type_kind_has_fields() - Check if the type has fields + * + * Check whether the type kind has fields, either if it's a record + * or an enumeration type. + * + * @type_kind: The type_kind to check + * + * Returns: + * true if it has fields, false otherwise + */ +static inline bool fy_type_kind_has_fields(enum fy_type_kind type_kind) +{ + return type_kind >= FYTK_STRUCT && type_kind <= FYTK_ENUM; +} + +/** + * fy_type_kind_has_prefix() - Check if the type requires a prefix + * + * Check whether the type kind requires a prefix when displayed, + * ie. like struct union or enum types. 
+ * + * @type_kind: The type_kind to check + * + * Returns: + * true if it has prefix, false otherwise + */ +static inline bool fy_type_kind_has_prefix(enum fy_type_kind type_kind) +{ + return type_kind >= FYTK_STRUCT && type_kind <= FYTK_ENUM; +} + +/** + * fy_type_kind_is_dependent() - Check if the type is dependent on another + * + * Check whether the type kind is dependent on another, i.e. + * a typedef. An enum is also dependent because the underlying type + * matches the range of the enum values. + * + * @type_kind: The type_kind to check + * + * Returns: + * true if it is dependent, false otherwise + */ +static inline bool fy_type_kind_is_dependent(enum fy_type_kind type_kind) +{ + return type_kind >= FYTK_ENUM && type_kind <= FYTK_INCOMPLETEARRAY; +} + +/** + * fy_type_kind_signess() - Find out the type's sign + * + * Check how the type deals with signs. + * + * @type_kind: The type_kind to check + * + * Returns: + * -1 signed, 1 unsigned, 0 not relevant for this type + */ +int fy_type_kind_signess(enum fy_type_kind type_kind) + FY_EXPORT; + +/** + * struct fy_type_kind_info - Information about types + * + * @kind: The type's kind id + * @name: The name of the type (i.e. int, struct) + * @enum_name: The name of the type_kind enum (for code generation) + * @size: The size of the type + * @align: The alignment of the type + * + * This structure contains information about each type kind + * we defined. + */ +struct fy_type_kind_info { + enum fy_type_kind kind; + const char *name; + const char *enum_name; + size_t size; + size_t align; +}; + +/** + * fy_type_kind_info_get() - Get the type info of a type from it's id + * + * Retrieve the type info structure from a type kind id. 
+ * + * @type_kind: The type_kind + * + * Returns: + * The info structure that corresponds to the id, or NULL if invalid argument + */ +const struct fy_type_kind_info * +fy_type_kind_info_get(enum fy_type_kind type_kind) + FY_EXPORT; + +/** + * enum fy_field_info_flags - Flags for a field entry + * + * @FYFIF_ANONYMOUS: Set if the declaration was anonymous + * @FYFIF_BITFIELD: Set if the field is a bitfield and not a regular field + * @FYFIF_ENUM_UNSIGNED: Set if the enum value is unsigned + */ +enum fy_field_info_flags { + FYFIF_ANONYMOUS = FY_BIT(0), + FYFIF_BITFIELD = FY_BIT(1), + FYFIF_ENUM_UNSIGNED = FY_BIT(1), /* same bit */ +}; + +/** + * struct fy_field_info - Information of a field of a record/enum type + * + * @flags: Flags that pertain to this entry + * @name: The name of the field + * @type_info: Type of this field + * @offset: Byte offset if regular field of struct/union + * @bit_offset: The bit offset of this bit field + * @bit_width: The bit width of this bit field + * @uval: The unsigned enum value of this field + * @sval: The signed enum value of this field + */ +struct fy_field_info { + enum fy_field_info_flags flags; + const struct fy_type_info *parent; + const char *name; + const struct fy_type_info *type_info; + union { + size_t offset; /* regular field */ + struct { + size_t bit_offset; /* bitfield */ + size_t bit_width; + }; + unsigned long long uval; /* enum value */ + signed long long sval; + }; +}; + +/** + * enum fy_type_info_flags - Flags for a type info entry + * + * @FYTIF_CONST: Const qualifier for this type enabled + * @FYTIF_VOLATILE: Volatile qualifier for this type enabled + * @FYTIF_RESTRICT: Restrict qualifier for this type enabled + * @FYTIF_UNRESOLVED_PTR: This type is unresolved + * @FYTIF_MAIN_FILE: The type was declared in the main file of an import + * @FYTIF_SYSTEM_HEADER: The type was declared in a system header + * @FYTIF_ANONYMOUS: The type is anonymous, ie. declared in place. 
+ */ +enum fy_type_info_flags { + FYTIF_CONST = FY_BIT(0), + FYTIF_VOLATILE = FY_BIT(1), + FYTIF_RESTRICT = FY_BIT(2), + FYTIF_UNRESOLVED_PTR = FY_BIT(4), /* when pointer is declared but not resolved */ + FYTIF_MAIN_FILE = FY_BIT(5), /* type declaration in main file */ + FYTIF_SYSTEM_HEADER = FY_BIT(6), /* type declaration in a system header */ + FYTIF_ANONYMOUS = FY_BIT(7), /* type is anonymous */ +}; + +/** + * struct fy_type_info - Information of a type + * + * @kind: The kind of this type + * @flags: Flags that pertain to this type + * @name: The name of the type + * @fullname: The full name of the type (including the prefix) + * @size: The size of the type + * @align: The alignment of the type + * @dependent_type: The type this one is dependent on (i.e. typedef) + * @count: The number of fields, or the element count for const array + * @fields: The fields of the type + */ +struct fy_type_info { + enum fy_type_kind kind; + enum fy_type_info_flags flags; + const char *name; /* the name */ + const char *fullname; /* struct foo, enum bar, int, typedef */ + const char *normalized_name; /* same as full name, but normalized */ + size_t size; + size_t align; + const struct fy_type_info *dependent_type; /* for ptr, typedef, enum and arrays */ + size_t count; /* for constant arrays, union, struct, enums */ + const struct fy_field_info *fields; +}; + +/* fwd declaration */ +struct fy_reflection; + +/** + * fy_reflection_destroy() - Destroy a reflection + * + * Destroy a reflection that was previously created + * + * @rfl: The reflection + * + */ +void +fy_reflection_destroy(struct fy_reflection *rfl) + FY_EXPORT; + +/** + * fy_reflection_clear_all_markers() - Clear all markers + * + * Clear all markers put on types of the reflection + * + * @rfl: The reflection + * + */ +void +fy_reflection_clear_all_markers(struct fy_reflection *rfl) + FY_EXPORT; + +/** + * fy_reflection_prune_unmarked() - Remove all unmarked types + * + * Remove all unmarked types of the reflection. 
+ * + * @rfl: The reflection + */ +void +fy_reflection_prune_unmarked(struct fy_reflection *rfl) + FY_EXPORT; + +/** + * fy_reflection_is_resolved() - Test whether the reflection is resolved. + * + * Check whether a reflection is fully resolved, i.e. no types are referring + * to undefined types. + * + * @rfl: The reflection + * + * Returns: + * true if the reflection is resolved, false if there are unresolved references + */ +bool +fy_reflection_is_resolved(struct fy_reflection *rfl) + FY_EXPORT; + +/** + * fy_reflection_from_imports() - Create a reflection from imports + * + * Create a reflection by the imports of the given backend. + * + * @backend_name: The name of the backend + * @backend_cfg: The configuration of the backend + * @num_imports: The number of imports + * @import_cfgs: The array of import configs. + * + * Returns: + * The reflection pointer, or NULL if an error occured. + */ +struct fy_reflection * +fy_reflection_from_imports(const char *backend_name, const void *backend_cfg, + int num_imports, const void *import_cfgs[]) + FY_EXPORT; + +/** + * fy_reflection_from_import() - Create a reflection from an import + * + * Create a reflection by a single import of the given backend. + * + * @backend_name: The name of the backend + * @backend_cfg: The configuration of the backend + * @import_cfg: The import configuration + * + * Returns: + * The reflection pointer, or NULL if an error occured. 
+ */ +struct fy_reflection * +fy_reflection_from_import(const char *backend_name, const void *backend_cfg, const void *import_cfg) + FY_EXPORT; + +/** + * fy_reflection_from_c_files() - Create a reflection from C files + * + * Create a reflection from C source files + * + * @filec: Number of files + * @filev: An array of files + * @argc: Number of arguments to pass to libclang + * @argv: Arguments to pass to libclang + * @display_diagnostics: Display diagnostics (useful in case of errors) + * @include_comments: Include comments in the type database + * + * Returns: + * The reflection pointer, or NULL if an error occured. + */ +struct fy_reflection * +fy_reflection_from_c_files(int filec, const char * const filev[], int argc, const char * const argv[], + bool display_diagnostics, bool include_comments) + FY_EXPORT; + +/** + * fy_reflection_from_c_file() - Create a reflection from a single C file + * + * Create a reflection from a single C source file + * + * @file: The C file + * @argc: Number of arguments to pass to libclang + * @argv: Arguments to pass to libclang + * @display_diagnostics: Display diagnostics (useful in case of errors) + * @include_comments: Include comments in the type database + * + * Returns: + * The reflection pointer, or NULL if an error occured. + */ +struct fy_reflection * +fy_reflection_from_c_file(const char *file, int argc, const char * const argv[], + bool display_diagnostics, bool include_comments) + FY_EXPORT; + +/** + * fy_reflection_from_c_file_with_cflags() - Create a reflection from a single C file with CFLAGS + * + * Create a reflection from a single C source file, using a simpler CFLAGS api + * + * @file: The C file + * @cflags: The C flags + * @display_diagnostics: Display diagnostics (useful in case of errors) + * @include_comments: Include comments in the type database + * + * Returns: + * The reflection pointer, or NULL if an error occured. 
+ */ +struct fy_reflection * +fy_reflection_from_c_file_with_cflags(const char *file, const char *cflags, + bool display_diagnostics, bool include_comments) + FY_EXPORT; + +/** + * fy_reflection_from_packed_blob() - Create a reflection from a packed blob + * + * Create a reflection from a packed blob. + * + * @blob: A pointer to the binary blob + * @blob_size: The size of the blob + * + * Returns: + * The reflection pointer, or NULL if an error occured. + */ +struct fy_reflection * +fy_reflection_from_packed_blob(const void *blob, size_t blob_size) + FY_EXPORT; + +/** + * fy_reflection_to_packed_blob() - Create blob from a reflection + * + * Create a packed blob from the given reflection + * + * @rfl: The reflection + * @blob_sizep: Pointer to a variable to store the generated blobs size + * @include_comments: Include comments in the blob + * @include_location: Include the location information in the blob + * + * Returns: + * A pointer to the blob, or NULL in case of an error + */ +void * +fy_reflection_to_packed_blob(struct fy_reflection *rfl, size_t *blob_sizep, + bool include_comments, bool include_location) + FY_EXPORT; + +/** + * fy_reflection_from_packed_blob_file() - Create a reflection from a packed blob file + * + * Create a reflection from the given packed blob file + * + * @blob_file: The name of the blob file + * + * Returns: + * The reflection pointer, or NULL if an error occured. 
+ */ +struct fy_reflection * +fy_reflection_from_packed_blob_file(const char *blob_file) + FY_EXPORT; + +/** + * fy_reflection_to_packed_blob_file() - Create a packed blob file from reflection + * + * Create a packed blob file from the given reflection + * + * @rfl: The reflection + * @blob_file: The name of the blob file + * + * Returns: + * 0 on success, -1 on error + */ +int +fy_reflection_to_packed_blob_file(struct fy_reflection *rfl, const char *blob_file) + FY_EXPORT; + +/** + * fy_type_info_iterate() - Iterate over the types of the reflection + * + * This method iterates over all the types of a reflection. + * The start of the iteration is signalled by a NULL in \*prevp. + * + * @rfl: The reflection + * @prevp: The previous type sequence iterator + * + * Returns: + * The next type in sequence or NULL at the end of the type sequence. + */ +const struct fy_type_info * +fy_type_info_iterate(struct fy_reflection *rfl, void **prevp) + FY_EXPORT; + +/** + * fy_type_info_reverse_iterate() - Iterate over the types of the reflection in reverse + * + * This method iterates over all the types of a reflection in reverse. + * The start of the iteration is signalled by a NULL in \*prevp. + * + * @rfl: The reflection + * @prevp: The previous type sequence iterator + * + * Returns: + * The next type in sequence or NULL at the end of the type sequence. 
+ */ +const struct fy_type_info * +fy_type_info_reverse_iterate(struct fy_reflection *rfl, void **prevp) + FY_EXPORT; + + +/** + * fy_type_info_to_reflection() - Get the reflection a type belongs to + * + * Return the reflection this type belongs to + * + * @ti: The type info + * + * Returns: + * The reflection this type belongs to, or NULL if bad ti argument + */ +struct fy_reflection * +fy_type_info_to_reflection(const struct fy_type_info *ti) + FY_EXPORT; + +/** + * fy_type_info_generate_name() - Generate a name for a type + * + * Generate a name from the type by traversing the type definitions + * down to their dependent primitive types. + * + * @ti: The type info + * @field: The field if using the call to generate a field definition + * or NULL if not. + * @normalized: True to return the normalized name, that is without any + * superfluous whitespace for pretty printing. + * + * Returns: + * A malloc()'ed pointer to the name, or NULL in case of an error. + * This pointer must be free()'d when the caller is done with it. + */ +char * +fy_type_info_generate_name(const struct fy_type_info *ti, const char *field, bool normalized) + FY_EXPORT; + +/** + * fy_type_name_normalize() - Normalize a C type name + * + * Normalize a type name by removing superfluous whitespace, converting + * it to a format that is suitable for type name comparison. + * Note that no attempt is made to verify that the type name is a valid + * C one, so caller beware. + * + * @type_name: The type name to normalize + * + * Returns: + * A malloc()'ed pointer to the normalized name, or NULL in case of an error. + * This pointer must be free()'d when the caller is done with it. + */ +char * +fy_type_name_normalize(const char *type_name) + FY_EXPORT; + +/** + * fy_type_info_clear_marker() - Clear the marker on a type + * + * Clear the marker on a type. Note this call will not clear the + * markers of the dependent types. 
+ */ +void +fy_type_info_clear_marker(const struct fy_type_info *ti) + FY_EXPORT; + +/** + * fy_type_info_mark() - Mark a type and it's dependencies + * + * Mark the type and recursively mark all types this one depends on. + */ +void +fy_type_info_mark(const struct fy_type_info *ti) + FY_EXPORT; + +/** + * fy_type_info_is_marked() - Check whether a type is marked + * + * Check the mark of a type + * + * Returns: + * true if the type is marked, false otherwise + */ +bool +fy_type_info_is_marked(const struct fy_type_info *ti) + FY_EXPORT; + +/** + * fy_type_info_eponymous_offset() - Offset of an anonymous type from the + * closest eponymous parent type. + * + * For anonymous types, get the offset from the start of the enclosing + * eponymous type. For example: + * + * struct baz { + * int foo; + * struct { // <- anonymous + * int bar; // <- offset from baz + * } bar; + * }; + * + * @ti: The anonymous type + * + * Returns: + * The offset from the closest eponymous parent type or 0 if not anonymous + */ +size_t +fy_type_info_eponymous_offset(const struct fy_type_info *ti) + FY_EXPORT; + +/** + * fy_type_info_get_comment() - Get the comment for a type + * + * Retrieve the 'cooked' comment for a type. The cooking consists of + * (trying) to remove comment formatting. For example: + * + * // this is a comment + * // which requires cooking + * + * Would be cooked as + * + * this is a comment + * which requires cooking + * + * And /\* this is a comment *\/ -> 'this is a comment' + * + * @ti: The type info + * + * Returns: + * The cooked comment, or NULL + */ +const char * +fy_type_info_get_comment(const struct fy_type_info *ti) + FY_EXPORT; + +/** + * fy_field_info_get_comment() - Get the comment for a field + * + * Retrieve the 'cooked' comment for a field. The cooking consists of + * (trying) to remove comment formatting. 
For example: + * + * // this is a comment + * // which requires cooking + * + * Would be cooked as + * + * this is a comment + * which requires cooking + * + * And /\* this is a comment *\/ -> 'this is a comment' + * + * @fi: The field info + * + * Returns: + * The cooked comment, or NULL + */ +const char * +fy_field_info_get_comment(const struct fy_field_info *fi) + FY_EXPORT; + +/** + * fy_type_info_get_yaml_annotation() - Get the yaml annotation of this type + * + * Retrieve a document containing the yaml keyword annotations of this type + * + * @ti: The type info + * + * Returns: + * The yaml annotation document or NULL + */ +struct fy_document * +fy_type_info_get_yaml_annotation(const struct fy_type_info *ti) + FY_EXPORT; + +const char * +fy_type_info_get_yaml_name(const struct fy_type_info *ti) + FY_EXPORT; + +const char * +fy_field_info_get_yaml_name(const struct fy_field_info *fi) + FY_EXPORT; + +/** + * fy_field_info_get_yaml_annotation() - Get the yaml annotation document for this field + * + * Retrieve a document containing the yaml keyword annotations of this field + * + * @fi: The field info + * + * Returns: + * The yaml annotation document, or NULL + */ +struct fy_document * +fy_field_info_get_yaml_annotation(const struct fy_field_info *fi) + FY_EXPORT; + +/** + * fy_reflection_dump() - Dump internal type database + * + * @rfl: The reflection + * @marked_only: Dump marked structures only + * @no_location: Do not display location information + */ +void fy_reflection_dump(struct fy_reflection *rfl, bool marked_only, bool no_location) + FY_EXPORT; + +/** + * fy_field_info_index() - Get the index of a field of a type + * + * Retrieve the 0-based index of a field info. The first + * structure member is 0, the second 1 etc. 
+ * + * @fi: The pointer to the field info + * + * Returns: + * The index of the field if >= 0, -1 on error + */ +int +fy_field_info_index(const struct fy_field_info *fi) + FY_EXPORT; + +/** + * fy_type_info_lookup_field() - Lookup a field of a type by name + * + * Lookup the field with the given name on the given type. + * + * @ti: The pointer to the type info + * @name: The name of the field + * + * Returns: + * A pointer to the field info if field was found, NULL otherwise + */ +const struct fy_field_info * +fy_type_info_lookup_field(const struct fy_type_info *ti, const char *name) + FY_EXPORT; + +/** + * fy_type_info_lookup_field_by_enum_value() - Lookup an enum field of a type by value + * + * Lookup the field with the enum value on the given type. + * + * @ti: The pointer to the type info + * @val: The value of the enumeration + * + * Returns: + * A pointer to the field info if field was found, NULL otherwise + */ +const struct fy_field_info * +fy_type_info_lookup_field_by_enum_value(const struct fy_type_info *ti, long long val) + FY_EXPORT; + +/** + * fy_type_info_lookup_field_by_unsigned_enum_value() - Lookup an enum field of a type by unsigned value + * + * Lookup the field with the enum value on the given type. + * + * @ti: The pointer to the type info + * @val: The value of the enumeration + * + * Returns: + * A pointer to the field info if field was found, NULL otherwise + */ +const struct fy_field_info * +fy_type_info_lookup_field_by_unsigned_enum_value(const struct fy_type_info *ti, unsigned long long val) + FY_EXPORT; + +/** + * fy_type_info_set_userdata() - Set the userdata associated with a type + * + * Set the user data associated with the given type. 
+ * + * @ti: The pointer to the type info + * @userdata: A void pointer that can be used to retrieve the data + */ +void +fy_type_info_set_userdata(const struct fy_type_info *ti, void *userdata) + FY_EXPORT; + +/** + * fy_type_info_get_userdata() - Get the userdata associated with a type + * + * Retrieve the user data associated with the given type via a + * previous call to fy_type_info_set_userdata(). + * + * @ti: The pointer to the type info + * + * Returns: + * The userdata associated with the type, or NULL on error + */ +void * +fy_type_info_get_userdata(const struct fy_type_info *ti) + FY_EXPORT; + +/** + * fy_field_info_set_userdata() - Set the userdata associated with a field + * + * Set the user data associated with the given field. + * + * @fi: The pointer to the field info + * @userdata: A void pointer that can be used to retrieve the data + */ +void +fy_field_info_set_userdata(const struct fy_field_info *fi, void *userdata) + FY_EXPORT; + +/** + * fy_field_info_get_userdata() - Get the userdata associated with a field + * + * Retrieve the user data associated with the given field via a + * previous call to fy_field_info_set_userdata(). 
+ * + * @fi: The pointer to the field info + * + * Returns: + * The userdata associated with the field, or NULL on error + */ +void * +fy_field_info_get_userdata(const struct fy_field_info *fi) + FY_EXPORT; + #ifdef __cplusplus } #endif diff --git a/libfyaml.pc.in b/libfyaml.pc.in index 299d11ef..ad9cd497 100644 --- a/libfyaml.pc.in +++ b/libfyaml.pc.in @@ -6,5 +6,5 @@ includedir=@includedir@ Name: libfyaml Description: Fancy YAML 1.3 parser library Version: @PACKAGE_VERSION@ -Libs: -L${libdir} @ASAN_LIBS@ -lfyaml @PTHREAD_LIBS@ +Libs: -L${libdir} @ASAN_LIBS@ -lfyaml @PTHREAD_LIBS@ @LIBCLANG_LDFLAGS@ @LIBCLANG_LIBS@ Cflags: -I${includedir} @ASAN_CFLAGS@ @PTHREAD_CFLAGS@ diff --git a/src/Makefile.am b/src/Makefile.am index 56c1a9d6..8bd503a8 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -29,7 +29,25 @@ libfyaml_la_SOURCES = \ util/fy-utils.c util/fy-utils.h \ util/fy-endian.h \ util/fy-blob.c util/fy-blob.h \ - util/fy-id.h + util/fy-id.h \ + util/fy-vlsize.h \ + util/fy-allocator.c util/fy-allocator.h \ + util/fy-allocator-linear.c util/fy-allocator-linear.h \ + util/fy-allocator-malloc.c util/fy-allocator-malloc.h \ + util/fy-allocator-mremap.c util/fy-allocator-mremap.h \ + util/fy-allocator-dedup.c util/fy-allocator-dedup.h \ + util/fy-allocator-auto.c util/fy-allocator-auto.h \ + util/fy-generic.c util/fy-generic.h \ + util/fy-generic-decoder.c util/fy-generic-decoder.h \ + util/fy-generic-encoder.c util/fy-generic-encoder.h \ + reflection/fy-reflection.c reflection/fy-reflection-private.h \ + reflection/fy-packed-backend.c reflection/fy-packed-backend.h \ + reflection/fy-registry.c + +if HAVE_LIBCLANG +libfyaml_la_SOURCES += \ + reflection/fy-clang-backend.c reflection/fy-clang-backend.h +endif libfyaml_la_CPPFLAGS = $(AM_CPPFLAGS) \ -I$(top_srcdir)/src/lib \ @@ -39,6 +57,12 @@ libfyaml_la_CFLAGS = $(AM_CFLAGS) libfyaml_la_LDFLAGS = -no-undefined $(AM_LDFLAGS) $(AM_LIBLDFLAGS) \ -version $(LIBTOOL_VERSION) +if HAVE_LIBCLANG +libfyaml_la_CPPFLAGS += 
$(LIBCLANG_CPPFLAGS) +libfyaml_la_CFLAGS += $(LIBCLANG_CFLAGS) +libfyaml_la_LDFLAGS += $(LIBCLANG_LDFLAGS) $(LIBCLANG_LIBS) +endif + bin_PROGRAMS = noinst_PROGRAMS = diff --git a/src/internal/libfyaml-parser.c b/src/internal/libfyaml-parser.c index d24f03f8..14ddd335 100644 --- a/src/internal/libfyaml-parser.c +++ b/src/internal/libfyaml-parser.c @@ -17,9 +17,16 @@ #include #include #include +#include +#include #include #include -#include +#include +#include +#include +#include +#include +#include #include @@ -30,6 +37,16 @@ #include "fy-parse.h" #include "fy-walk.h" #include "fy-blob.h" +#include "fy-generic.h" +#include "fy-generic-decoder.h" +#include "fy-generic-encoder.h" +#include "fy-id.h" +#include "fy-allocator.h" +#include "fy-allocator-linear.h" +#include "fy-allocator-malloc.h" +#include "fy-allocator-mremap.h" +#include "fy-allocator-dedup.h" +#include "fy-allocator-auto.h" #include "fy-valgrind.h" @@ -61,6 +78,9 @@ #define OPT_YAML_1_2 4001 #define OPT_YAML_1_3 4002 +#define OPT_ALLOCATOR 4003 +#define OPT_CACHE 4004 + static struct option lopts[] = { {"include", required_argument, 0, 'I' }, {"mode", required_argument, 0, 'm' }, @@ -86,6 +106,8 @@ static struct option lopts[] = { {"yaml-1.3", no_argument, 0, OPT_YAML_1_3 }, {"sloppy-flow-indentation", no_argument, 0, OPT_SLOPPY_FLOW_INDENTATION }, {"ypath-aliases", no_argument, 0, OPT_YPATH_ALIASES }, + {"allocator", required_argument, 0, OPT_ALLOCATOR }, + {"cache", required_argument, 0, OPT_CACHE }, {"quiet", no_argument, 0, 'q' }, {"help", no_argument, 0, 'h' }, {0, 0, 0, 0 }, @@ -97,7 +119,7 @@ static struct option lopts[] = { #define LIBYAML_MODES "" #endif -#define MODES "parse|scan|copy|testsuite|dump|dump2|build|walk|reader|compose|iterate|comment|pathspec|shell-split|parse-timing" LIBYAML_MODES +#define MODES "parse|scan|copy|testsuite|dump|dump2|build|walk|reader|compose|iterate|comment|pathspec|shell-split|parse-timing|generics|remap|parse-generic|idbit" LIBYAML_MODES static void 
display_usage(FILE *fp, char *progname) { @@ -280,10 +302,10 @@ static char *txt2esc_format(const char *s, int l, char *buf, int maxsz, int deli #define txt2esc_a(_s, _l) \ ({ \ - const char *__s = (const void *)(_s); \ - int __l = (_l); \ + const char *__s = (const void *)(_s); \ + int __l = (_l); \ int _ll = txt2esc_length(__s, __l, '\''); \ - txt2esc_format(__s, __l, alloca(_ll + 1), _ll + 1, '\''); \ + txt2esc_format(__s, __l, alloca(_ll + 1), _ll + 1, '\''); \ }) #define fy_atom_get_esc_text_a(_atom) txt2esc_a(fy_atom_get_text_a(_atom), -1) @@ -1208,9 +1230,9 @@ int do_libyaml_scan(yaml_parser_t *parser) #define mark_a(_m) \ ({ \ yaml_mark_t *__m = (_m); \ - char *_s = alloca(30); \ - snprintf(_s, 30, "%zu/%zu/%zu", __m->index, __m->line, __m->column); \ - _s; \ + char *_s = alloca(30); \ + snprintf(_s, 30, "%zu/%zu/%zu", __m->index, __m->line, __m->column); \ + _s; \ }) void dump_libyaml_event(yaml_event_t *event) @@ -3992,7 +4014,7 @@ int do_shell_split(int in_argc, char *in_argv[]) return 0; } -int do_parse_timing(int argc, char *argv[], bool disable_mmap) +int do_parse_timing(int argc, char *argv[]) { void *blob; size_t blob_size; @@ -4109,6 +4131,1020 @@ int do_parse_timing(int argc, char *argv[], bool disable_mmap) return 0; }
+/*
+ * fy_generic_print_primitive() - Print a generic value in a compact debug form
+ *
+ * @fp: The stream to print to
+ * @v: The generic value to print
+ *
+ * Scalars are printed inline (strings single quoted, aliases as *'name').
+ * Sequences and mappings are printed recursively, comma separated and
+ * enclosed in brackets; mapping entries are printed as "key: value".
+ * An invalid value is printed as "invalid".
+ * A value of any other, unhandled type aborts the program.
+ */
+void fy_generic_print_primitive(FILE *fp, fy_generic v)
+{
+	const char *sv;
+	fy_generic iv;
+	fy_generic key, value;
+	const fy_generic *items;
+	size_t i, count, slen;
+
+	/* nothing more to print for an invalid value; do not enter the type switch */
+	if (v == fy_invalid) {
+		fprintf(fp, "invalid");
+		return;
+	}
+
+	switch (fy_generic_get_type(v)) {
+	case FYGT_NULL:
+		fprintf(fp, "%s", "null");
+		return;
+
+	case FYGT_BOOL:
+		fprintf(fp, "%s", fy_generic_get_bool(v) ? "true" : "false");
+		return;
+
+	case FYGT_INT:
+		fprintf(fp, "%lld", fy_generic_get_int(v));
+		return;
+
+	case FYGT_FLOAT:
+		fprintf(fp, "%f", fy_generic_get_float(v));
+		return;
+
+	case FYGT_STRING:
+		sv = fy_generic_get_string_size_alloca(v, &slen);
+		fprintf(fp, "'%.*s'", (int)slen, sv);
+		return;
+
+	case FYGT_SEQUENCE:
+		items = fy_generic_sequence_get_items(v, &count);
+		fprintf(fp, "[");
+		for (i = 0; i < count; i++) {
+			iv = items[i];
+			fy_generic_print_primitive(fp, iv);
+			/* the separator must go to the same stream as the items */
+			if (i + 1 < count)
+				fprintf(fp, ", ");
+		}
+		fprintf(fp, "]");
+		break;
+
+	case FYGT_MAPPING:
+		/* pairs are laid out flat: key at 2*i, value at 2*i + 1 */
+		items = fy_generic_mapping_get_pairs(v, &count);
+		/* NOTE(review): mappings use the same [ ] brackets as sequences - confirm this is intended */
+		fprintf(fp, "[");
+		for (i = 0; i < count; i++) {
+			key = items[i * 2];
+			value = items[i * 2 + 1];
+			fy_generic_print_primitive(fp, key);
+			fprintf(fp, ": ");
+			fy_generic_print_primitive(fp, value);
+			/* the separator must go to the same stream as the pairs */
+			if (i + 1 < count)
+				fprintf(fp, ", ");
+		}
+		fprintf(fp, "]");
+		break;
+
+	case FYGT_ALIAS:
+		sv = fy_generic_get_alias_size_alloca(v, &slen);
+		fprintf(fp, "*'%.*s'", (int)slen, sv);
+		break;
+
+	default:
+		assert(0);
+		abort();
+	}
+}
+
+fy_generic do_x(void)
+{
+	fy_generic vstr;
+
+	// asm volatile("nop; nop" : : : "memory");
+	vstr = fy_generic_string_alloca("test");
+	// asm volatile("nop; nop" : : "r"(vstr) : "memory");
+
+	return vstr;
+}
+
+fy_generic do_x2(void)
+{
+	fy_generic vf;
+
+	asm volatile("nop; nop" : : : "memory");
+	vf = fy_generic_float_alloca(128.0);
+	asm volatile("nop; nop" : : "r"(vf) : "memory");
+
+	return vf;
+}
+
+fy_generic do_x3(void)
+{
+	fy_generic vf;
+
+	asm volatile("nop; nop" : : : "memory");
+	vf = fy_generic_float_alloca(128.1);
+	asm volatile("nop; nop" : : "r"(vf) : "memory");
+
+	return vf;
+}
+
+int do_generics(int argc, char *argv[], const char *allocator)
+{
+	static const bool btable[] = {
+		false, true,
+	};
+	bool bv;
+	static const long long itable[] = {
+		0, 1, -1, LLONG_MAX, LLONG_MIN,
+		FYGT_INT_INPLACE_MAX, FYGT_INT_INPLACE_MIN,
+		FYGT_INT_INPLACE_MAX+1, FYGT_INT_INPLACE_MIN-1,
+	};
+	long
long iv; + static const char *stable[] = { + "", /* empty string */ + "0", + "01", + "012", + "0123", + "01234", + "012345", + "0123456", + "01234567", + "This is a string", + "invoice", + }; + const char *sv; + char sinplace[FYGT_STRING_INPLACE_BUF]; + size_t slen; + static const double ftable[] = { + 0.0, 1.0, -1.0, 0.1, -0.1, + 128.0, -128.0, + 256.1, -256.1, + INFINITY, -INFINITY, + NAN, -NAN, + }; + double fv; + static const size_t sztable[] = { + 0, + ((size_t)1 << 7) - 1, ((size_t)1 << 7), ((size_t)1 << 7) + 1, + ((size_t)1 << 14) - 1, ((size_t)1 << 14), ((size_t)1 << 14) + 1, + ((size_t)1 << 21) - 1, ((size_t)1 << 21), ((size_t)1 << 21) + 1, + ((size_t)1 << 28) - 1, ((size_t)1 << 28), ((size_t)1 << 28) + 1, + ((size_t)1 << 29) - 1, ((size_t)1 << 29), ((size_t)1 << 29) + 1, + ((size_t)1 << 35) - 1, ((size_t)1 << 35), ((size_t)1 << 35) + 1, + ((size_t)1 << 42) - 1, ((size_t)1 << 42), ((size_t)1 << 42) + 1, + ((size_t)1 << 49) - 1, ((size_t)1 << 49), ((size_t)1 << 49) + 1, + ((size_t)1 << 56) - 1, ((size_t)1 << 56), ((size_t)1 << 56) + 1, + ((size_t)1 << 57) - 1, ((size_t)1 << 57), ((size_t)1 << 57) + 1, + (size_t)UINT32_MAX, + (size_t)UINT64_MAX, + }; + uint8_t size_buf[FYGT_SIZE_ENCODING_MAX_64]; + uint8_t *szp __FY_DEBUG_UNUSED__; + size_t sz, szd; + uint32_t sz32d; + unsigned int i, j, k; + struct fy_generic_builder *gb; + fy_generic gbl, gi, gs, gf, gv; + fy_generic seq, map, map2; + struct fy_dedup_setup_data dsetupdata; + struct fy_linear_setup_data lsetupdata; + struct fy_allocator *a, *pa = NULL; + const void *gsetupdata = NULL; + char buf[4096]; + bool registered_allocator = false; + int rc __FY_DEBUG_UNUSED__; + + if (!allocator) + allocator = "linear"; + + /* setup the linear data always */ + memset(&lsetupdata, 0, sizeof(lsetupdata)); + lsetupdata.buf = buf; + lsetupdata.size = sizeof(buf); + + printf("using %s allocator\n", allocator); + + if (!strcmp(allocator, "linear")) { + gsetupdata = &lsetupdata; + } else if (!strcmp(allocator, "malloc")) { 
+ gsetupdata = NULL; + } else if (!strcmp(allocator, "mremap")) { + gsetupdata = NULL; + } else if (!strcmp(allocator, "dedup")) { + + /* create the parent allocator */ + pa = fy_allocator_create("linear", &lsetupdata); + assert(pa); + + memset(&dsetupdata, 0, sizeof(dsetupdata)); + dsetupdata.parent_allocator = pa; + dsetupdata.bloom_filter_bits = 0; /* use default */ + dsetupdata.bucket_count_bits = 0; + + gsetupdata = &dsetupdata; + + } else { + fprintf(stderr, "unsupported allocator %s\n", allocator); + return -1; + +#if 0 + /* fake a linear one */ + rc = fy_allocator_register(allocator, &fy_linear_allocator_ops); + assert(!rc); + gsetupdata = &lsetupdata; + registered_allocator = true; +#endif + } + + printf("testing alloca methods\n"); + printf("null = %016lx\n", fy_null); + for (i = 0; i < ARRAY_SIZE(btable); i++) { + bv = btable[i]; + gbl = fy_generic_bool_alloca(bv); + printf("boolean/%s = %016lx %s\n", bv ? "true" : "false", gbl, + fy_generic_get_bool(gbl) ? "true" : "false"); + } + + for (i = 0; i < ARRAY_SIZE(itable); i++) { + iv = itable[i]; + gi = fy_generic_int_alloca(iv); + printf("int/%lld = %016lx %lld\n", iv, gi, + fy_generic_get_int(gi)); + } + + for (i = 0; i < ARRAY_SIZE(stable); i++) { + sv = stable[i]; + gs = fy_generic_string_alloca(sv); + printf("string/%s = %016lx", sv, gs); + + sv = fy_generic_get_string_size(gs, sinplace, &slen); + assert(sv); + printf(" %.*s\n", (int)slen, sv); + } + + for (i = 0; i < ARRAY_SIZE(ftable); i++) { + fv = ftable[i]; + gf = fy_generic_float_alloca(fv); + printf("float/%f = %016lx %f\n", fv, gf, + fy_generic_get_float(gf)); + } + + seq = fy_generic_sequence_alloca(3, ((fy_generic[]){ + fy_generic_bool_alloca(true), + fy_generic_int_alloca(100), + fy_generic_string_alloca("info")})); + assert(seq != fy_invalid); + + printf("seq:\n"); + fy_generic_print_primitive(stdout, seq); + printf("\n"); + + map = fy_generic_mapping_alloca(3, ((fy_generic[]){ + fy_generic_string_alloca("foo"), 
fy_generic_string_alloca("bar"), + fy_generic_string_alloca("frooz-larger"), fy_generic_string_alloca("what"), + fy_generic_string_alloca("seq"), seq})); + + assert(map != fy_invalid); + + printf("map:\n"); + fy_generic_print_primitive(stdout, map); + printf("\n"); + + gv = fy_generic_mapping_lookup(map, fy_generic_string_alloca("foo")); + printf("found: "); + fy_generic_print_primitive(stdout, gv); + printf("\n"); + + map = fy_generic_mapping_alloca(2, ((fy_generic[]){ + fy_generic_string_alloca("foo"), fy_generic_string_alloca("bar"), + fy_generic_sequence_alloca(2, ((fy_generic[]){ + fy_generic_int_alloca(10), + fy_generic_int_alloca(100)})), + fy_generic_float_alloca(3.14)})); + + fy_generic_print_primitive(stdout, map); + printf("\n"); + + gv = fy_generic_mapping_lookup(map, fy_generic_sequence_alloca(2, ((fy_generic[]){ + fy_generic_int_alloca(10), + fy_generic_int_alloca(100)}))); + printf("found: "); + fy_generic_print_primitive(stdout, gv); + printf("\n"); + + +#define ASTR(_x) \ + ({ \ + static const char __s[sizeof(_x) + 1] __attribute__((aligned(256))) = (_x); \ + __s; \ + }) + + { + const char *ss; + + asm volatile("nop; nop" : : : "memory"); + ss = ASTR("123"); + asm volatile("nop; nop" : : : "memory"); + + printf("%p %s\n", ss, ss); + } + + { + fy_generic vstr; + + asm volatile("nop; nop" : : : "memory"); + vstr = fy_generic_string_alloca("test"); + asm volatile("nop; nop" : : "r"(vstr) : "memory"); + + printf("vstr=0x%08lx\n", (unsigned long)vstr); + } + + + a = fy_allocator_create(allocator, gsetupdata); + assert(a); + + gb = fy_generic_builder_create(a, FY_ALLOC_TAG_NONE); + assert(gb); + + printf("created gb=%p\n", gb); + + printf("null = %016lx\n", fy_null); + for (i = 0; i < ARRAY_SIZE(btable); i++) { + bv = btable[i]; + gbl = fy_generic_bool_create(gb, bv); + printf("boolean/%s = %016lx %s\n", bv ? "true" : "false", gbl, + fy_generic_get_bool(gbl) ? 
"true" : "false"); + } + + for (i = 0; i < ARRAY_SIZE(itable); i++) { + iv = itable[i]; + gi = fy_generic_int_create(gb, iv); + printf("int/%lld = %016lx %lld\n", iv, gi, + fy_generic_get_int(gi)); + } + + for (i = 0; i < ARRAY_SIZE(stable); i++) { + sv = stable[i]; + gs = fy_generic_string_create(gb, sv); + printf("string/%s = %016lx", sv, gs); + + sv = fy_generic_get_string_size(gs, sinplace, &slen); + assert(sv); + printf(" %.*s\n", (int)slen, sv); + } + + for (i = 0; i < ARRAY_SIZE(sztable); i++) { + sz = sztable[i]; + printf("size_t/%zx =", sz); + j = fy_encode_size_bytes(sz); + assert(j <= sizeof(size_buf)); + printf(" (%d)", j); + + memset(size_buf, 0, sizeof(size_buf)); + szp = fy_encode_size(size_buf, sizeof(size_buf), sz); + assert(szp); + assert((unsigned int)(szp - size_buf) == j); + for (k = 0; k < j; k++) + printf(" %02x", size_buf[k] & 0xff); + + szd = 0; + fy_decode_size(size_buf, sizeof(size_buf), &szd); + printf(" decoded=%zx", szd); + + printf("\n"); + + /* decoding must match */ + assert(szd == sz); + } + + for (i = 0; i < ARRAY_SIZE(sztable); i++) { + sz = sztable[i]; + if (sz > UINT32_MAX) + continue; + printf("uint32_t/%zx =", sz); + j = fy_encode_size32_bytes((uint32_t)sz); + assert(j <= sizeof(size_buf)); + printf(" (%d)", j); + + memset(size_buf, 0, sizeof(size_buf)); + szp = fy_encode_size32(size_buf, sizeof(size_buf), (uint32_t)sz); + assert(szp); + assert((unsigned int)(szp - size_buf) == j); + for (k = 0; k < j; k++) + printf(" %02x", size_buf[k] & 0xff); + + sz32d = 0; + fy_decode_size32(size_buf, sizeof(size_buf), &sz32d); + printf(" decoded=%zx", (size_t)sz32d); + + printf("\n"); + + /* decoding must match */ + assert(sz32d == (uint32_t)sz); + + } + + for (i = 0; i < ARRAY_SIZE(ftable); i++) { + fv = ftable[i]; + gf = fy_generic_float_create(gb, fv); + printf("float/%f = %016lx %f\n", fv, gf, + fy_generic_get_float(gf)); + } + + seq = fy_generic_sequence_create(gb, 3, (fy_generic[3]){ + fy_generic_bool_create(gb, true), + 
fy_generic_int_create(gb, 100), + fy_generic_string_create(gb, "info") + }); + assert(seq != fy_invalid); + + fy_generic_print_primitive(stdout, seq); + printf("\n"); + + map = fy_generic_mapping_create(gb, 3, (fy_generic[]){ + fy_generic_string_create(gb, "foo"), fy_generic_string_create(gb, "bar"), + fy_generic_string_create(gb, "frooz-larger"), fy_generic_string_create(gb, "what"), + fy_generic_string_create(gb, "seq"), seq + }); + + assert(map != fy_invalid); + + fy_generic_print_primitive(stdout, map); + printf("\n"); + + gv = fy_generic_mapping_lookup(map, fy_generic_string_create(gb, "foo")); + printf("found: "); + fy_generic_print_primitive(stdout, gv); + printf("\n"); + + map = fy_generic_mapping_create(gb, 2, (fy_generic[]){ + fy_generic_string_create(gb, "foo"), fy_generic_string_create(gb, "bar"), + fy_generic_sequence_create(gb, 2, (fy_generic[]){ + fy_generic_int_create(gb, 10), + fy_generic_int_create(gb, 100)}), + fy_generic_float_create(gb, 3.14)}); + + fy_generic_print_primitive(stdout, map); + printf("\n"); + + gv = fy_generic_mapping_lookup(map, fy_generic_sequence_create(gb, 2, (fy_generic[]){ + fy_generic_int_create(gb, 10), + fy_generic_int_create(gb, 100)})); + printf("found: "); + fy_generic_print_primitive(stdout, gv); + printf("\n"); + + +#define ASTR(_x) \ + ({ \ + static const char __s[sizeof(_x) + 1] __attribute__((aligned(256))) = (_x); \ + __s; \ + }) + + { + const char *ss; + + asm volatile("nop; nop" : : : "memory"); + ss = ASTR("123"); + asm volatile("nop; nop" : : : "memory"); + + printf("%p %s\n", ss, ss); + } + + fy_allocator_dump(a); + + fy_generic_builder_destroy(gb); + + fy_allocator_destroy(a); + + a = fy_allocator_create(allocator, gsetupdata); + assert(a); + + gb = fy_generic_builder_create(a, FY_ALLOC_TAG_NONE); + assert(gb); + + map = fy_generic_mapping_create(gb, 3, (fy_generic[]){ + fy_generic_string_create(gb, "foo"), fy_generic_string_create(gb, "bar"), + fy_generic_string_create(gb, "frooz-larger\nshould \x01 be 
quoted"), fy_generic_string_create(gb, "what"), + fy_generic_string_create(gb, "seq"), fy_generic_sequence_create(gb, 3, (fy_generic[]){ + fy_generic_bool_create(gb, true), + fy_generic_int_create(gb, 100), + fy_generic_string_create(gb, "info") + }) + + }); + + assert(map != fy_invalid); + + fy_generic_builder_destroy(gb); + + fy_allocator_destroy(a); + + printf("testing dedup cases\n"); + printf("\n"); + + a = fy_allocator_create(allocator, gsetupdata); + assert(a); + + gb = fy_generic_builder_create(a, FY_ALLOC_TAG_NONE); + assert(gb); + + iv = LLONG_MAX; + gi = fy_generic_int_create(gb, iv); + gi = fy_generic_int_create(gb, iv); + fy_allocator_dump(a); + + gi = fy_generic_string_create(gb, "foo bar is big"); + gi = fy_generic_string_create(gb, "foo bar is big"); + fy_allocator_dump(a); + + map = fy_generic_mapping_create(gb, 3, (fy_generic[]){ + fy_generic_string_create(gb, "foo"), fy_generic_float_create(gb, 0.11111), + fy_generic_string_create(gb, "frooz-larger\nshould \x01 be quoted"), fy_generic_string_create(gb, "what"), + fy_generic_string_create(gb, "seq"), fy_generic_sequence_create(gb, 3, (fy_generic[]){ + fy_generic_bool_create(gb, true), + fy_generic_int_create(gb, 100), + fy_generic_string_create(gb, "info-fffffffffffffffffffffffffff") + }) + + }); + + map2 = fy_generic_mapping_create(gb, 3, (fy_generic[]){ + fy_generic_string_create(gb, "foo"), fy_generic_float_create(gb, 0.11111), + fy_generic_string_create(gb, "frooz-larger\nshould \x01 be quoted"), fy_generic_string_create(gb, "what"), + fy_generic_string_create(gb, "seq"), fy_generic_sequence_create(gb, 3, (fy_generic[]){ + fy_generic_bool_create(gb, true), + fy_generic_int_create(gb, 100), + fy_generic_string_create(gb, "info-fffffffffffffffffffffffffff") + }) + + }); + + fy_allocator_dump(a); + + printf("map = %p map2 = %p\n", (void *)map, (void *)map2); + + fy_generic_builder_destroy(gb); + + fy_allocator_destroy(a); + + if (registered_allocator) { + rc = fy_allocator_unregister(allocator); 
+		assert(!rc);
+	}
+
+	if (pa)
+		fy_allocator_destroy(pa);
+
+	return 0;
+}
+
+/*
+ * do_remap() - Probe the mmap()/mremap() behaviour of the running system
+ *
+ * Runs four experiments and reports the outcome of each on stdout:
+ *  1. anonymous mappings of doubling size, from one page up to 2GB
+ *  2. a large number of single page mappings, all alive at the same time
+ *  3. doubling a 1MB mapping with mremap() for as long as it stays below 1GB
+ *  4. shrinking a 1GB mapping to 1MB and growing it back with mremap()
+ *
+ * @argc: unused
+ * @argv: unused
+ *
+ * Returns 0 when the probe ran (single experiments may still have reported
+ * a failure), -1 if the bookkeeping array of experiment 2 cannot be allocated.
+ */
+int do_remap(int argc, char *argv[])
+{
+	size_t pagesz = sysconf(_SC_PAGESIZE);
+	size_t sz, limit, newsz;
+	void *mem, *mem2;
+	void **ptrs;
+	int i, maxcount;
+
+	limit = (size_t)2 << 30;
+
+	printf("1. Trying successive mmaps untils failure or limit %zu MB=%zu GB=%zu\n", limit, limit >> 20, limit >> 30);
+	sz = pagesz;
+	for (i = 0; sz <= limit; i++, sz <<= 1) {
+		mem = mmap(NULL, sz, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+		if (mem == MAP_FAILED) {
+			/* report the last size that did work */
+			sz >>= 1;
+			printf("> map failed at cycle #%d (success at size=%zu MB=%zu GB=%zu)\n", i, sz, sz >> 20, sz >> 30);
+			break;
+		}
+		printf("> success at cycle #%d (size=%zu MB=%zu GB=%zu)\n", i, sz, sz >> 20, sz >> 30);
+		memset(mem, 0, sz);
+		munmap(mem, sz);
+	}
+
+	printf("2. Trying to find number of mmap limit\n");
+	maxcount = (pagesz / sizeof(void *)) * 16;	/* pages worth of pointers */
+
+	/* one slot per mapping (not per byte of 'limit'), so that each one can be released */
+	ptrs = calloc(maxcount, sizeof(*ptrs));
+	if (!ptrs) {
+		fprintf(stderr, "Unable to allocate %d mapping slots\n", maxcount);
+		return -1;
+	}
+	sz = 0;
+	for (i = 0; i < maxcount; i++) {
+		sz = (size_t)i * pagesz;
+		mem = mmap(NULL, pagesz, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+		if (mem == MAP_FAILED) {
+			printf("> mmap #%d failed (total size=%zu MB=%zu GB=%zu)\n",
+					i, sz, sz >> 20, sz >> 30);
+			break;
+		}
+		/* remember the mapping, it is released after the experiment */
+		ptrs[i] = mem;
+		if ((i % 128) == 0) {
+			printf("> mmap #%d success (total size=%zu MB=%zu GB=%zu)\n",
+					i, sz, sz >> 20, sz >> 30);
+		}
+	}
+
+	if (i >= maxcount) {
+		/* all mappings succeeded; account for the last one too */
+		sz = (size_t)i * pagesz;
+		printf("> mmap #%d completed (total size=%zu MB=%zu GB=%zu)\n",
+				i, sz, sz >> 20, sz >> 30);
+	}
+
+	/* ptrs[0] .. ptrs[i - 1] are the mappings that were established */
+	while (i-- > 0)
+		munmap(ptrs[i], pagesz);
+	free(ptrs);
+
+	printf("3. Trying to find out limitations of mremap\n");
+	sz = (size_t)1 << 20;
+	printf("> allocating size %zu MB=%zu GB=%zu mapping and trying to grow it\n", sz, sz >> 20, sz >> 30);
+	mem = mmap(NULL, sz, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+	/* a failure is reported and the experiment skipped, not asserted on */
+	if (mem == MAP_FAILED) {
+		printf("Unable to mmap size %zu MB=%zu GB=%zu\n", sz, sz >> 20, sz >> 30);
+		goto next4;
+	}
+	memset(mem, 0, sz);
+
+	printf("> growing the mapping to 1G\n");
+	for (i = 0, newsz = sz << 1; newsz < (size_t)1 << 30; i++, newsz <<= 1) {
+		printf("> trying to mremap #%d size %zu MB=%zu GB=%zu\n", i, newsz, newsz >> 20, newsz >> 30);
+		mem2 = mremap(mem, sz, newsz, 0);
+		if (mem2 == MAP_FAILED) {
+			/* mem/sz still describe the old, valid mapping */
+			printf("Unable to mremap size %zu MB=%zu GB=%zu\n", newsz, newsz >> 20, newsz >> 30);
+			goto unmap3;
+		}
+		sz = newsz;
+		if (mem2 != mem) {
+			mem = mem2;
+			printf("mapping moved!\n");
+			goto unmap3;
+		}
+		mem = mem2;
+		printf("> mremap successful #%d size %zu MB=%zu GB=%zu\n", i, sz, sz >> 20, sz >> 30);
+		memset(mem, 0, sz);
+	}
+
+unmap3:
+	munmap(mem, sz);
+
+next4:
+	printf("4. Trying to find out limitations of mremap take #2\n");
+	printf("> allocating a large (1G) size mapping and trying to shring and regrow it\n");
+	sz = (size_t)1 << 30;
+	mem = mmap(NULL, sz, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+	/* a failure is reported and the experiment skipped, not asserted on */
+	if (mem == MAP_FAILED) {
+		printf("Unable to mmap size %zu MB=%zu GB=%zu\n", sz, sz >> 20, sz >> 30);
+		goto next5;
+	}
+	memset(mem, 0, sz);
+
+	printf("> shrinking the mapping to 1M\n");
+	newsz = (size_t)1 << 20;
+	mem2 = mremap(mem, sz, newsz, 0);
+	if (mem2 == MAP_FAILED) {
+		printf("Unable to mremap size %zu MB=%zu GB=%zu\n", newsz, newsz >> 20, newsz >> 30);
+		goto unmap4;
+	}
+	sz = newsz;
+	if (mem2 != mem) {
+		mem = mem2;
+		printf("mapping moved!\n");
+		goto unmap4;
+	}
+	mem = mem2;
+	printf("> mremap successful (zeroing)\n");
+	memset(mem, 0, sz);
+
+	printf("> growing the mapping to 1G again\n");
+	newsz = (size_t)1 << 30;
+	mem2 = mremap(mem, sz, newsz, 0);
+	if (mem2 == MAP_FAILED) {
+		printf("Unable to mremap size %zu MB=%zu GB=%zu\n", newsz, newsz >> 20, newsz >> 30);
+		goto unmap4;
+	}
+	sz = newsz;
+	if (mem2 != mem) {
+		mem = mem2;
+		printf("mapping moved!\n");
+		goto unmap4;
+	}
+	mem = mem2;
+	printf("> mremap successful (zeroing)\n");
+	memset(mem, 0, sz);
+
+unmap4:
+	/* release whatever mapping is current; mem/sz are kept in sync above */
+	munmap(mem, sz);
+
+next5:
+
+	return 0;
+}
+
+int do_parse_generic(struct fy_parser *fyp, const char *allocator, bool null_output, const char *cache)
+{
+	struct fy_generic_decoder *fygd = NULL;
+	struct fy_generic_encoder *fyge = NULL;
+	struct fy_emitter emit_state, *emit = &emit_state;
+	struct fy_emitter_cfg emit_cfg;
+	uint8_t size_buf[FYGT_SIZE_ENCODING_MAX_64];
+	struct fy_mremap_setup_data mrsetupdata;
+	struct fy_dedup_setup_data dsetupdata;
+	struct fy_linear_setup_data lsetupdata;
+	struct fy_auto_setup_data asetupdata;
+	struct fy_allocator *a, *pa = NULL;
+	const void *gsetupdata = NULL;
+	bool registered_allocator = false;
+	struct fy_generic_builder *gb;
+	fy_generic vdir;
+	int rc
__FY_DEBUG_UNUSED__; + size_t alloc_size; + ssize_t estimated_size; + const void *single_area; + size_t single_area_size, single_area_start, single_area_alloc; + void *single_area_copy = NULL; + size_t pagesz = sysconf(_SC_PAGESIZE); + void *cache_mem = NULL; + size_t cache_sz; + + (void)size_buf; + + estimated_size = fy_parse_estimate_queued_input_size(fyp); + + if (estimated_size < 0) { + fprintf(stderr, "Bad input\n"); + return -1; + } + + + printf("estimated_size=%zd\n", estimated_size); + + if (estimated_size != 0 && estimated_size != SSIZE_MAX) + alloc_size = (size_t)(estimated_size * 1.5); + else + alloc_size = (1 << 30) / 4; + + if (!allocator) + allocator = "linear"; + + /* setup the linear data always */ + memset(&lsetupdata, 0, sizeof(lsetupdata)); + lsetupdata.buf = NULL; + lsetupdata.size = alloc_size; + + printf("using %s allocator\n", allocator); + + if (!strcmp(allocator, "linear")) { + gsetupdata = &lsetupdata; + } else if (!strcmp(allocator, "malloc")) { + gsetupdata = NULL; + } else if (!strcmp(allocator, "mremap")) { + gsetupdata = NULL; + } else if (!strcmp(allocator, "dedup") || !strcmp(allocator, "dedup-linear")) { + + /* create the parent allocator */ + pa = fy_allocator_create("linear", &lsetupdata); + assert(pa); + + memset(&dsetupdata, 0, sizeof(dsetupdata)); + dsetupdata.parent_allocator = pa; + dsetupdata.bloom_filter_bits = 0; /* use default */ + dsetupdata.bucket_count_bits = 0; + + gsetupdata = &dsetupdata; + + allocator = "dedup"; + + } else if (!strcmp(allocator, "dedup-malloc")) { + + /* create the parent allocator */ + pa = fy_allocator_create("malloc", NULL); + assert(pa); + + memset(&dsetupdata, 0, sizeof(dsetupdata)); + dsetupdata.parent_allocator = pa; + dsetupdata.bloom_filter_bits = 0; /* use default */ + dsetupdata.bucket_count_bits = 0; + dsetupdata.estimated_content_size = estimated_size; + + gsetupdata = &dsetupdata; + + allocator = "dedup"; + } else if (!strcmp(allocator, "dedup-mremap")) { + + memset(&mrsetupdata, 0, 
sizeof(mrsetupdata));
+		mrsetupdata.big_alloc_threshold = SIZE_MAX;
+		mrsetupdata.empty_threshold = 64;
+		mrsetupdata.grow_ratio = 1.5;
+		mrsetupdata.balloon_ratio = 8.0;
+		mrsetupdata.arena_type = FYMRAT_MMAP;
+
+		if (estimated_size && estimated_size != SSIZE_MAX)
+			mrsetupdata.minimum_arena_size = estimated_size;
+		else
+			mrsetupdata.minimum_arena_size = 16 << 20;
+
+		/* create the parent allocator */
+		pa = fy_allocator_create("mremap", &mrsetupdata);
+		assert(pa);
+
+		memset(&dsetupdata, 0, sizeof(dsetupdata));
+		dsetupdata.parent_allocator = pa;
+		dsetupdata.bloom_filter_bits = 0;	/* use default */
+		dsetupdata.bucket_count_bits = 0;
+		dsetupdata.estimated_content_size = estimated_size;
+
+		gsetupdata = &dsetupdata;
+
+		allocator = "dedup";
+
+	} else if (!strcmp(allocator, "auto")) {
+
+		memset(&asetupdata, 0, sizeof(asetupdata));
+		asetupdata.scenario = FYAST_BALANCED;
+		asetupdata.estimated_max_size = (size_t)estimated_size;
+
+		gsetupdata = &asetupdata;
+
+		allocator = "auto";
+
+	} else {
+		fprintf(stderr, "unsupported allocator %s\n", allocator);
+		return -1;
+	}
+
+	a = fy_allocator_create(allocator, gsetupdata);
+	assert(a);
+
+	gb = fy_generic_builder_create(a, FY_ALLOC_TAG_NONE);
+	assert(gb);
+
+	fygd = fy_generic_decoder_create(fyp, gb, false);
+	assert(fygd);
+
+	vdir = fy_invalid;
+
+	/*
+	 * Try to satisfy the request from the cache file. The file starts with
+	 * two 64 bit words: the address the image must be mapped at and the
+	 * directory value. Any failure here just leaves vdir invalid, which
+	 * makes the code below parse the input instead.
+	 */
+	if (cache) {
+		struct stat sb;
+		uint64_t hdr[2];
+		ssize_t rdn;
+		int fd;
+
+		fd = open(cache, O_RDONLY);
+		if (fd >= 0) {
+			rc = fstat(fd, &sb);
+			assert(!rc);
+			/* only for regular files */
+			if ((sb.st_mode & S_IFMT) == S_IFREG) {
+				cache_sz = sb.st_size;
+
+				do {
+					rdn = read(fd, hdr, sizeof(hdr));
+				} while (rdn == -1 && (errno == EAGAIN || errno == EINTR));
+
+				/* a short or failed header read means no usable cache */
+				if (rdn == (ssize_t)sizeof(hdr)) {
+					fprintf(stderr, "attempting to map fixed at %p\n", (void *)(uintptr_t)hdr[0]);
+					cache_mem = mmap((void *)(uintptr_t)hdr[0], cache_sz, PROT_READ, MAP_PRIVATE | MAP_FIXED_NOREPLACE, fd, 0);
+					if (cache_mem != MAP_FAILED) {
+						fprintf(stderr, "success\n");
+						vdir = (fy_generic)hdr[1];
+					} else {
+						/* e.g. the address range is already in use in this process */
+						fprintf(stderr, "unable to map cache %s: %s\n", cache, strerror(errno));
+						cache_mem = NULL;
+					}
+				}
+			}
+			close(fd);
+		}
+	}
+
+	if (vdir == fy_invalid) {
+		vdir = fy_generic_decoder_parse_all_documents(fygd);
+		assert(vdir != fy_invalid);
+
+		fy_generic_builder_trim(gb);
+
+		single_area_size = 0;
+		single_area = fy_generic_builder_get_single_area(gb, &single_area_size, &single_area_start, &single_area_alloc);
+		if (!single_area) {
+			fprintf(stderr, "Builder has no single area\n");
+			single_area_copy = NULL;
+		} else {
+			fprintf(stderr, "Builder has single area: %p sz=0x%zx start=0x%zx alloc=0x%zx\n",
+					single_area, single_area_size, single_area_start, single_area_alloc);
+
+#if 0
+			ptrdiff_t d;
+			struct timespec before, after;
+			int64_t ns;
+
+			BEFORE();
+			single_area_copy = malloc(single_area_size);
+			assert(single_area_copy);
+			memcpy(single_area_copy, single_area, single_area_size);
+			ns = AFTER();
+
+			fprintf(stderr, "single area copy: %p sz=0x%zx\n", single_area_copy, single_area_size);
+			printf("copy in %3.2fms\n", (double)((ns / 1000)/1000.0));
+
+			d = single_area_copy - single_area;
+			printf("relocation delta %lx\n", (long)d);
+
+			BEFORE();
+			printf("vdir before relocation %p\n", (void *)vdir);
+			vdir = fy_generic_relocate(single_area_copy, single_area_copy + single_area_size, vdir, d);
+			printf("vdir after relocation %p\n", (void *)vdir);
+			ns = AFTER();
+			printf("relocation in %3.2fms\n", (double)((ns / 1000)/1000.0));
+
+			if (!null_output) {
+				rc = fy_generic_encoder_emit_all_documents(fyge, vdir);
+				assert(!rc);
+			}
+#endif
+
+			/*
+			 * The area can be turned into a cache file when it is page
+			 * aligned (it has to be mapped back at the same address), its
+			 * unused prefix has room for the two header words, and it is
+			 * at least as large as that prefix.
+			 */
+			if (cache && ((uintptr_t)single_area & (uintptr_t)(pagesz - 1)) == 0 &&
+			    single_area_start >= 2 * sizeof(uint64_t) &&
+			    single_area_size >= single_area_start) {
+				int fd;
+				void *hdr;
+				const uint8_t *p;	/* byte pointer, advanced by the amount written */
+				ssize_t wrn;
+				size_t left, hdrsz;
+
+				fprintf(stderr, "Builder can create cache %s\n", cache);
+
+				/* truncate, so that no tail of an older, larger cache survives */
+				fd = open(cache, O_CREAT|O_WRONLY|O_TRUNC, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH);
+				if (fd >= 0) {
+					p = single_area;
+					left = single_area_size;
+
+					/* the header replaces the first single_area_start bytes of the area */
+					hdrsz = single_area_start;
+					hdr = alloca(hdrsz);
+					memset(hdr, 0, hdrsz);
+					((uint64_t *)hdr)[0] = (uintptr_t)single_area;	/* store the mapping address */
+					((uint64_t *)hdr)[1] = (uintptr_t)vdir;	/* store the directory */
+
+					do {
+						wrn = write(fd, hdr, hdrsz);
+					} while (wrn == -1 && (errno == EAGAIN || errno == EINTR));
+					assert(wrn != -1);
+					assert(wrn > 0);
+					assert((size_t)wrn == hdrsz);
+					p += (size_t)wrn;
+					left -= (size_t)wrn;
+
+					/* write the remainder of the area, handling short writes */
+					while (left > 0) {
+						do {
+							wrn = write(fd, p, left);
+						} while (wrn == -1 && (errno == EAGAIN || errno == EINTR));
+						assert(wrn != -1);
+						assert(wrn > 0);
+						p += (size_t)wrn;
+						left -= (size_t)wrn;
+					}
+
+					close(fd);
+				}
+
+			}
+
+		}
+	}
+
+	fy_generic_decoder_destroy(fygd);
+	fygd = NULL;
+
+	fprintf(stderr, "before trim\n");
+	fy_allocator_dump(a);
+
+	fy_generic_builder_trim(gb);
+
+	fprintf(stderr, "after trim\n");
+	fy_allocator_dump(a);
+
+	memset(&emit_cfg, 0, sizeof(emit_cfg));
+	emit_cfg.flags = 0;
+	rc = fy_emit_setup(emit, &emit_cfg);
+	assert(!rc);
+
+	fyge = fy_generic_encoder_create(emit, false);
+	assert(fyge);
+
+	if (!null_output) {
+		rc = fy_generic_encoder_emit_all_documents(fyge, vdir);
+		assert(!rc);
+	}
+
+	fy_generic_encoder_sync(fyge);
+	fy_generic_encoder_destroy(fyge);
+
+	fy_emit_cleanup(emit);
+
+	fy_generic_builder_destroy(gb);
+	gb = NULL;
+
+	fy_allocator_destroy(a);
+	a = NULL;
+
+	if (registered_allocator) {
+		rc = fy_allocator_unregister(allocator);
+		assert(!rc);
+	}
+
+	if (pa) {
+		fy_allocator_destroy(pa);
+		pa = NULL;
+	}
+
+	/* vdir may point into the cache mapping; drop it only now that the emitter is done */
+	if (cache_mem)
+		munmap(cache_mem, cache_sz);
+
+	free(single_area_copy);
+
+	return 0;
+}
+
 int apply_flags_option(const char *arg, unsigned int *flagsp, int (*modify_flags)(const char *what, unsigned int *flagsp)) { @@ -4169,6 +5205,8 @@ int main(int argc, char *argv[]) const char *walkstart = "/"; bool use_callback = false; bool null_output = false; + const char *allocator = "linear"; + const char *cache = NULL; fy_valgrind_check(&argc, &argv); @@ -4274,6 +5312,12 @@ int main(int argc, char *argv[]) case OPT_YPATH_ALIASES: cfg.flags |= FYPCF_YPATH_ALIASES; break; + case OPT_ALLOCATOR: + allocator = optarg; + break; +
case OPT_CACHE: + cache = optarg; + break; case 'q': cfg.flags |= FYPCF_QUIET; break; @@ -4304,7 +5348,11 @@ int main(int argc, char *argv[]) strcmp(mode, "crash") && strcmp(mode, "badutf8") && strcmp(mode, "shell-split") && - strcmp(mode, "parse-timing") + strcmp(mode, "parse-timing") && + strcmp(mode, "generics") && + strcmp(mode, "remap") && + strcmp(mode, "parse-generic") && + strcmp(mode, "idbit") #if defined(HAVE_LIBYAML) && HAVE_LIBYAML && strcmp(mode, "libyaml-scan") && strcmp(mode, "libyaml-parse") @@ -4561,11 +5609,29 @@ int main(int argc, char *argv[]) goto cleanup; } } else if (!strcmp(mode, "parse-timing")) { - rc = do_parse_timing(argc, argv, !!(cfg.flags & FYPCF_DISABLE_MMAP_OPT)); + rc = do_parse_timing(argc, argv); if (rc < 0) { /* fprintf(stderr, "do_parse_timing() error %d\n", rc); */ goto cleanup; } + } else if (!strcmp(mode, "generics")) { + rc = do_generics(argc, argv, allocator); + if (rc < 0) { + /* fprintf(stderr, "do_generics() error %d\n", rc); */ + goto cleanup; + } + } else if (!strcmp(mode, "parse-generic")) { + rc = do_parse_generic(fyp, allocator, null_output, cache); + if (rc < 0) { + /* fprintf(stderr, "do_generics() error %d\n", rc); */ + goto cleanup; + } + } else if (!strcmp(mode, "remap")) { + rc = do_remap(argc, argv); + if (rc < 0) { + /* fprintf(stderr, "do_generics() error %d\n", rc); */ + goto cleanup; + } } #if defined(HAVE_LIBYAML) && HAVE_LIBYAML if (!strcmp(mode, "libyaml-diff")) { diff --git a/src/reflection/fy-clang-backend.c b/src/reflection/fy-clang-backend.c new file mode 100644 index 00000000..e26d4eb4 --- /dev/null +++ b/src/reflection/fy-clang-backend.c @@ -0,0 +1,1182 @@ +/* + * fy-clang-backend.c - Clang based C type backend + * + * Copyright (c) 2023 Pantelis Antoniou + * + * SPDX-License-Identifier: MIT + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "fy-reflection-private.h" + 
+#include "fy-clang-backend.h"
+
+/* clang */
+#include
+#include
+
+/* backend entry points; they are wired into clang_ops right below */
+static int clang_reflection_setup(struct fy_reflection *rfl);
+static void clang_reflection_cleanup(struct fy_reflection *rfl);
+
+static int clang_import_setup(struct fy_import *imp, const void *user);
+static void clang_import_cleanup(struct fy_import *imp);
+
+static int clang_decl_setup(struct fy_decl *decl, void *user);
+static void clang_decl_cleanup(struct fy_decl *decl);
+
+static int clang_type_setup(struct fy_type *ft, void *user);
+static void clang_type_cleanup(struct fy_type *ft);
+
+/* method table handed to the reflection core for the clang backend */
+static const struct fy_reflection_backend_ops clang_ops = {
+	.reflection_setup = clang_reflection_setup,
+	.reflection_cleanup = clang_reflection_cleanup,
+	.import_setup = clang_import_setup,
+	.import_cleanup = clang_import_cleanup,
+	.type_setup = clang_type_setup,
+	.type_cleanup = clang_type_cleanup,
+	.decl_setup = clang_decl_setup,
+	.decl_cleanup = clang_decl_cleanup,
+};
+
+/* the only non-static symbol here: the backend descriptor, named "clang" */
+const struct fy_reflection_backend fy_reflection_clang_backend = {
+	.name = "clang",
+	.ops = &clang_ops,
+};
+
+/*
+ * A CXString together with the C string pointer obtained from it.
+ * Set up with clang_str_setup(), released with clang_str_cleanup().
+ */
+struct clang_str {
+	CXString cx;		/* the libclang string that is kept */
+	const char *str;	/* result of clang_getCString(cx), may be NULL */
+};
+
+/*
+ * Initialize cstr from cx: the structure is zeroed, cx is stored and its
+ * C string pointer is cached. cstr must not be NULL (asserted).
+ * The CXString is disposed later by clang_str_cleanup().
+ */
+static inline void clang_str_setup(struct clang_str *cstr, CXString cx)
+{
+	assert(cstr);
+	memset(cstr, 0, sizeof(*cstr));
+
+	cstr->cx = cx;
+	cstr->str = clang_getCString(cx);
+}
+
+/*
+ * Release cstr. A NULL cstr is ignored. The CXString is disposed only
+ * when a C string pointer was cached, and the structure is zeroed so
+ * that a second cleanup does nothing.
+ */
+static inline void clang_str_cleanup(struct clang_str *cstr)
+{
+	if (!cstr)
+		return;
+	if (cstr->str)
+		clang_disposeString(cstr->cx);
+	memset(cstr, 0, sizeof(*cstr));
+}
+
+/*
+ * Return the cached C string, or "" when cstr is NULL or holds no string;
+ * the result is never NULL. The pointer belongs to the CXString and is
+ * not meant to be used after clang_str_cleanup().
+ */
+static inline const char *clang_str_get(struct clang_str *cstr)
+{
+	if (!cstr || !cstr->str)
+		return "";
+	return cstr->str;
+}
+
+/*
+ * Evaluate to a NUL terminated copy of the text of the CXString _cx.
+ *
+ * The copy is placed in alloca() storage of the function that expands the
+ * macro and _cx is disposed before the macro finishes, so the caller has
+ * nothing to release. The returned pointer is only usable until that
+ * function returns, and every expansion consumes additional stack.
+ *
+ * This is a statement expression (a compiler extension), and _cx is
+ * evaluated exactly once.
+ */
+#define clang_str_get_alloca(_cx) \
+	({ \
+		struct clang_str _cstr; \
+		const char *_str1; \
+		char *_str2; \
+		size_t _len; \
+		\
+		clang_str_setup(&_cstr, _cx); \
+		_str1 = clang_str_get(&_cstr); \
+		_len = strlen(_str1); \
+		_str2 = alloca(_len + 1); \
+		memcpy(_str2, _str1, _len + 1); \
+		clang_str_cleanup(&_cstr); \
+		(const char *)_str2; \
+	})
+
+struct
clang_reflect_backend {
+	CXIndex index;
+};
+
+/* per-import state, filled in by clang_import_setup() */
+struct clang_import_backend {
+	CXTranslationUnit tu;
+	CXTargetInfo ti;
+	struct clang_str target_triple;
+	int level;
+	char *file;
+	char *name;
+	const char *target_triple_str;
+};
+
+/* creation arguments handed to clang_decl_setup() through the user pointer */
+struct clang_decl_user {
+	CXCursor cursor;
+	CXCursor parent;
+	bool is_fake_func;
+};
+
+/* per-declaration state; the clang_str members are released by clang_decl_cleanup() */
+struct clang_decl_backend {
+	CXCursor cursor;
+	CXSourceLocation location;
+	CXType type;
+	CXComment comment;
+	struct clang_str raw_comment;
+
+	FILE *comments_linear_fp;
+	char *comments_linear;
+	size_t comments_linear_size;
+	enum CXCommentKind last_comment_kind;
+
+	struct clang_str cursor_kind_spelling;
+	struct clang_str cursor_spelling;
+	struct clang_str cursor_display_name;
+	struct clang_str cursor_usr;
+
+	struct clang_str type_kind_spelling;
+	struct clang_str type_spelling;
+
+	/* from clang_getFileLocation */
+	CXFile file;
+	unsigned line, column, offset;
+	struct fy_source_location source_location;
+
+	/* which member is valid depends on the declaration type (typedef or enum) */
+	union {
+		struct {
+			struct {
+				CXType type;
+				CXCursor cursor;
+				struct clang_str type_kind_spelling;
+				struct clang_str type_spelling;
+			} underlying;
+		} typedef_info;
+		struct {
+			struct {
+				CXType type;
+				struct clang_str type_kind_spelling;
+				struct clang_str type_spelling;
+			} inttype;
+		} enum_info;
+	};
+};
+
+/* creation arguments handed to clang_type_setup() through the user pointer */
+struct clang_type_user {
+	CXType type;
+};
+
+/* per-type state */
+struct clang_type_backend {
+	CXType type;
+	CXType dependent_type;
+	struct clang_str dependent_type_name;
+};
+
+/*
+ * Map a libclang type kind to the reflection type kind.
+ *
+ * @clang_type:  kind of the clang type
+ * @cursor_kind: kind of the cursor declaring the type; only consulted for
+ *               CXType_Record, to tell structs from unions
+ *
+ * Returns the matching FYTK_* value, or FYTK_INVALID for kinds that have
+ * no mapping.
+ */
+static inline enum fy_type_kind
+clang_map_type_kind(enum CXTypeKind clang_type, enum CXCursorKind cursor_kind)
+{
+	switch (clang_type) {
+	/* basic types */
+	case CXType_Void:
+		return FYTK_VOID;
+	case CXType_Bool:
+		return FYTK_BOOL;
+	/*
+	 * plain char is reported as Char_S or Char_U depending on whether
+	 * char is signed on the target; both are the same C type
+	 */
+	case CXType_Char_S:
+	case CXType_Char_U:
+		return FYTK_CHAR;
+	case CXType_UChar:
+		return FYTK_UCHAR;
+	case CXType_SChar:
+		return FYTK_SCHAR;
+	case CXType_Short:
+		return FYTK_SHORT;
+	case CXType_UShort:
+		return FYTK_USHORT;
+	case CXType_Int:
+		return FYTK_INT;
+	case CXType_UInt:
+		return FYTK_UINT;
+	case CXType_Long:
+		return FYTK_LONG;
+	case CXType_ULong:
+		return FYTK_ULONG;
+	case CXType_LongLong:
+		return FYTK_LONGLONG;
+	case CXType_ULongLong:
+		return FYTK_ULONGLONG;
+	case CXType_Int128:
+		return FYTK_INT128;
+	case CXType_UInt128:
+		return FYTK_UINT128;
+	case CXType_Float:
+		return FYTK_FLOAT;
+	case CXType_Double:
+		return FYTK_DOUBLE;
+	case CXType_LongDouble:
+		return FYTK_LONGDOUBLE;
+	case CXType_Float128:
+		return FYTK_FLOAT128;
+	case CXType_Half:
+		return FYTK_FLOAT16;
+
+	/* compound types */
+	case CXType_Pointer:
+		return FYTK_PTR;
+	case CXType_Record:
+		switch (cursor_kind) {
+		case CXCursor_StructDecl:
+			return FYTK_STRUCT;
+		case CXCursor_UnionDecl:
+			return FYTK_UNION;
+		default:
+			break;
+		}
+		/* a record that is neither a struct nor a union declaration */
+		return FYTK_RECORD;
+	case CXType_Enum:
+		return FYTK_ENUM;
+	case CXType_Typedef:
+		return FYTK_TYPEDEF;
+	case CXType_ConstantArray:
+		return FYTK_CONSTARRAY;
+	case CXType_IncompleteArray:
+		return FYTK_INCOMPLETEARRAY;
+
+	case CXType_FunctionProto:
+	case CXType_FunctionNoProto:
+		return FYTK_FUNCTION;
+
+	default:
+		break;
+	}
+
+	return FYTK_INVALID;
+}
+
+/*
+ * Report the signedness of a libclang type kind.
+ *
+ * Returns 1 for unsigned kinds, -1 for signed kinds (the floating point
+ * kinds listed below are treated as signed) and 0 when a sign is not
+ * relevant for the kind.
+ */
+static inline int
+clang_type_kind_signess(enum CXTypeKind clang_type)
+{
+	switch (clang_type) {
+	case CXType_Bool:
+	case CXType_Char_U:	/* plain char on targets where char is unsigned */
+	case CXType_UChar:
+	case CXType_UShort:
+	case CXType_UInt:
+	case CXType_ULong:
+	case CXType_ULongLong:
+	case CXType_UInt128:
+		return 1;	/* unsigned */
+	case CXType_Char_S:
+	case CXType_SChar:
+	case CXType_Short:
+	case CXType_Int:
+	case CXType_Long:
+	case CXType_LongLong:
+	case CXType_Int128:
+	case CXType_Float:
+	case CXType_Double:
+	case CXType_LongDouble:
+		return -1;
+
+	/* anything else doesn't have a sign */
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static struct fy_decl *
+clang_lookup_decl_by_cursor(struct fy_reflection *rfl, CXCursor cursor)
+{
+	struct fy_decl *decl;
+	struct clang_decl_backend *declb;
+
+	for (decl = fy_decl_list_head(&rfl->decls); decl != NULL; decl = fy_decl_next(&rfl->decls, decl)) {
+		declb = decl->backend;
+		if (!declb)
+			continue;
+		if (clang_equalCursors(declb->cursor, cursor))
+			return decl;
+	}
+	return NULL;
+}
+
+/*
+ * Find an already registered type that is equal to the clang type.
+ *
+ * @rfl:  reflection whose type list is searched
+ * @type: clang type to look for
+ * @decl: declaration the type is wanted for, or NULL
+ *
+ * With a declaration, a candidate only matches when its yaml comment is
+ * the same as the one of @decl (a missing comment counts as empty), and
+ * the first such candidate is returned.
+ * Without a declaration, the first equal type is taken, but a later equal
+ * type that carries no raw comment (the bare type) replaces it.
+ *
+ * Types without backend data are skipped. Returns NULL when nothing matches.
+ */
+static struct fy_type *
+clang_lookup_type_by_type(struct fy_reflection *rfl, CXType type, struct fy_decl *decl)
+{
+	struct fy_type *ft, *ft_best;
+	struct clang_type_backend *ftb;
+	const char *c1, *c2;
+
+	ft_best = NULL;
+	for (ft = fy_type_list_head(&rfl->types); ft != NULL; ft = fy_type_next(&rfl->types, ft)) {
+		ftb = ft->backend;
+		if (!ftb)
+			continue;
+		if (!clang_equalTypes(ftb->type, type))
+			continue;
+
+		/* if a decl was provided we must match the comment too */
+		if (decl) {
+			c1 = fy_decl_get_yaml_comment(decl);
+			if (!c1)
+				c1 = "";
+			/* types may be registered without a declaration; those have no comment */
+			c2 = ft->decl ? fy_decl_get_yaml_comment(ft->decl) : NULL;
+			if (!c2)
+				c2 = "";
+			if (!strcmp(c1, c2))
+				return ft;
+		} else if (!ft_best) {
+			/* first one in */
+			ft_best = ft;
+		} else if (!ft->decl || !ft->decl->raw_comment) {
+			/* we prefer the bare type; no declaration means no comment either */
+			ft_best = ft;
+		}
+	}
+	return ft_best;
+}
+
+static struct fy_type *
+clang_register_type(struct fy_reflection *rfl, struct fy_decl *decl, CXCursor cursor)
+{
+	struct fy_type *ft = NULL;
+	struct clang_type_user ftu_local, *ftu = &ftu_local;
+	CXType type;
+	enum fy_type_kind type_kind;
+	const char *type_name;
+	bool elaborated, anonymous;
+
+	type = clang_getCursorType(cursor);
+
+	elaborated = false;
+	if (type.kind == CXType_Elaborated) {
+		elaborated = true;
+		type = clang_Type_getNamedType(type);
+	}
+
+	type_kind = clang_map_type_kind(type.kind, clang_getTypeDeclaration(type).kind);
+	if (type_kind == FYTK_INVALID)
+		return NULL;
+
+	anonymous = decl && decl->anonymous;
+	(void)anonymous;
+
+	if (!decl || fy_type_kind_is_primitive(type_kind) || fy_type_kind_is_like_ptr(type_kind) || type_kind == FYTK_TYPEDEF)
+		type_name = clang_str_get_alloca(clang_getTypeSpelling(type));
+	else {
+		type_name = clang_str_get_alloca(clang_getCursorDisplayName(cursor));
+		if (!type_name[0])
+			type_name = clang_str_get_alloca(clang_getCursorUSR(cursor));
+	}
+
+ ft = clang_lookup_type_by_type(rfl, type, decl); + + if (!ft) { + if (elaborated) { + printf("%s: elaborated type_name=%s does not exist\n", + __func__, type_name); + + /* try to pull it in */ + + goto err_out; + } + + memset(ftu, 0, sizeof(*ftu)); + ftu->type = type; + + ft = fy_type_create(rfl, type_kind, type_name, decl, ftu); + if (!ft) + goto err_out; + fy_type_list_add_tail(&rfl->types, ft); + } +out: + return ft; + +err_out: + fy_type_destroy(ft); + ft = NULL; + goto out; +} + +static enum CXChildVisitResult +fy_import_backend_root_visitor(CXCursor cursor, CXCursor parent, CXClientData client_data) +{ + struct fy_import *imp = client_data; + struct fy_reflection *rfl; + struct clang_decl_user declu_local, *declu = &declu_local; + enum fy_decl_type decl_type; + enum CXCursorKind cursor_kind; + const char *cursor_spelling; + const char *cursor_kind_spelling; + struct fy_decl *decl = NULL; + unsigned int ret; + bool visit_children; + + assert(imp); + rfl = imp->rfl; + assert(rfl); + + visit_children = true; + cursor_kind = clang_getCursorKind(cursor); + switch (cursor_kind) { + case CXCursor_StructDecl: + decl_type = FYDT_STRUCT; + break; + case CXCursor_UnionDecl: + decl_type = FYDT_UNION; + break; + case CXCursor_ClassDecl: + decl_type = FYDT_CLASS; + break; + case CXCursor_EnumDecl: + visit_children = false; + decl_type = FYDT_ENUM; + break; + case CXCursor_TypedefDecl: + decl_type = FYDT_TYPEDEF; + break; + case CXCursor_EnumConstantDecl: + decl_type = FYDT_ENUM_VALUE; + break; + default: + decl_type = FYDT_NONE; + break; + } + + /* cannot handle cursor type */ + if (decl_type == FYDT_NONE) + return CXChildVisit_Continue; + + /* skip declarations only */ + if (!clang_isCursorDefinition(cursor)) + return CXChildVisit_Continue; + +#if 1 + /* if the declaration is present already, do not continue */ + decl = clang_lookup_decl_by_cursor(rfl, cursor); + if (decl) + return CXChildVisit_Continue; +#endif + + cursor_spelling = 
clang_str_get_alloca(clang_getCursorSpelling(cursor)); + cursor_kind_spelling = clang_str_get_alloca(clang_getCursorKindSpelling(cursor_kind)); + (void)cursor_kind_spelling; + + /* visit the children first, so that we pick up definition intermingled */ + if (visit_children) { + ret = clang_visitChildren(cursor, fy_import_backend_root_visitor, imp); + if (ret) + return CXChildVisit_Break; + } + + memset(declu, 0, sizeof(*declu)); + declu->cursor = cursor; + declu->parent = parent; + + decl = fy_decl_create(rfl, imp, NULL, decl_type, cursor_spelling, declu); + if (!decl) + goto err_out; + + decl->type = clang_register_type(rfl, decl, cursor); + if (!decl->type) + goto err_out; + + fy_decl_list_add_tail(&rfl->decls, decl); + + return CXChildVisit_Continue; + +err_out: + fy_decl_destroy(decl); + return CXChildVisit_Break; +} + +static int clang_reflection_setup(struct fy_reflection *rfl) +{ + struct clang_reflect_backend *rflb; + const struct fy_clang_backend_reflection_cfg *backend_cfg; + + if (!rfl->cfg.backend_cfg) + return -1; + + rflb = malloc(sizeof(*rflb)); + if (!rflb) + goto err_out; + memset(rflb, 0, sizeof(*rflb)); + + rfl->backend = rflb; + + backend_cfg = rfl->cfg.backend_cfg; + + rflb->index = clang_createIndex(0, backend_cfg->display_diagnostics ? 
1 : 0); + if (!rflb->index) + goto err_out; + + return 0; + +err_out: + clang_reflection_cleanup(rfl); + return -1; +} + +static void clang_reflection_cleanup(struct fy_reflection *rfl) +{ + struct clang_reflect_backend *rflb; + + if (!rfl || !rfl->backend) + return; + + rflb = rfl->backend; + rfl->backend = NULL; + + if (rflb->index) + clang_disposeIndex(rflb->index); + + free(rflb); +} + +static int clang_import_setup(struct fy_import *imp, const void *user) +{ + const struct fy_clang_backend_import_cfg *clang_cfg = user; + struct fy_reflection *rfl; + struct clang_reflect_backend *rflb; + struct clang_import_backend *impb; + int argc; + const char * const *argv; + const char *default_argv[2]; + int len; + unsigned num_diag, ret; + + if (!clang_cfg || !clang_cfg->file) + return -1; + + rfl = imp->rfl; + rflb = rfl->backend; + + impb = malloc(sizeof(*impb)); + if (!impb) + goto err_out; + memset(impb, 0, sizeof(*impb)); + + imp->backend = impb; + + if (!clang_cfg->argc || !clang_cfg->argv) { + default_argv[0] = "-fparse-all-comments"; + default_argv[1] = NULL; + + argc = 1; + argv = default_argv; + } else { + argc = clang_cfg->argc; + argv = clang_cfg->argv; + } + + assert(rflb); + assert(rflb->index); + + impb->file = strdup(clang_cfg->file); + if (!impb->file) + goto err_out; + + impb->tu = clang_createTranslationUnitFromSourceFile(rflb->index, impb->file, argc, argv, 0, NULL); + if (!impb->tu) + goto err_out; + + /* we don't want any! 
warnings or errors */ + num_diag = clang_getNumDiagnostics(impb->tu); + if (num_diag) + goto err_out; + + impb->ti = clang_getTranslationUnitTargetInfo(impb->tu); + if (!impb->ti) + goto err_out; + + clang_str_setup(&impb->target_triple, clang_TargetInfo_getTriple(impb->ti)); + + ret = clang_visitChildren(clang_getTranslationUnitCursor(impb->tu), fy_import_backend_root_visitor, imp); + if (ret) + goto err_out; + + impb->target_triple_str = clang_str_get(&impb->target_triple); + + len = snprintf(NULL, 0, "%s-%s", impb->file, impb->target_triple_str); + if (len < 0) + goto err_out; + + impb->name = malloc(len + 1); + if (!impb->name) + goto err_out; + + snprintf(impb->name, len + 1, "%s-%s", impb->file, impb->target_triple_str); + + /* forward */ + imp->name = impb->name; + + return 0; + +err_out: + clang_import_cleanup(imp); + return -1; +} + +static void clang_import_cleanup(struct fy_import *imp) +{ + struct clang_import_backend *impb; + + if (!imp || !imp->backend) + return; + + impb = imp->backend; + imp->backend = NULL; + + if (impb->name) + free(impb->name); + + clang_str_cleanup(&impb->target_triple); + + if (impb->ti) + clang_TargetInfo_dispose(impb->ti); + + if (impb->tu) + clang_disposeTranslationUnit(impb->tu); + + if (impb->file) + free(impb->file); + + free(impb); +} + +static void clang_decl_cleanup(struct fy_decl *decl) +{ + struct clang_decl_backend *declb; + + if (!decl || !decl->backend) + return; + + declb = decl->backend; + decl->backend = NULL; + + if (declb->comments_linear_fp) + fclose(declb->comments_linear_fp); + if (declb->comments_linear) + free(declb->comments_linear); + + switch (decl->decl_type) { + case FYDT_TYPEDEF: + clang_str_cleanup(&declb->typedef_info.underlying.type_kind_spelling); + clang_str_cleanup(&declb->typedef_info.underlying.type_spelling); + break; + case FYDT_ENUM: + clang_str_cleanup(&declb->enum_info.inttype.type_kind_spelling); + clang_str_cleanup(&declb->enum_info.inttype.type_spelling); + break; + default: + 
break; + } + + clang_str_cleanup(&declb->type_kind_spelling); + clang_str_cleanup(&declb->type_spelling); + + clang_str_cleanup(&declb->cursor_kind_spelling); + clang_str_cleanup(&declb->cursor_spelling); + clang_str_cleanup(&declb->cursor_display_name); + clang_str_cleanup(&declb->cursor_usr); + + clang_str_cleanup(&declb->raw_comment); + + free(declb); +} + +static enum CXVisitorResult +fy_import_backend_struct_field_visitor(CXCursor cursor, CXClientData client_data) +{ + enum CXVisitorResult res = CXVisit_Continue; + struct fy_import *imp; + struct fy_reflection *rfl; + struct fy_decl *decl = NULL; + struct fy_decl *parent_decl = client_data; + struct clang_decl_user declu_local, *declu = &declu_local; + CXType cursor_type; + const char *cursor_spelling; + const char *cursor_kind_spelling; + const char *type_spelling; + const char *type_kind_spelling; + bool is_bitfield; + + cursor_type = clang_getCursorType(cursor); + + cursor_spelling = clang_str_get_alloca(clang_getCursorSpelling(cursor)); + cursor_kind_spelling = clang_str_get_alloca(clang_getCursorKindSpelling(clang_getCursorKind(cursor))); + type_spelling = clang_str_get_alloca(clang_getTypeSpelling(cursor_type)); + type_kind_spelling = clang_str_get_alloca(clang_getTypeKindSpelling(cursor_type.kind)); + (void)cursor_kind_spelling; + (void)type_spelling; + (void)type_kind_spelling; + + /* fprintf(stderr, "> %s:'%s' cursor %s:'%s' type %s:'%s'\n", + decl_type_txt[parent_decl->decl_type], parent_decl->name, + cursor_kind_spelling, cursor_spelling, + type_kind_spelling, type_spelling); */ + + memset(declu, 0, sizeof(*declu)); + declu->cursor = cursor; + declu->parent = clang_getCursorSemanticParent(cursor); + + imp = parent_decl->imp; + rfl = imp->rfl; + + is_bitfield = clang_Cursor_isBitField(cursor); + + /* create field declaration */ + decl = fy_decl_create(rfl, imp, parent_decl, + !is_bitfield ? 
FYDT_FIELD : FYDT_BITFIELD, + cursor_spelling, declu); + if (!decl) + goto err_out; + + decl->type = clang_register_type(rfl, decl, cursor); + if (!decl->type) + goto err_out; + + /* add to the parent */ + fy_decl_list_add_tail(&parent_decl->children, decl); + +out: + return res; + +err_out: + fy_decl_destroy(decl); + res = CXVisit_Break; + goto out; +} + +static enum CXChildVisitResult +fy_import_backend_enum_visitor(CXCursor cursor, CXCursor parent, CXClientData client_data) +{ + enum CXChildVisitResult res = CXChildVisit_Continue; + struct fy_import *imp; + struct fy_reflection *rfl; + struct fy_decl *decl = NULL; + struct fy_decl *parent_decl = client_data; + struct clang_decl_user declu_local, *declu = &declu_local; + CXType cursor_type; + const char *cursor_spelling; + const char *cursor_kind_spelling; + const char *type_spelling; + const char *type_kind_spelling; + + cursor_type = clang_getCursorType(cursor); + + cursor_spelling = clang_str_get_alloca(clang_getCursorSpelling(cursor)); + cursor_kind_spelling = clang_str_get_alloca(clang_getCursorKindSpelling(clang_getCursorKind(cursor))); + type_spelling = clang_str_get_alloca(clang_getTypeSpelling(cursor_type)); + type_kind_spelling = clang_str_get_alloca(clang_getTypeKindSpelling(cursor_type.kind)); + (void)cursor_kind_spelling; + (void)type_spelling; + (void)type_kind_spelling; + + /* fprintf(stderr, ">>>> %s:'%s' cursor %s:'%s' type %s:'%s'\n", + decl_type_txt[parent_decl->decl_type], parent_decl->name, + cursor_kind_spelling, cursor_spelling, + type_kind_spelling, type_spelling); */ + + memset(declu, 0, sizeof(*declu)); + declu->cursor = cursor; + declu->parent = parent; + + imp = parent_decl->imp; + rfl = imp->rfl; + + /* create field declaration */ + decl = fy_decl_create(rfl, imp, parent_decl, FYDT_ENUM_VALUE, cursor_spelling, declu); + if (!decl) + goto err_out; + + decl->type = clang_register_type(rfl, decl, cursor); + if (!decl->type) + goto err_out; + + /* add to the parent */ + 
fy_decl_list_add_tail(&parent_decl->children, decl);
+
+out:
+	return res;
+
+err_out:
+	fy_decl_destroy(decl);
+	res = CXChildVisit_Break;
+	goto out;
+}
+
+/*
+ * Resolve the source location of a declaration.
+ *
+ * The file, line, column and offset of the declaration's cursor location
+ * are queried from libclang and stored in the declaration backend. The
+ * file is looked up in the reflection's source files and registered on
+ * first sight, recording its file time and whether the location is in a
+ * system header or in the main file.
+ *
+ * @decl: declaration with backend data already attached (asserted)
+ *
+ * Returns a pointer to the location kept inside the declaration backend,
+ * or NULL when a new source file entry could not be created.
+ */
+static const struct fy_source_location *
+clang_backend_get_location(struct fy_decl *decl)
+{
+	struct fy_reflection *rfl;
+	struct fy_import *imp;
+	struct clang_decl_backend *declb;
+	const struct fy_source_location *loc = NULL;
+	CXString cxfilename;
+	const char *filename;
+	struct fy_source_file *source_file;
+
+	assert(decl && decl->backend);
+
+	imp = decl->imp;
+	rfl = imp->rfl;
+	declb = decl->backend;
+
+	/*
+	 * the file location is what gets recorded; clang_getExpansionLocation()
+	 * takes the same arguments should expansion locations be wanted instead
+	 */
+	clang_getFileLocation(declb->location, &declb->file, &declb->line, &declb->column, &declb->offset);
+
+	cxfilename = clang_getFileName(declb->file);
+	/* NOTE(review): can be NULL for a location without a file - confirm the source file helpers accept that */
+	filename = clang_getCString(cxfilename);
+
+	source_file = fy_reflection_lookup_source_file(rfl, filename);
+	if (!source_file) {
+		/* first declaration seen from this file, register it */
+		source_file = fy_source_file_create(rfl, filename);
+		if (!source_file)
+			goto out;
+
+		source_file->filetime = clang_getFileTime(declb->file);
+
+		source_file->system_header = !!clang_Location_isInSystemHeader(declb->location);
+		source_file->main_file = !!clang_Location_isFromMainFile(declb->location);
+
+		fy_source_file_list_add_tail(&rfl->source_files, source_file);
+	}
+
+	memset(&declb->source_location, 0, sizeof(declb->source_location));
+
+	declb->source_location.source_file = source_file;
+	declb->source_location.line = declb->line;
+	declb->source_location.column = declb->column;
+	declb->source_location.offset = declb->offset;
+
+	loc = &declb->source_location;
+
+out:
+	/* the CXString is released on every path, success or failure */
+	clang_disposeString(cxfilename);
+
+	return loc;
+}
+
+static int clang_decl_setup(struct fy_decl *decl, void *user)
+{
+	const struct clang_decl_user *declu = user;
+	struct clang_decl_backend *declb;
+	const
char *raw_comment; + int signess; + + /* fake declaration */ + if (!declu) + return 0; + + declb = malloc(sizeof(*declb)); + if (!declb) + goto err_out; + memset(declb, 0, sizeof(*declb)); + + decl->backend = declb; + + declb->cursor = declu->cursor; + declb->location = clang_getCursorLocation(declu->cursor); + declb->type = clang_getCursorType(declu->cursor); + declb->comment = clang_Cursor_getParsedComment(declu->cursor); + + clang_str_setup(&declb->cursor_kind_spelling, clang_getCursorKindSpelling(clang_getCursorKind(declu->cursor))); + clang_str_setup(&declb->cursor_spelling, clang_getCursorSpelling(declu->cursor)); + clang_str_setup(&declb->cursor_display_name, clang_getCursorDisplayName(declu->cursor)); + clang_str_setup(&declb->cursor_usr, clang_getCursorUSR(declu->cursor)); + + clang_str_setup(&declb->type_kind_spelling, clang_getTypeKindSpelling(declb->type.kind)); + clang_str_setup(&declb->type_spelling, clang_getTypeSpelling(declb->type)); + + /* must be after the backend assignment */ + decl->source_location = clang_backend_get_location(decl); + decl->spelling = clang_str_get(&declb->cursor_spelling); + decl->display_name = clang_str_get(&declb->cursor_display_name); + decl->signature = clang_str_get(&declb->cursor_usr); + + /* mark it as anonymous */ + decl->anonymous = !!clang_Cursor_isAnonymous(declu->cursor) || !decl->name || !decl->name[0]; + + decl->in_system_header = !!clang_Location_isInSystemHeader(declb->location); + decl->from_main_file = !!clang_Location_isFromMainFile(declb->location); + + switch (decl->decl_type) { + case FYDT_TYPEDEF: + declb->typedef_info.underlying.type = clang_getTypedefDeclUnderlyingType(declu->cursor); + declb->typedef_info.underlying.cursor = clang_getTypeDeclaration(declb->typedef_info.underlying.type); + clang_str_setup(&declb->typedef_info.underlying.type_kind_spelling, clang_getTypeKindSpelling(declb->typedef_info.underlying.type.kind)); + clang_str_setup(&declb->typedef_info.underlying.type_spelling, 
clang_getTypeSpelling(declb->typedef_info.underlying.type)); + + case FYDT_STRUCT: + case FYDT_UNION: + /* return value is bogus, will return non zero at the end of a normal visit */ + (void)clang_Type_visitFields(declb->type, fy_import_backend_struct_field_visitor, decl); + break; + + case FYDT_ENUM: + declb->enum_info.inttype.type = clang_getEnumDeclIntegerType(declu->cursor); + clang_str_setup(&declb->enum_info.inttype.type_kind_spelling, clang_getTypeKindSpelling(declb->enum_info.inttype.type.kind)); + clang_str_setup(&declb->enum_info.inttype.type_spelling, clang_getTypeSpelling(declb->enum_info.inttype.type)); + + /* XXX no concrete type for the enum, only storage size */ + decl->enum_decl.type_kind = clang_map_type_kind(declb->enum_info.inttype.type.kind, CXCursor_EnumDecl); + assert(fy_type_kind_is_enum_constant_decl(decl->enum_decl.type_kind)); + + /* there is no field for enums, so visit children */ + clang_visitChildren(declu->cursor, fy_import_backend_enum_visitor, decl); + break; + + case FYDT_ENUM_VALUE: + assert(decl->parent->decl_type == FYDT_ENUM); + + decl->enum_value_decl.type_kind = clang_map_type_kind(declb->type.kind, CXCursor_EnumConstantDecl); + assert(fy_type_kind_is_enum_constant_decl(decl->enum_value_decl.type_kind)); + + signess = clang_type_kind_signess(declb->type.kind); + assert(signess != 0); + if (signess > 0) + decl->enum_value_decl.val.u = clang_getEnumConstantDeclUnsignedValue(declu->cursor); + else + decl->enum_value_decl.val.s = clang_getEnumConstantDeclValue(declu->cursor); + break; + + case FYDT_FIELD: + assert(decl->parent->decl_type == FYDT_STRUCT || decl->parent->decl_type == FYDT_UNION); + decl->field_decl.byte_offset = clang_Cursor_getOffsetOfField(declu->cursor) / 8; + break; + + case FYDT_BITFIELD: + assert(decl->parent->decl_type == FYDT_STRUCT || decl->parent->decl_type == FYDT_UNION); + decl->bitfield_decl.bit_offset = clang_Cursor_getOffsetOfField(declu->cursor); + decl->bitfield_decl.bit_width = 
clang_getFieldDeclBitWidth(declu->cursor); + break; + + case FYDT_FUNCTION: + abort(); + break; + + default: + break; + } + + clang_str_setup(&declb->raw_comment, clang_Cursor_getRawCommentText(declu->cursor)); + raw_comment = clang_str_get(&declb->raw_comment); + if (raw_comment && strlen(raw_comment) > 0) + decl->raw_comment = raw_comment; + + return 0; + +err_out: + clang_decl_cleanup(decl); + return -1; +} + +static int clang_type_setup(struct fy_type *ft, void *user) +{ + const struct clang_type_user *ftu = user; + struct clang_type_user fttu_local, *fttu = &fttu_local; + struct fy_reflection *rfl = ft->rfl; + struct clang_type_backend *ftb; + const char *tname; + CXType ttype; + enum fy_type_kind ttype_kind; + struct fy_type *ftt; + bool elaborated; + struct fy_type *ft2; + struct clang_type_backend *ft2b; + long long llret; + int ret = 0; + + /* fake type */ + if (!ftu) + return 0; + + ftb = malloc(sizeof(*ftb)); + if (!ftb) + goto err_out; + memset(ftb, 0, sizeof(*ftb)); + + ftb->type = ftu->type; + + ft->backend = ftb; + + /* fill-in size and align for all types */ + ft->size = 0; + ft->align = 0; + + llret = clang_Type_getSizeOf(ftb->type); + if (llret > 0) + ft->size = llret; + llret = clang_Type_getAlignOf(ftb->type); + if (llret > 0) + ft->align = llret; + + if (ft->type_kind == FYTK_CONSTARRAY) + ft->element_count = clang_getNumElements(ftb->type); /* err, int should be enough for element counts */ + else + ft->element_count = 1; + + /* finally update the qualifiers */ + ft->is_const = !!clang_isConstQualifiedType(ftb->type); + ft->is_volatile = !!clang_isVolatileQualifiedType(ftb->type); + ft->is_restrict = !!clang_isRestrictQualifiedType(ftb->type); + + if (fy_type_kind_is_dependent(ft->type_kind)) { + + switch (ft->type_kind) { + case FYTK_TYPEDEF: + ttype = clang_getTypedefDeclUnderlyingType(clang_getTypeDeclaration(ftb->type)); + break; + case FYTK_PTR: + ttype = clang_getPointeeType(ftb->type); + break; + case FYTK_CONSTARRAY: + case 
FYTK_INCOMPLETEARRAY: + ttype = clang_getArrayElementType(ftb->type); + break; + case FYTK_ENUM: + ttype = clang_getEnumDeclIntegerType(clang_getTypeDeclaration(ftb->type)); + break; + default: + memset(&ttype, 0, sizeof(ttype)); + abort(); + break; + } + + ftb->dependent_type = ttype; + + elaborated = false; + if (ttype.kind == CXType_Elaborated) { + elaborated = true; + ttype = clang_Type_getNamedType(ttype); + } + (void)elaborated; + + tname = clang_str_get_alloca(clang_getTypeSpelling(ttype)); + + ttype_kind = clang_map_type_kind(ttype.kind, clang_getTypeDeclaration(ttype).kind); + + assert(ttype_kind != FYTK_INVALID); + + ftt = clang_lookup_type_by_type(rfl, ttype, NULL); + + if (!ftt && (ttype_kind == FYTK_INVALID || ttype_kind == FYTK_FUNCTION || + fy_type_kind_is_primitive(ttype_kind) || fy_type_kind_is_like_ptr(ttype_kind))) { + /* builtin or pointer but not registered */ + + memset(fttu, 0, sizeof(*fttu)); + fttu->type = ttype; + + if (ttype_kind == FYTK_FUNCTION) { + struct fy_decl *declf; + struct clang_decl_user declfu_local, *declfu = &declfu_local; + struct fy_import *imp; + + memset(declfu, 0, sizeof(*declfu)); + declfu->cursor = clang_getNullCursor(); + declfu->parent = clang_getNullCursor(); + declfu->is_fake_func = true; + + if (ft->decl) + imp = ft->decl->imp; + else + imp = rfl->imp_curr; + + assert(imp); + declf = fy_decl_create(rfl, imp, NULL, FYDT_FUNCTION, tname, NULL); + if (!declf) + goto err_out; + + fy_decl_list_add_tail(&rfl->decls, declf); + + ftt = fy_type_create(rfl, ttype_kind, tname, declf, fttu); + if (!ftt) + goto err_out; + + fy_type_list_add_tail(&rfl->types, ftt); + + declf->type = ftt; + } else { + ftt = fy_type_create(rfl, ttype_kind, tname, NULL, fttu); + if (!ftt) + goto err_out; + + fy_type_list_add_tail(&rfl->types, ftt); + + } + } + + ftb->dependent_type = ttype; + + clang_str_setup(&ftb->dependent_type_name, clang_getTypeSpelling(ttype)); + + if (ftt) { + ft->dependent_type = ftt; + ft->unresolved = false; + } else { 
+ ft->unresolved = true; + rfl->unresolved_types_count++; + } + /* save this info */ + ft->dependent_type_kind = ttype_kind; + ft->dependent_type_name = clang_str_get(&ftb->dependent_type_name); + } + + /* look for unresolves that match */ + for (ft2 = fy_type_list_head(&rfl->types); + rfl->unresolved_types_count > 0 && ft2 != NULL; + ft2 = fy_type_next(&rfl->types, ft2)) { + + if (!ft2->unresolved) + continue; + + ft2b = ft2->backend; + + if (!clang_equalTypes(ft2b->dependent_type, ftb->type)) + continue; + + ft2->unresolved = false; + ft2->was_fwd_declared = true; + ft2->dependent_type = ft; + + assert(rfl->unresolved_types_count > 0); + rfl->unresolved_types_count--; + } + +out: + return ret; + +err_out: + clang_type_cleanup(ft); + ret = -1; + goto out; +} + +static void clang_type_cleanup(struct fy_type *ft) +{ + struct clang_type_backend *ftb; + + if (!ft || !ft->backend) + return; + + ftb = ft->backend; + ft->backend = NULL; + + clang_str_cleanup(&ftb->dependent_type_name); + + free(ftb); +} diff --git a/src/reflection/fy-clang-backend.h b/src/reflection/fy-clang-backend.h new file mode 100644 index 00000000..c6d99efe --- /dev/null +++ b/src/reflection/fy-clang-backend.h @@ -0,0 +1,27 @@ +/* + * fy-clang-backend.h - Clang reflection backend header + * + * Copyright (c) 2023 Pantelis Antoniou + * + * SPDX-License-Identifier: MIT + */ +#ifndef FY_CLANG_BACKEND_H +#define FY_CLANG_BACKEND_H + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +struct fy_clang_backend_reflection_cfg { + bool display_diagnostics; +}; + +struct fy_clang_backend_import_cfg { + const char *file; + int argc; + const char * const *argv; +}; + +#endif diff --git a/src/reflection/fy-packed-backend.c b/src/reflection/fy-packed-backend.c new file mode 100644 index 00000000..0910e073 --- /dev/null +++ b/src/reflection/fy-packed-backend.c @@ -0,0 +1,1262 @@ +/* + * fy-packed-backend.c - Packed blob C type backend + * + * Copyright (c) 2023 Pantelis Antoniou + * + * 
SPDX-License-Identifier: MIT + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "fy-endian.h" +#include "fy-blob.h" + +#include "fy-reflection-private.h" + +#include "fy-packed-backend.h" + +static int packed_reflection_setup(struct fy_reflection *rfl); +static void packed_reflection_cleanup(struct fy_reflection *rfl); + +static int packed_import_setup(struct fy_import *imp, const void *user); +static void packed_import_cleanup(struct fy_import *imp); + +static int packed_decl_setup(struct fy_decl *decl, void *user); +static void packed_decl_cleanup(struct fy_decl *decl); + +static int packed_type_setup(struct fy_type *ft, void *user); +static void packed_type_cleanup(struct fy_type *ft); + +static const struct fy_reflection_backend_ops packed_ops = { + .reflection_setup = packed_reflection_setup, + .reflection_cleanup = packed_reflection_cleanup, + .import_setup = packed_import_setup, + .import_cleanup = packed_import_cleanup, + .type_setup = packed_type_setup, + .type_cleanup = packed_type_cleanup, + .decl_setup = packed_decl_setup, + .decl_cleanup = packed_decl_cleanup, +}; + +const struct fy_reflection_backend fy_reflection_packed_backend = { + .name = "packed", + .ops = &packed_ops, +}; + +struct packed_reflect_backend { + const struct fy_packed_type_info *type_info; + /* if we're using a blob */ + struct fy_packed_type_info gen_type_info; + struct fy_type_p *gen_types; + struct fy_decl_p *gen_decls; + const void *blob; /* pointer to the original blob */ + size_t blob_size; + void *blob_copy; /* the copy */ +}; + +struct packed_import_backend { + char *name; +}; + +struct packed_decl_user { + const struct fy_decl_p *declp; + const struct fy_decl_p *decl_parentp; +}; + +struct packed_decl_backend { + const struct fy_decl_p *declp; + const struct fy_type_p *typep; +}; + +struct 
packed_type_user { + const struct fy_type_p *typep; +}; + +struct packed_type_backend { + const struct fy_type_p *typep; + const struct fy_type_p *dependent_typep; +}; + +static struct fy_type * +packed_lookup_type_by_typep(struct fy_reflection *rfl, const struct fy_type_p *fytp) +{ + struct fy_type *ft; + struct packed_type_backend *ftb; + + for (ft = fy_type_list_head(&rfl->types); ft != NULL; ft = fy_type_next(&rfl->types, ft)) { + ftb = ft->backend; + if (fytp == ftb->typep) + return ft; + } + return NULL; +} + +static char * +packed_type_generate_name(struct fy_reflection *rfl, const struct fy_type_p *fytp) +{ + struct packed_reflect_backend *rflb = rfl->backend; + const struct fy_packed_type_info *ti = rflb->type_info; + const struct fy_decl_p *declp; + enum fy_type_kind tk, dtk; + const char *basename, *declname, *pfx; + const struct fy_type_p *fytdp = NULL; + char *depname = NULL, *depname_ext = NULL; + const char *sep, *s; + FILE *fp; + char *buf; + size_t len; + int ret; + bool error = false; + + tk = fytp->type_kind; + basename = fy_type_kind_info_get_internal(tk)->name; + + buf = NULL; + len = 0; + fp = open_memstream(&buf, &len); + if (!fp) + return NULL; + + if (fytp->flags & FYTPF_CONST) { + ret = fprintf(fp, "const "); + if (ret < 0) + goto err_out; + } + + if (fytp->flags & FYTPF_VOLATILE) { + ret = fprintf(fp, "volatile "); + if (ret < 0) + goto err_out; + } + + if (fy_type_kind_is_primitive(tk) || tk == FYTK_INVALID) { + ret = fprintf(fp, "%s", basename); + if (ret < 0) + goto err_out; + goto out; + } + + declp = fy_decl_p_from_id(ti, fytp->decl); + if (!declp) + goto err_out; + + declname = fy_decl_p_name(ti, declp); + if (!declname) + goto err_out; + + if (fy_type_kind_is_record(tk) || tk == FYTK_ENUM || tk == FYTK_TYPEDEF || tk == FYTK_FUNCTION) { + ret = fprintf(fp, "%s", declname); + if (ret < 0) + goto err_out; + goto out; + } + + dtk = FYTK_INVALID; + if (fy_type_kind_is_dependent(tk)) { + fytdp = fy_type_p_from_id(ti, 
fytp->dependent_type); + if (!fytdp) + goto err_out; + dtk = fytdp->type_kind; + depname = packed_type_generate_name(rfl, fytdp); + if (!depname) + goto err_out; + + if (dtk == FYTK_STRUCT || dtk == FYTK_UNION) { + if (dtk == FYTK_STRUCT) + pfx = "struct"; + else + pfx = "union"; + ret = asprintf(&depname_ext, "%s %s", pfx, depname); + if (ret < 0) + goto err_out; + free(depname); + depname = depname_ext; + } + } + + buf = NULL; + switch (tk) { + + case FYTK_PTR: + if (dtk != FYTK_FUNCTION) { + sep = fy_type_kind_is_like_ptr(dtk) ? "" : " "; + ret = fprintf(fp, "%s%s*", depname, sep); + } else { + /* function names are int (int, char) like */ + + s = strchr(depname, '('); + if (s) + ret = fprintf(fp, "%.*s(*)%s", (int)(s - depname), depname, s); + else + ret = fprintf(fp, "%s *", depname); + } + if (fytp->flags & FYTPF_RESTRICT) { + ret = fprintf(fp, "restrict"); + if (ret < 0) + goto err_out; + } + break; + + case FYTK_INCOMPLETEARRAY: + sep = fy_type_kind_is_like_ptr(dtk) ? "" : " "; + ret = fprintf(fp, "%s%s[]", depname, sep); + break; + + case FYTK_CONSTARRAY: + sep = fy_type_kind_is_like_ptr(dtk) ? 
"" : " "; + ret = fprintf(fp, "%s%s[%llu]", depname, sep, fytp->element_count); + break; + + case FYTK_FUNCTION: + ret = fprintf(fp, "%s()", depname); + break; + default: + abort(); + break; + } + + if (ret < 0) + goto err_out; + +out: + if (depname) + free(depname); + + fclose(fp); + + if (error) { + free(buf); + buf = NULL; + } + + return buf; + +err_out: + error = true; + goto out; +} + +static int packed_reflection_setup_blob(struct fy_reflection *rfl); + +static int packed_reflection_setup(struct fy_reflection *rfl) +{ + struct packed_reflect_backend *rflb; + const struct fy_packed_backend_reflection_cfg *cfg = rfl->cfg.backend_cfg; + int ret; + + /* verify */ + if (!cfg) + return -1; + + switch (cfg->type) { + case FYPRT_TYPE_INFO: + if (!cfg->type_info) + return -1; + break; + + case FYPRT_BLOB: + if (!cfg->blob || cfg->blob_size < PGHDR_SIZE) + return -1; + break; + + default: + return -1; + } + + rflb = malloc(sizeof(*rflb)); + if (!rflb) + goto err_out; + memset(rflb, 0, sizeof(*rflb)); + + rfl->backend = rflb; + + switch (cfg->type) { + case FYPRT_TYPE_INFO: + rflb->type_info = cfg->type_info; + break; + + case FYPRT_BLOB: + ret = packed_reflection_setup_blob(rfl); + if (ret) + goto err_out; + break; + } + + return 0; + +err_out: + packed_reflection_cleanup(rfl); + return -1; +} + +static void packed_reflection_cleanup(struct fy_reflection *rfl) +{ + struct packed_reflect_backend *rflb; + + if (!rfl || !rfl->backend) + return; + + rflb = rfl->backend; + rfl->backend = NULL; + + if (rflb->gen_types) + free(rflb->gen_types); + if (rflb->gen_decls) + free(rflb->gen_decls); + + if (rflb->blob_copy) + free(rflb->blob_copy); + + free(rflb); +} + +static int packed_do_import(struct fy_import *imp) +{ + struct fy_reflection *rfl = imp->rfl; + struct packed_reflect_backend *rflb = rfl->backend; + struct packed_type_user fttu_local, *fttu = &fttu_local; + const struct fy_packed_type_info *ti = rflb->type_info; + const struct fy_type_p *fytp, *fyt_parentp; + const 
struct fy_decl_p *declp, *decl_parentp, *declpe; + struct packed_decl_user declu_local, *declu = &declu_local; + struct fy_type *ft; + struct fy_decl *decl, *decl_parent; + char *type_name; + + (void)rflb; + + /* now do the decls */ + declp = ti->decls; + declpe = declp + ti->decls_count; + while (declp < declpe) { + + assert(!fy_decl_type_has_parent(declp->decl_type)); + + fytp = fy_type_p_from_id(ti, declp->type); + + memset(declu, 0, sizeof(*declu)); + declu->declp = declp; + declu->decl_parentp = NULL; + + decl = fy_decl_create(rfl, imp, NULL, declp->decl_type, fy_decl_p_name(ti, declp), declu); + assert(decl); + if (!decl) + goto err_out; + + decl->type = packed_lookup_type_by_typep(rfl, fytp); + fy_decl_list_add_tail(&rfl->decls, decl); + + if (!fy_decl_type_has_children(declp->decl_type)) { + + if (!decl->type) { + memset(fttu, 0, sizeof(*fttu)); + fttu->typep = fytp; + + type_name = packed_type_generate_name(rfl, fytp); + ft = fy_type_create(rfl, fytp->type_kind, type_name, decl, fttu); + free(type_name); + assert(ft != NULL); + + fy_type_list_add_tail(&rfl->types, ft); + + decl->type = ft; + } + declp++; + continue; + } + + decl_parent = decl; + decl_parentp = declp; + fyt_parentp = fytp; + declp++; + + while (declp < declpe && fy_decl_type_has_parent(declp->decl_type)) { + + fytp = fy_type_p_from_id(ti, declp->type); + + memset(declu, 0, sizeof(*declu)); + declu->declp = declp; + declu->decl_parentp = decl_parentp; + + assert(decl_parent); + decl = fy_decl_create(rfl, imp, decl_parent, declp->decl_type, fy_decl_p_name(ti, declp), declu); + assert(decl); + if (!decl) + goto err_out; + + decl->type = packed_lookup_type_by_typep(rfl, fytp); + if (!decl->type) { + memset(fttu, 0, sizeof(*fttu)); + fttu->typep = fytp; + + type_name = packed_type_generate_name(rfl, fytp); + ft = fy_type_create(rfl, fytp->type_kind, type_name, decl, fttu); + free(type_name); + assert(ft != NULL); + + fy_type_list_add_tail(&rfl->types, ft); + + decl->type = ft; + } + + 
fy_decl_list_add_tail(&decl_parent->children, decl); + + declp++; + } + if (!decl_parent->type) { + + memset(fttu, 0, sizeof(*fttu)); + fttu->typep = fyt_parentp; + + type_name = packed_type_generate_name(rfl, fyt_parentp); + ft = fy_type_create(rfl, fyt_parentp->type_kind, type_name, decl_parent, fttu); + free(type_name); + assert(ft != NULL); + + fy_type_list_add_tail(&rfl->types, ft); + + decl_parent->type = ft; + } + } + + /* and fixup type size, aligns */ + fy_reflection_fixup_size_align(rfl); + + return 0; + +err_out: + return -1; +} + +static int packed_import_setup(struct fy_import *imp, const void *user) +{ + struct fy_reflection *rfl; + struct packed_reflect_backend *rflb; + struct packed_import_backend *impb; + int len, ret; + + rfl = imp->rfl; + rflb = rfl->backend; + + impb = malloc(sizeof(*impb)); + if (!impb) + goto err_out; + memset(impb, 0, sizeof(*impb)); + + imp->backend = impb; + + assert(rflb); + + len = snprintf(NULL, 0, "packed@%p", rflb->type_info); + if (len < 0) + goto err_out; + + impb->name = malloc(len + 1); + if (!impb->name) + goto err_out; + + snprintf(impb->name, len + 1, "packed@%p", rflb->type_info); + + /* forward */ + imp->name = impb->name; + + ret = packed_do_import(imp); + if (ret) + goto err_out; + + return 0; + +err_out: + packed_import_cleanup(imp); + return -1; +} + +static void packed_import_cleanup(struct fy_import *imp) +{ + struct packed_import_backend *impb; + + if (!imp || !imp->backend) + return; + + impb = imp->backend; + imp->backend = NULL; + + if (impb->name) + free(impb->name); + + free(impb); +} + +static int packed_decl_setup(struct fy_decl *decl, void *user) +{ + struct fy_reflection *rfl = decl->imp->rfl; + struct packed_reflect_backend *rflb = rfl->backend; + const struct fy_packed_type_info *ti = rflb->type_info; + const struct packed_decl_user *declu = user; + struct packed_decl_backend *declb; + const struct fy_decl_p *declp; + const struct fy_type_p *typep; + const char *name; + int signess; + + rfl = 
decl->imp->rfl; + + declb = malloc(sizeof(*declb)); + if (!declb) + goto err_out; + memset(declb, 0, sizeof(*declb)); + + decl->backend = declb; + + declp = declu->declp; + declb->declp = declp; + + typep = fy_type_p_from_id(ti, declp->type); + + name = fy_str_from_p(ti, declp->name); + + decl->anonymous = !name || !name[0]; + decl->in_system_header = false; + decl->from_main_file = true; + + switch (decl->decl_type) { + case FYDT_ENUM: + assert(typep); + break; + + case FYDT_ENUM_VALUE: + assert(decl->parent); + assert(decl->parent->decl_type == FYDT_ENUM); + + decl->enum_value_decl.type_kind = typep->type_kind; + signess = fy_type_kind_signess(decl->enum_value_decl.type_kind); + assert(signess != 0); + if (signess > 0) + decl->enum_value_decl.val.u = declp->enum_value.u; + else + decl->enum_value_decl.val.s = declp->enum_value.s; + break; + + case FYDT_FIELD: + assert(decl->parent); + assert(decl->parent->decl_type == FYDT_STRUCT || decl->parent->decl_type == FYDT_UNION); + decl->field_decl.byte_offset = 0; + break; + + case FYDT_BITFIELD: + assert(decl->parent); + assert(decl->parent->decl_type == FYDT_STRUCT || decl->parent->decl_type == FYDT_UNION); + decl->bitfield_decl.bit_offset = 0; + decl->bitfield_decl.bit_width = declp->bit_width; + break; + + default: + break; + } + + /* fill in the linear comments */ + decl->raw_comment = fy_str_from_p(ti, declp->comment); + + return 0; + +err_out: + packed_decl_cleanup(decl); + return -1; +} + +static void packed_decl_cleanup(struct fy_decl *decl) +{ + struct packed_decl_backend *declb; + + if (!decl || !decl->backend) + return; + + declb = decl->backend; + decl->backend = NULL; + + free(declb); +} + +static int packed_type_setup(struct fy_type *ft, void *user) +{ + struct fy_reflection *rfl = ft->rfl; + struct packed_reflect_backend *rflb = rfl->backend; + const struct fy_packed_type_info *ti = rflb->type_info; + const struct packed_type_user *ftu = user; + struct packed_type_backend *ftb; + const struct fy_type_p 
*typep; + struct fy_type *ft2; + struct packed_type_backend *ft2b; + enum fy_type_kind ttype_kind; + struct fy_type *ftt; + struct packed_type_user fttu_local, *fttu = &fttu_local; + char *dependent_type_name; + int ret = 0; + + ftb = malloc(sizeof(*ftb)); + if (!ftb) + goto err_out; + memset(ftb, 0, sizeof(*ftb)); + + typep = ftu->typep; + assert(typep); + + ftb->typep = typep; + + ft->backend = ftb; + + /* primitive types need not define size/align */ + if (fy_type_kind_is_primitive(ft->type_kind) || ft->type_kind == FYTK_FUNCTION) { + ft->size = fy_type_kind_info_get_internal(ft->type_kind)->size; + ft->align = fy_type_kind_info_get_internal(ft->type_kind)->align; + } else { + ft->size = 0; /* must be filled later */ + ft->align = 0; + } + + if (ft->type_kind == FYTK_CONSTARRAY) + ft->element_count = typep->element_count; + else + ft->element_count = 1; + + /* finally update the qualifiers */ + ft->is_const = !!(typep->flags & FYTPF_CONST); + ft->is_volatile = !!(typep->flags & FYTPF_VOLATILE); + ft->is_restrict = !!(typep->flags & FYTPF_RESTRICT); + + if (fy_type_kind_is_dependent(ft->type_kind)) { + ftb->dependent_typep = fy_type_p_from_id(ti, typep->dependent_type); + assert(ftb->dependent_typep); + ttype_kind = ftb->dependent_typep->type_kind; + ftt = packed_lookup_type_by_typep(rfl, ftb->dependent_typep); + if (!ftt && (ttype_kind == FYTK_INVALID || ttype_kind == FYTK_FUNCTION || + fy_type_kind_is_primitive(ttype_kind) || fy_type_kind_is_like_ptr(ttype_kind))) { + memset(fttu, 0, sizeof(*fttu)); + fttu->typep = ftb->dependent_typep; + + dependent_type_name = packed_type_generate_name(rfl, ftb->dependent_typep); + ftt = fy_type_create(rfl, ttype_kind, dependent_type_name, NULL, fttu); + free(dependent_type_name); + assert(ftt != NULL); + if (!ftt) + goto err_out; + + fy_type_list_add_tail(&rfl->types, ftt); + } + + if (ftt) { + ft->dependent_type = ftt; + ft->unresolved = false; + } else { + ft->unresolved = true; + rfl->unresolved_types_count++; + } + } + + 
/*
 * Write str to fp as a double quoted C string literal.
 *
 * The double quote and the backslash are escaped so that the emitted literal
 * stays well formed; CR, LF and TAB are written as \r, \n and \t. Any other
 * control character and every byte outside printable ASCII is dropped.
 */
static void fp_quoted_string(FILE *fp, const char *str)
{
	/* unsigned so that the range checks do not depend on char signedness */
	const unsigned char *s = (const unsigned char *)str;
	unsigned char c;

	// XXX should be utf8 (use libfyaml)

	fputc('"', fp);
	while ((c = *s++) != '\0') {
		switch (c) {
		case '"':
			fputs("\\\"", fp);
			break;
		case '\\':
			fputs("\\\\", fp);
			break;
		case '\r':
			fputs("\\r", fp);
			break;
		case '\n':
			fputs("\\n", fp);
			break;
		case '\t':
			fputs("\\t", fp);
			break;
		default:
			if (c >= 0x20 && c <= 0x7e)
				fputc(c, fp);
			break;
		}
	}
	fputc('"', fp);
}
fprintf(fp, "%lld", decl->enum_value_decl.val.s); + fprintf(fp, ","); + break; + default: + break; + } + + yaml_comment = fy_decl_get_yaml_comment(decl); + if (yaml_comment) { + fprintf(fp, ".comment.str = "); + raw_comment = alloca(strlen("// yaml: ") + strlen(yaml_comment) + 1); + strcpy(raw_comment, "// yaml: "); + strcat(raw_comment, yaml_comment); + fp_quoted_string(fp, raw_comment); + fprintf(fp, ", "); + } + + fprintf(fp, "},"); + + fprintf(fp, "\n"); +} + +static void type_generate_one_fp(struct fy_packed_generator *pg, struct fy_type *ft, FILE *fp) +{ + static const char *qual_map[8] = { + "", /* RVC = 000 */ + "FYTPF_CONST", /* RVC = 001 */ + "FYTPF_VOLATILE", /* RVC = 010 */ + "FYTPF_VOLATILE | FYTPF_CONST", /* RVC = 011 */ + "FYTPF_RESTRICT", /* RVC = 100 */ + "FYTPF_RESTRICT | FYTPF_CONST", /* RVC = 101 */ + "FYTPF_RESTRICT | FYTPF_VOLATILE", /* RVC = 110 */ + "FYTPF_RESTRICT | FYTPF_VOLATILE | FYTPF_CONST",/* RVC = 111 */ + }; + int qual_id; + + fprintf(fp, "\t[%d] = { .type_kind = %s, ", + ft->id, + fy_type_kind_info_get_internal(ft->type_kind)->enum_name); + + if (ft->decl) + fprintf(fp, ".decl.id = %d, ", ft->decl->id); + + if (fy_type_kind_is_dependent(ft->type_kind)) + fprintf(fp, ".dependent_type.id = %d, ", + ft->dependent_type ? 
ft->dependent_type->id : -1); + + if (ft->type_kind == FYTK_CONSTARRAY) + fprintf(fp, ".element_count = %llu, ", ft->element_count); + + if (ft->is_const || ft->is_volatile || ft->is_restrict) { + qual_id = ((int)!!ft->is_restrict << 2) | + ((int)!!ft->is_volatile << 1) | + (int)!!ft->is_const; + + fprintf(fp, ".flags = %s, ",qual_map[qual_id]); + } + + fprintf(fp, "},"); + fprintf(fp, "\n"); +} + +static int packed_generate_fp(struct fy_packed_generator *pg, FILE *fp) +{ + struct fy_reflection *rfl = pg->rfl; + struct fy_decl *decl, *declc; + struct fy_type *ft; + const char *use_static, *decls_name, *types_name, *type_info_name; + + decls_name = pg->decls_name; + if (!decls_name) + decls_name = "decls"; + types_name = pg->types_name; + if (!types_name) + types_name = "types"; + type_info_name = pg->type_info_name; + if (!type_info_name) + type_info_name = "type_info"; + + use_static = pg->use_static ? "static " : ""; + + fprintf(fp, "%sconst struct fy_decl_p %s[] = {\n", use_static, decls_name); + for (decl = fy_decl_list_head(&rfl->decls); decl != NULL; decl = fy_decl_next(&rfl->decls, decl)) { + decl_generate_one_fp(pg, decl, fp); + for (declc = fy_decl_list_head(&decl->children); declc != NULL; declc = fy_decl_next(&decl->children, declc)) + decl_generate_one_fp(pg, declc, fp); + } + fprintf(fp, "};\n"); + + fprintf(fp, "%sconst struct fy_type_p %s[] = {\n", use_static, types_name); + for (ft = fy_type_list_head(&rfl->types); ft != NULL; ft = fy_type_next(&rfl->types, ft)) { + type_generate_one_fp(pg, ft, fp); + } + fprintf(fp, "};\n"); + + fprintf(fp, "%sconst struct fy_packed_type_info %s = {\n" + "\t.types = %s,\n" + "\t.types_count = sizeof(%s)/sizeof(%s[0]),\n" + "\t.decls = %s,\n" + "\t.decls_count = sizeof(%s)/sizeof(%s[0]),\n", + use_static, type_info_name, + types_name, types_name, types_name, + decls_name, decls_name, decls_name); + + fprintf(fp, "};\n"); + + return 0; +} + +#define BLOB_ENDIAN BET_BIG_ENDIAN + +struct blob_writer { + int Tc, Dc; // 
type count, decl count + enum blob_id_size Di, Ti, Si, Ci, Vi; // id sizes + br_wid_func Dwf, Twf, Swf, Cwf, Vwf; // write methods + br_rid_func Drf, Trf, Srf, Crf, Vrf; // write methods + size_t Ts, Ds, Ss, Hs; // sizes of areas in bytes + struct blob_region Hr, Tr, Dr, Sr; // header, type, decl, string regions +}; + +static void decl_generate_one_blob(struct fy_packed_generator *pg, struct fy_decl *decl, struct blob_writer *bw) +{ + const char *yaml_comment; + char *raw_comment; + + br_w8(&bw->Dr, (uint8_t)decl->decl_type); + br_wid(&bw->Dr, bw->Ti, decl->type->id); + br_wid(&bw->Dr, bw->Si, br_wstr(&bw->Sr, decl->name, (size_t)-1)); + switch (decl->decl_type) { + case FYDT_BITFIELD: + br_w8(&bw->Dr, (uint8_t)decl->bitfield_decl.bit_width); + break; + case FYDT_ENUM_VALUE: + br_wX(&bw->Dr, bw->Vi, decl->enum_value_decl.val.u); + break; + default: + break; + } + yaml_comment = fy_decl_get_yaml_comment(decl); + if (yaml_comment) { + raw_comment = alloca(strlen("// yaml: ") + strlen(yaml_comment) + 1); + strcpy(raw_comment, "// yaml: "); + strcat(raw_comment, yaml_comment); + br_wid(&bw->Dr, bw->Si, br_wstr(&bw->Sr, raw_comment, (size_t)-1)); + } else + br_wid(&bw->Dr, bw->Si, 0); +} + +static void type_generate_one_blob(struct fy_packed_generator *pg, struct fy_type *ft, struct blob_writer *bw) +{ + uint8_t flags; + + br_w8(&bw->Tr, (uint8_t)ft->type_kind); + flags = 0; + if (ft->is_const) + flags |= PGTF_CONST; + if (ft->is_volatile) + flags |= PGTF_VOLATILE; + if (ft->is_restrict) + flags |= PGTF_RESTRICT; + if (!fy_type_kind_is_primitive(ft->type_kind) && ft->decl) + flags |= PGTF_DECL; + if (fy_type_kind_is_dependent(ft->type_kind) && ft->dependent_type) + flags |= PGTF_DEP; + if (ft->type_kind == FYTK_CONSTARRAY) + flags |= PGTF_ECOUNT; + br_w8(&bw->Tr, flags); + + if (flags & PGTF_DECL) + br_wid(&bw->Tr, bw->Di, ft->decl->id); + + if (flags & PGTF_DEP) + br_wid(&bw->Tr, bw->Ti, ft->dependent_type->id); + + if (flags & PGTF_ECOUNT) + br_wid(&bw->Tr, bw->Ci, 
ft->element_count); +} + +static int packed_generate_blob_H(struct fy_packed_generator *pg, struct blob_writer *bw) +{ + br_w8(&bw->Hr, 'F'); /* format id */ + br_w8(&bw->Hr, 'Y'); + br_w8(&bw->Hr, 'P'); + br_w8(&bw->Hr, 'G'); + br_w8(&bw->Hr, 0); /* major version */ + br_w8(&bw->Hr, 1); /* minor version */ + br_w8(&bw->Hr, (uint8_t)bw->Ti); /* type id size */ + br_w8(&bw->Hr, (uint8_t)bw->Di); /* decl id size */ + br_w8(&bw->Hr, (uint8_t)bw->Si); /* string table offset size */ + br_w8(&bw->Hr, (uint8_t)bw->Ci); /* element count size */ + br_w8(&bw->Hr, (uint8_t)bw->Vi); /* enum value size */ + br_wskip_to(&bw->Hr, 0x10); + br_w64(&bw->Hr, bw->Tc); /* # of type entries */ + br_w64(&bw->Hr, bw->Ts); /* # size of type entries area */ + br_w64(&bw->Hr, bw->Dc); /* # of decl entries */ + br_w64(&bw->Hr, bw->Ds); /* # size of decl entries area */ + br_w64(&bw->Hr, bw->Ss); /* # size of string table area */ + + /* skip to end */ + br_wskip_to(&bw->Hr, PGHDR_SIZE); + + return 0; +} + +static int packed_generate_blob_TDS(struct fy_packed_generator *pg, struct blob_writer *bw) +{ + struct fy_reflection *rfl = pg->rfl; + struct fy_decl *decl, *declc; + struct fy_type *ft; + + /* always write a zero at offset zero of the strtab */ + /* this allows us to use offset zero as a NULL marker */ + br_w8(&bw->Sr, 0); + + for (decl = fy_decl_list_head(&rfl->decls); decl != NULL; decl = fy_decl_next(&rfl->decls, decl)) { + decl_generate_one_blob(pg, decl, bw); + for (declc = fy_decl_list_head(&decl->children); declc != NULL; declc = fy_decl_next(&decl->children, declc)) + decl_generate_one_blob(pg, declc, bw); + } + + for (ft = fy_type_list_head(&rfl->types); ft != NULL; ft = fy_type_next(&rfl->types, ft)) + type_generate_one_blob(pg, ft, bw); + + return 0; +} + +static int packed_generate_blob(struct fy_packed_generator *pg) +{ + struct fy_reflection *rfl = pg->rfl; + struct blob_writer bw_local, *bw = &bw_local; + void *blob = NULL; + size_t blob_size = 0; + uint8_t *p; + int ret; + 
+ memset(bw, 0, sizeof(*bw)); + + bw->Tc = rfl->next_type_id; + bw->Dc = rfl->next_decl_id; + + bw->Ti = blob_count_to_id_size(bw->Tc); + bw->Di = blob_count_to_id_size(bw->Dc); + bw->Si = BID_U32; // hardcoded to 4 bytes for now + bw->Ci = BID_U64; // hardcoded to 8 bytes for now + bw->Vi = BID_U64; // hardcoded to 8 bytes for now + + bw->Twf = br_wid_get_func(bw->Ti); + bw->Trf = br_rid_get_func(bw->Ti); + bw->Dwf = br_wid_get_func(bw->Di); + bw->Drf = br_rid_get_func(bw->Di); + bw->Swf = br_wid_get_func(bw->Si); + bw->Srf = br_rid_get_func(bw->Si); + bw->Cwf = br_wid_get_func(bw->Ci); + bw->Crf = br_rid_get_func(bw->Ci); + bw->Vwf = br_wid_get_func(bw->Vi); + bw->Vrf = br_rid_get_func(bw->Vi); + + /* setup all for infinite non backed writes */ + br_wsetup(&bw->Tr, NULL, (size_t)-1, BLOB_ENDIAN); + br_wsetup(&bw->Dr, NULL, (size_t)-1, BLOB_ENDIAN); + br_wsetup(&bw->Sr, NULL, (size_t)-1, BLOB_ENDIAN); + + /* generate entries to get the extends */ + ret = packed_generate_blob_TDS(pg, bw); + if (ret) + goto err_out; + + bw->Ts = br_curr(&bw->Tr); + bw->Ds = br_curr(&bw->Dr); + bw->Ss = br_curr(&bw->Sr); + bw->Hs = PGHDR_SIZE; + + /* total size is header + type + decl + stringtab */ + blob_size = bw->Hs + bw->Ts + bw->Ds + bw->Ss; + blob = malloc(blob_size); + if (!blob) + goto err_out; + + /* setup again, pointing at the actual buffer */ + p = blob; + br_wsetup(&bw->Hr, p, bw->Hs, BLOB_ENDIAN); + p += bw->Hs; + + br_wsetup(&bw->Tr, p, bw->Ts, BLOB_ENDIAN); + p += bw->Ts; + + br_wsetup(&bw->Dr, p, bw->Ds, BLOB_ENDIAN); + p += bw->Ds; + + br_wsetup(&bw->Sr, p, bw->Ss, BLOB_ENDIAN); + p += bw->Ss; + + /* verify that everything was setup correctly */ + assert(p == blob + blob_size); + + ret = packed_generate_blob_H(pg, bw); + if (ret) + goto err_out; + + ret = packed_generate_blob_TDS(pg, bw); + if (ret) + goto err_out; + + *pg->blobp = blob; + *pg->blob_sizep = blob_size; + + return 0; +err_out: + if (blob) + free(blob); + return ret; +} + +int 
fy_packed_generate(struct fy_packed_generator *pg) +{ + struct fy_reflection *rfl; + FILE *fp = NULL; + int ret; + + if (!pg) + return -1; + + rfl = pg->rfl; + if (!rfl) + return -1; + + /* the generation relies on the renumbering settings ids correctly */ + fy_reflection_renumber(rfl); + + ret = -1; + switch (pg->type) { + case FYPGT_TO_FILE: + ret = packed_generate_fp(pg, pg->fp); + break; + + case FYPGT_TO_STRING: + fp = open_memstream(pg->strp, pg->str_sizep); + if (!fp) + break; + ret = packed_generate_fp(pg, fp); + fclose(fp); + fp = NULL; + break; + + case FYPGT_BLOB: + + if (!pg->blobp || !pg->blob_sizep) + return -1; + + ret = packed_generate_blob(pg); + break; + + default: + ret = -1; + break; + } + + return ret; +} + +static int packed_reflection_setup_blob(struct fy_reflection *rfl) +{ + const struct fy_packed_backend_reflection_cfg *cfg = rfl->cfg.backend_cfg; + struct packed_reflect_backend *rflb; + struct blob_region Br; + int i, Tc, Dc; // type count, decl count + enum blob_id_size Di, Ti, Si, Ci, Vi; // id sizes + size_t Ts, Ds, Ss; + uint8_t major, minor; + struct fy_type_p *typep; + struct fy_decl_p *declp; + uint8_t flags; + + rflb = rfl->backend; + + rflb->blob_size = cfg->blob_size; + + if (!cfg->copy) + rflb->blob = cfg->blob; + else { + rflb->blob_copy = malloc(rflb->blob_size); + if (!rflb->blob_copy) + return -1; + memcpy(rflb->blob_copy, cfg->blob, rflb->blob_size); + rflb->blob = rflb->blob_copy; + } + + br_rsetup(&Br, rflb->blob, rflb->blob_size, BLOB_ENDIAN); + /* FYPG */ + if (br_r8(&Br) != 'F' || br_r8(&Br) != 'Y' || br_r8(&Br) != 'P' || br_r8(&Br) != 'G') + goto err_out; + + /* major, minor version check */ + major = br_r8(&Br); + minor = br_r8(&Br); + if (major > 0) + goto err_out; + (void)minor; + + Ti = br_r8(&Br); + Di = br_r8(&Br); + Si = br_r8(&Br); + Ci = br_r8(&Br); + Vi = br_r8(&Br); + br_rskip_to(&Br, 0x10); + Tc = br_r64(&Br); + Ts = br_r64(&Br); + Dc = br_r64(&Br); + Ds = br_r64(&Br); + Ss = br_r64(&Br); + + 
br_rskip_to(&Br, PGHDR_SIZE); + + /* allocate expanded structures */ + rflb->gen_types = malloc(sizeof(*rflb->gen_types) * Tc); + if (!rflb->gen_types) + goto err_out; + memset(rflb->gen_types, 0, sizeof(*rflb->gen_types) * Tc); + rflb->gen_decls = malloc(sizeof(*rflb->gen_decls) * Dc); + if (!rflb->gen_decls) + goto err_out; + memset(rflb->gen_decls, 0, sizeof(*rflb->gen_decls) * Dc); + + for (typep = rflb->gen_types, i = 0; i < Tc; i++, typep++) { + typep->type_kind = br_r8(&Br); + assert(fy_type_kind_is_valid(typep->type_kind)); + + flags = br_r8(&Br); + if (flags & PGTF_CONST) + typep->flags |= FYTPF_CONST; + if (flags & PGTF_VOLATILE) + typep->flags |= FYTPF_VOLATILE; + if (flags & PGTF_RESTRICT) + typep->flags |= FYTPF_RESTRICT; + if (flags & PGTF_DECL) + typep->decl.id = br_rid(&Br, Di); + if (flags & PGTF_DEP) + typep->dependent_type.id = br_rid(&Br, Ti); + if (flags & PGTF_ECOUNT) + typep->element_count = br_rid(&Br, Ci); + } + + for (declp = rflb->gen_decls, i = 0; i < Dc; i++, declp++) { + declp->decl_type = br_r8(&Br); + assert(fy_decl_type_is_valid(declp->decl_type)); + declp->type.id = br_rid(&Br, Ti); + declp->name.offset = br_rX(&Br, Si); + switch (declp->decl_type) { + case FYDT_BITFIELD: + declp->bit_width = br_r8(&Br); + break; + case FYDT_ENUM_VALUE: + declp->enum_value.u = br_rX(&Br, Vi); + break; + default: + break; + } + declp->comment.offset = br_rX(&Br, Si); + } + + /* it must be consumed completely */ + assert(br_curr(&Br) == PGHDR_SIZE + Ts + Ds); + + rflb->gen_type_info.uses_pointers = false; + rflb->gen_type_info.types = rflb->gen_types; + rflb->gen_type_info.types_count = Tc; + rflb->gen_type_info.decls = rflb->gen_decls; + rflb->gen_type_info.decls_count = Dc; + rflb->gen_type_info.strtab = (const char *)(rflb->blob + PGHDR_SIZE + Ts + Ds); + rflb->gen_type_info.strtab_size = Ss; + + rflb->type_info = &rflb->gen_type_info; + + return 0; +err_out: + if (rflb->gen_types) { + free(rflb->gen_types); + rflb->gen_types = NULL; + } + if 
(rflb->gen_decls) { + free(rflb->gen_decls); + rflb->gen_decls = NULL; + } + return -1; +} diff --git a/src/reflection/fy-packed-backend.h b/src/reflection/fy-packed-backend.h new file mode 100644 index 00000000..9a8298c5 --- /dev/null +++ b/src/reflection/fy-packed-backend.h @@ -0,0 +1,263 @@ +/* + * fy-packed-backend.h - Packed blob reflection backend header + * + * Copyright (c) 2023 Pantelis Antoniou + * + * SPDX-License-Identifier: MIT + */ +#ifndef FY_PACKED_BACKEND_H +#define FY_PACKED_BACKEND_H + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include + +#include "fy-reflection-private.h" + +/* packed structure */ + +struct fy_type_p; +struct fy_decl_p; + +typedef union { + const struct fy_decl_p *declp; + int id; +} fy_decl_p_id; + +typedef union { + const struct fy_type_p *fytp; + int id; +} fy_type_p_id; + +typedef union { + const char *str; + unsigned int offset; +} fy_p_str; + +typedef union { + long long s; + unsigned long long u; +} fy_p_enum_val; + +#define FYTPF_CONST 0x01 +#define FYTPF_VOLATILE 0x02 +#define FYTPF_RESTRICT 0x04 +#define FYTPF_UNRESOLVED 0x08 /* when this is unresolved dummy type */ + +struct fy_type_p { + enum fy_type_kind type_kind; + unsigned int flags; + fy_decl_p_id decl; + fy_type_p_id dependent_type; + unsigned long long element_count; +}; + +struct fy_decl_p { + enum fy_decl_type decl_type; + fy_p_str name; + fy_type_p_id type; + fy_p_str comment; + union { + fy_p_enum_val enum_value; + size_t bit_width; + }; +}; + +struct fy_packed_type_info { + bool uses_pointers; + const struct fy_type_p *types; + int types_count; + const struct fy_decl_p *decls; + int decls_count; + const char *strtab; + size_t strtab_size; +}; + +static inline const struct fy_type_p * +fy_type_p_from_id(const struct fy_packed_type_info *ti, const fy_type_p_id id) +{ + if (ti->uses_pointers) { + assert(!id.fytp || (id.fytp >= ti->types && id.fytp < &ti->types[ti->types_count])); + return id.fytp; + } + assert((unsigned 
int)id.id < (unsigned int)ti->types_count); + return ti->types + id.id; +} + +static inline const struct fy_decl_p * +fy_decl_p_from_id(const struct fy_packed_type_info *ti, const fy_decl_p_id id) +{ + if (ti->uses_pointers) { + assert(!id.declp || (id.declp >= ti->decls && id.declp < &ti->decls[ti->decls_count])); + return id.declp; + } + assert((unsigned int)id.id < (unsigned int)ti->decls_count); + return ti->decls + id.id; +} + +static inline const char * +fy_str_from_p(const struct fy_packed_type_info *ti, fy_p_str strp) +{ + if (ti->uses_pointers) + return strp.str; + + if (!strp.offset) + return NULL; + + assert(strp.offset < ti->strtab_size); + + return ti->strtab + strp.offset; +} + +static inline const char * +fy_decl_p_name(const struct fy_packed_type_info *ti, const struct fy_decl_p *declp) +{ + return fy_str_from_p(ti, declp->name); +} + +static inline const char * +fy_type_p_name(const struct fy_packed_type_info *ti, const struct fy_type_p *fytp) +{ + const struct fy_decl_p *declp; + + if (fy_type_kind_is_primitive(fytp->type_kind)) + return fy_type_kind_info_get_internal(fytp->type_kind)->name; + + declp = fy_decl_p_from_id(ti, fytp->decl); + return declp ? 
/* format of "BLOB"
 *
 * All values larger than 8 bits are stored big-endian
 *
 * [Header]
 * [TypeEntries]
 * [DeclEntries]
 * [StringTable]
 *
 * Header format:
 *   [00-03]  'F', 'Y', 'P', 'G'
 *   [04-04]  Major version number of the format
 *   [05-05]  Minor version number of the format
 *   [06-06]  T Size of type ids (0=8bit, 1=16bit, 2=32bit, 3=64bit)
 *   [07-07]  D Size of decl ids (0=8bit, 1=16bit, 2=32bit, 3=64bit)
 *   [08-08]  S String table offset size (0=8bit, 1=16bit, 2=32bit, 3=64bit)
 *   [09-09]  C element_count size (0=8bit, 1=16bit, 2=32bit, 3=64bit)
 *   [0a-0a]  V enum value size (0=8bit, 1=16bit, 2=32bit, 3=64bit)
 *   [0b-0f]  Reserved
 *   [10-17]  Type table # entries
 *   [18-1f]  Type table size in bytes
 *   [20-27]  Decl table # entries
 *   [28-2f]  Decl table size in bytes
 *   [30-37]  Size of string table in bytes
 *   [38-3f]  Reserved
 *   Total size = 0x40 bytes
 *
 * D = Decl ID size in bytes
 * T = Type ID size in bytes
 * S = String table offset size in bytes
 * C = Element count size in bytes
 * V = Enum value size in bytes
 *
 * Type table entries format
 *   offset     size  description
 *   ---------  ----  -----------
 *   0          1     type_kind
 *   1          1     flags
 *                      - bit 0 -> const
 *                      - bit 1 -> volatile
 *                      - bit 2 -> restrict
 *                      - bit 3 -> decl.id exists
 *                      - bit 4 -> dependent_type.id exists
 *                      - bit 5 -> element_count exists
 *   2          D     decl.id (if exists)
 *   2+D        T     dependent_type.id (if exists)
 *   2+D+T      C     element count (if exists)
 *   ---------
 *   2+D+T+C    max size of type entry
 *
 * Decl table entries format
 *   offset         size  description
 *   ----------     ----  -----------
 *   0              1     decl_type
 *   1              T     type.id
 *   1+T            S     name - string table offset
 *   1+T+S          1     bit_width if bitfield
 *   1+T+S          V     enum_value if enum_value
 *   1+T+S+[V|1]    S     comment - string table offset (0 if none)
 *   ----------
 *   1+T+S+[V|1]+S  maximum size of entry
 *
 * String table
 *   Offset zero always holds a zero byte, so that a string offset of zero
 *   can be used as a NULL marker.
 */
/*
 * Kinds of declarations tracked by the reflection code.
 *
 * The enumerators are grouped: the "none" placeholder first, then the
 * declarations, then the members that appear inside a declaration.
 * The predicates below classify a value by that grouping.
 */
enum fy_decl_type {
	/* no declaration */
	FYDT_NONE,

	/* declarations */
	FYDT_STRUCT,
	FYDT_UNION,
	FYDT_CLASS,
	FYDT_ENUM,
	FYDT_TYPEDEF,
	FYDT_FUNCTION,

	/* fields */
	FYDT_FIELD,
	FYDT_BITFIELD,
	FYDT_ENUM_VALUE,
};
#define FYDT_COUNT (FYDT_ENUM_VALUE + 1)

extern const char *decl_type_txt[FYDT_COUNT];

/* descriptive entry for one declaration type */
struct fy_decl_type_info {
	enum fy_decl_type type;
	const char *name;
	const char *enum_name;
};

extern const struct fy_decl_type_info fy_decl_type_info_table[FYDT_COUNT];

/* true for every enumerator except FYDT_NONE; false for anything else */
static inline bool fy_decl_type_is_valid(enum fy_decl_type type)
{
	switch (type) {
	case FYDT_STRUCT:
	case FYDT_UNION:
	case FYDT_CLASS:
	case FYDT_ENUM:
	case FYDT_TYPEDEF:
	case FYDT_FUNCTION:
	case FYDT_FIELD:
	case FYDT_BITFIELD:
	case FYDT_ENUM_VALUE:
		return true;
	default:
		return false;
	}
}

/* true for struct, union, class and enum declarations */
static inline bool fy_decl_type_has_fields(enum fy_decl_type type)
{
	switch (type) {
	case FYDT_STRUCT:
	case FYDT_UNION:
	case FYDT_CLASS:
	case FYDT_ENUM:
		return true;
	default:
		return false;
	}
}

/* true for fields, bit-fields and enum values */
static inline bool fy_decl_type_is_field(enum fy_decl_type type)
{
	switch (type) {
	case FYDT_FIELD:
	case FYDT_BITFIELD:
	case FYDT_ENUM_VALUE:
		return true;
	default:
		return false;
	}
}

/* same set as fy_decl_type_has_fields() */
static inline bool fy_decl_type_has_children(enum fy_decl_type type)
{
	return fy_decl_type_has_fields(type);
}

/* same set as fy_decl_type_is_field() */
static inline bool fy_decl_type_has_parent(enum fy_decl_type type)
{
	return fy_decl_type_is_field(type);
}
bit_width; + } bitfield_decl; + }; +}; +FY_TYPE_DECL_LIST(decl); + +FY_TYPE_FWD_DECL_LIST(import); +struct fy_import { + struct list_head node; + struct fy_reflection *rfl; + const char *name; + void *backend; + bool marker; +}; +FY_TYPE_DECL_LIST(import); + +struct fy_reflection_backend_ops { + int (*reflection_setup)(struct fy_reflection *rfl); + void (*reflection_cleanup)(struct fy_reflection *rfl); + + int (*import_setup)(struct fy_import *imp, const void *frontend_user); + void (*import_cleanup)(struct fy_import *imp); + + int (*type_setup)(struct fy_type *ft, void *backend_user); + void (*type_cleanup)(struct fy_type *ft); + + int (*decl_setup)(struct fy_decl *decl, void *backend_user); + void (*decl_cleanup)(struct fy_decl *decl); +}; + +struct fy_reflection_backend { + const char *name; + const struct fy_reflection_backend_ops *ops; +}; + +struct fy_reflection_cfg { + const void *backend_cfg; + const struct fy_reflection_backend *backend; +}; + +struct fy_reflection { + struct fy_reflection_cfg cfg; + struct fy_import_list imports; + struct fy_source_file_list source_files; + struct fy_type_list types; + struct fy_decl_list decls; + int unresolved_types_count; + int next_type_id; + int next_decl_id; + int next_source_file_id; + void *backend; + struct fy_import *imp_curr; /* the current import */ +}; + +struct fy_type *fy_type_create(struct fy_reflection *rfl, enum fy_type_kind type_kind, const char *name, struct fy_decl *decl, void *user); +void fy_type_destroy(struct fy_type *ft); +void fy_type_fixup_size_align(struct fy_type *ft); + +bool fy_type_is_pointer(struct fy_type *ft); +bool fy_type_is_array(struct fy_type *ft); +bool fy_type_is_constant_array(struct fy_type *ft); +bool fy_type_is_incomplete_array(struct fy_type *ft); +int fy_type_get_constant_array_element_count(struct fy_type *ft); +struct fy_type *fy_type_get_dependent_type(struct fy_type *ft); +size_t fy_type_get_sizeof(struct fy_type *ft); +size_t fy_type_get_alignof(struct fy_type *ft); 
+int fy_type_get_field_count(struct fy_type *ft); +struct fy_decl *fy_type_get_field_decl_by_name(struct fy_type *ft, const char *field); +struct fy_decl *fy_type_get_field_decl_by_enum_value(struct fy_type *ft, long long val); +struct fy_decl *fy_type_get_field_decl_by_unsigned_enum_value(struct fy_type *ft, unsigned long long val); +int fy_type_get_field_index_by_name(struct fy_type *ft, const char *field); +struct fy_decl *fy_type_get_field_decl_by_idx(struct fy_type *ft, unsigned int idx); +int fy_type_get_field_idx_by_decl(struct fy_type *ft, struct fy_decl *decl); +const struct fy_source_location *fy_type_get_decl_location(struct fy_type *ft); +const char *fy_type_get_raw_comment(struct fy_type *ft); +const char *fy_type_get_cooked_comment(struct fy_type *ft); +struct fy_document *fy_type_get_yaml_annotation(struct fy_type *ft); +const char *fy_type_get_yaml_comment(struct fy_type *ft); + +void fy_decl_destroy(struct fy_decl *decl); +struct fy_decl *fy_decl_create(struct fy_reflection *rfl, struct fy_import *imp, + struct fy_decl *parent, enum fy_decl_type decl_type, const char *name, void *user); +const char *fy_decl_get_type_kind_spelling(struct fy_decl *decl); +const char *fy_decl_get_type_spelling(struct fy_decl *decl); +bool fy_decl_enum_value_is_unsigned(struct fy_decl *decl); +long long fy_decl_enum_value_signed(struct fy_decl *decl); +unsigned long long fy_decl_enum_value_unsigned(struct fy_decl *decl); +bool fy_decl_field_is_bitfield(struct fy_decl *decl); +size_t fy_decl_field_offsetof(struct fy_decl *decl); +size_t fy_decl_field_bit_offsetof(struct fy_decl *decl); +size_t fy_decl_field_sizeof(struct fy_decl *decl); +size_t fy_decl_field_bit_width(struct fy_decl *decl); +bool fy_decl_is_in_system_header(struct fy_decl *decl); +bool fy_decl_is_from_main_file(struct fy_decl *decl); +const struct fy_source_location *fy_decl_get_location(struct fy_decl *decl); +const char *fy_decl_get_spelling(struct fy_decl *decl); +const char 
*fy_decl_get_display_name(struct fy_decl *decl); +const char *fy_decl_get_signature(struct fy_decl *decl); +const char *fy_decl_get_raw_comment(struct fy_decl *decl); +const char *fy_decl_get_cooked_comment(struct fy_decl *decl); +struct fy_document *fy_decl_get_yaml_annotation(struct fy_decl *decl); +const char *fy_decl_get_yaml_comment(struct fy_decl *decl); + +struct fy_import *fy_import_create(struct fy_reflection *rfl, const void *user); +void fy_import_destroy(struct fy_import *imp); + +const char *fy_import_get_target_triple(struct fy_import *imp); + +struct fy_source_file *fy_source_file_create(struct fy_reflection *rfl, const char *filename); +void fy_source_file_destroy(struct fy_source_file *srcf); +struct fy_source_file *fy_reflection_lookup_source_file(struct fy_reflection *rfl, const char *filename); +void fy_source_file_dump(struct fy_source_file *srcf); + +struct fy_reflection *fy_reflection_create(const struct fy_reflection_cfg *rflc); +int fy_reflection_import(struct fy_reflection *rfl, const void *user); +struct fy_type *fy_reflection_lookup_type(struct fy_reflection *rfl, enum fy_type_kind type_kind, const char *name); +void fy_reflection_renumber(struct fy_reflection *rfl); +void fy_reflection_fix_unresolved(struct fy_reflection *rfl); +void fy_reflection_update_type_info(struct fy_reflection *rfl); + +void fy_reflection_fixup_size_align(struct fy_reflection *rfl); +void fy_reflection_dump(struct fy_reflection *rfl, bool marked_only, bool no_location); + +static inline int fy_type_id(const struct fy_type *ft) +{ + return ft->id; +} + +static inline bool fy_type_is_anonymous(const struct fy_type *ft) +{ + return ft->anonymous; +} + +static inline bool fy_type_is_declared(const struct fy_type *ft) +{ + return ft && !fy_type_kind_is_primitive(ft->type_kind) && ft->decl; +} + +static inline bool fy_type_is_resolved(const struct fy_type *ft) +{ + return ft && !ft->unresolved; +} + +const char *fy_import_get_name(struct fy_import *imp); +void 
fy_import_clear_marker(struct fy_import *imp); +void fy_import_mark(struct fy_import *imp); + +void fy_decl_clear_marker(struct fy_decl *decl); +void fy_decl_mark(struct fy_decl *decl); + +void fy_type_clear_marker(struct fy_type *ft); +void fy_type_mark(struct fy_type *ft); + +void fy_source_file_clear_marker(struct fy_source_file *srcf); +void fy_source_file_mark(struct fy_source_file *srcf); + +const struct fy_reflection_backend * +fy_reflection_backend_lookup(const char *name); + +char *fy_type_generate_name(struct fy_type *ft, const char *field, bool normalized); +struct fy_decl *fy_type_get_anonymous_parent_decl(struct fy_type *ft); +size_t fy_type_eponymous_offset(struct fy_type *ft); + +static inline void fy_type_set_userdata(struct fy_type *ft, void *data) +{ + if (!ft) + return; + ft->userdata = data; +} + +static inline void *fy_type_get_userdata(struct fy_type *ft) +{ + return ft ? ft->userdata : NULL; +} + +static inline void fy_decl_set_userdata(struct fy_decl *decl, void *data) +{ + if (!decl) + return; + decl->userdata = data; +} + +static inline void *fy_decl_get_userdata(struct fy_decl *decl) +{ + return decl ? 
decl->userdata : NULL; +} + +static inline struct fy_decl *fy_decl_from_field_info(const struct fy_field_info *fi) +{ + struct fy_type *ft; + int idx; + + if (!fi) + return NULL; + ft = fy_type_from_info(fi->parent); + assert(ft); + idx = fy_field_info_index(fi); + assert((size_t)idx < fi->parent->count && ft->field_decls); + return ft->field_decls[idx]; +} + +enum fy_reflection_record_missing_fields { + FYRRMF_FORBIDDEN, /* yaml: { name: forbidden } */ + FYRRMF_PERMITTED, /* yaml: { name: permitted } */ +}; + +struct fy_reflection_record_schema { + enum fy_reflection_record_missing_fields missing_fields; /* yaml: { name: missing-fields } */ +}; + +#endif diff --git a/src/reflection/fy-reflection.c b/src/reflection/fy-reflection.c new file mode 100644 index 00000000..d7a95327 --- /dev/null +++ b/src/reflection/fy-reflection.c @@ -0,0 +1,3010 @@ +/* + * fy-reflection.c - Generic type reflection library + * + * Copyright (c) 2023 Pantelis Antoniou + * + * SPDX-License-Identifier: MIT + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "fy-utf8.h" +#include "fy-ctype.h" +#include "fy-reflection-private.h" + +static struct fy_document *get_yaml_document(const char *cooked_comment); + +static void fy_type_reset_type_info(struct fy_type *ft); +static struct fy_type_info *fy_type_get_type_info(struct fy_type *ft); + +static inline int backend_reflection_setup(struct fy_reflection *rfl) +{ + return rfl->cfg.backend->ops->reflection_setup(rfl); +} + +static inline void backend_reflection_cleanup(struct fy_reflection *rfl) +{ + rfl->cfg.backend->ops->reflection_cleanup(rfl); +} + +static inline int backend_import_setup(struct fy_import *imp, const void *user) +{ + return imp->rfl->cfg.backend->ops->import_setup(imp, user); +} + +static inline void backend_import_cleanup(struct fy_import *imp) +{ + imp->rfl->cfg.backend->ops->import_cleanup(imp); +} 
+ +static inline int backend_type_setup(struct fy_type *ft, void *user) +{ + return ft->rfl->cfg.backend->ops->type_setup(ft, user); +} + +static inline void backend_type_cleanup(struct fy_type *ft) +{ + ft->rfl->cfg.backend->ops->type_cleanup(ft); +} + +static inline int backend_decl_setup(struct fy_decl *decl, void *user) +{ + return decl->imp->rfl->cfg.backend->ops->decl_setup(decl, user); +} + +static inline void backend_decl_cleanup(struct fy_decl *decl) +{ + decl->imp->rfl->cfg.backend->ops->decl_cleanup(decl); +} + +const struct fy_decl_type_info fy_decl_type_info_table[FYDT_COUNT] = { + [FYDT_NONE] = { + .type = FYDT_NONE, + .name = "none", + .enum_name = "FYDT_NONE", + }, + [FYDT_STRUCT] = { + .type = FYDT_STRUCT, + .name = "struct", + .enum_name = "FYDT_STRUCT", + }, + [FYDT_UNION] = { + .type = FYDT_UNION, + .name = "union", + .enum_name = "FYDT_UNION", + }, + [FYDT_CLASS] = { + .type = FYDT_CLASS, + .name = "class", + .enum_name = "FYDT_CLASS", + }, + [FYDT_ENUM] = { + .type = FYDT_ENUM, + .name = "enum", + .enum_name = "FYDT_ENUM", + }, + [FYDT_TYPEDEF] = { + .type = FYDT_TYPEDEF, + .name = "typedef", + .enum_name = "FYDT_TYPEDEF", + }, + [FYDT_FUNCTION] = { + .type = FYDT_FUNCTION, + .name = "function", + .enum_name = "FYDT_FUNCTION", + }, + [FYDT_FIELD] = { + .type = FYDT_FIELD, + .name = "field", + .enum_name = "FYDT_FIELD", + }, + [FYDT_BITFIELD] = { + .type = FYDT_BITFIELD, + .name = "bit-field", + .enum_name = "FYDT_BITFIELD", + }, + [FYDT_ENUM_VALUE] = { + .type = FYDT_ENUM_VALUE, + .name = "enum-value", + .enum_name = "FYDT_ENUM_VALUE", + }, +}; + +const struct fy_type_kind_info fy_type_kind_info_table[FYTK_COUNT] = { + [FYTK_INVALID] = { + .kind = FYTK_INVALID, + .name = "*invalid*", + .enum_name = "FYTK_INVALID", + }, + [FYTK_VOID] = { + .kind = FYTK_VOID, + .name = "void", + .size = 0, + .align = 0, + .enum_name = "FYTK_VOID", + }, + [FYTK_BOOL] = { + .kind = FYTK_BOOL, + .name = "_Bool", + .size = sizeof(_Bool), + .align = alignof(_Bool), + 
.enum_name = "FYTK_BOOL", + }, + [FYTK_CHAR] = { + .kind = FYTK_CHAR, + .name = "char", + .size = sizeof(char), + .align = alignof(char), + .enum_name = "FYTK_CHAR", + }, + [FYTK_SCHAR] = { + .kind = FYTK_SCHAR, + .name = "signed char", + .size = sizeof(signed char), + .align = alignof(signed char), + .enum_name = "FYTK_SCHAR", + }, + [FYTK_UCHAR] = { + .kind = FYTK_UCHAR, + .name = "unsigned char", + .size = sizeof(unsigned char), + .align = alignof(unsigned char), + .enum_name = "FYTK_UCHAR", + }, + [FYTK_SHORT] = { + .kind = FYTK_SHORT, + .name = "short", + .size = sizeof(short), + .align = alignof(short), + .enum_name = "FYTK_SHORT", + }, + [FYTK_USHORT] = { + .kind = FYTK_USHORT, + .name = "unsigned short", + .size = sizeof(unsigned short), + .align = alignof(unsigned short), + .enum_name = "FYTK_USHORT", + }, + [FYTK_INT] = { + .kind = FYTK_INT, + .name = "int", + .size = sizeof(int), + .align = alignof(int), + .enum_name = "FYTK_INT", + }, + [FYTK_UINT] = { + .kind = FYTK_UINT, + .name = "unsigned int", + .size = sizeof(unsigned int), + .align = alignof(unsigned int), + .enum_name = "FYTK_UINT", + }, + [FYTK_LONG] = { + .kind = FYTK_LONG, + .name = "long", + .size = sizeof(long), + .align = alignof(long), + .enum_name = "FYTK_LONG", + }, + [FYTK_ULONG] = { + .kind = FYTK_ULONG, + .name = "unsigned long", + .size = sizeof(unsigned long), + .align = alignof(unsigned long), + .enum_name = "FYTK_ULONG", + }, + [FYTK_LONGLONG] = { + .kind = FYTK_LONGLONG, + .name = "long long", + .size = sizeof(long long), + .align = alignof(long long), + .enum_name = "FYTK_LONGLONG", + }, + [FYTK_ULONGLONG] = { + .kind = FYTK_ULONGLONG, + .name = "unsigned long long", + .size = sizeof(unsigned long), + .align = alignof(unsigned long), + .enum_name = "FYTK_ULONGLONG", + }, +#if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__ == 16 + [FYTK_INT128] = { + .kind = FYTK_INT128, + .name = "__int128", + .size = sizeof(__int128), + .align = alignof(__int128), + .enum_name = 
"FYTK_INT128", + }, + [FYTK_UINT128] = { + .kind = FYTK_UINT128, + .name = "unsigned __int128", + .size = sizeof(unsigned __int128), + .align = alignof(unsigned __int128), + .enum_name = "FYTK_UINT128", + }, +#else + [FYTK_INT128] = { + .kind = FYTK_INVALID, + .name = "__int128", + .enum_name = "FYTK_INT128", + }, + [FYTK_UINT128] = { + .kind = FYTK_INVALID, + .name = "unsigned __int128", + .enum_name = "FYTK_UINT128", + }, +#endif + [FYTK_FLOAT] = { + .kind = FYTK_FLOAT, + .name = "float", + .size = sizeof(float), + .align = alignof(float), + .enum_name = "FYTK_FLOAT", + }, + [FYTK_DOUBLE] = { + .kind = FYTK_DOUBLE, + .name = "double", + .size = sizeof(double), + .align = alignof(double), + .enum_name = "FYTK_DOUBLE", + }, + [FYTK_LONGDOUBLE] = { + .kind = FYTK_LONGDOUBLE, + .name = "long double", + .size = sizeof(double), + .align = alignof(double), + .enum_name = "FYTK_LONGDOUBLE", + }, +#ifdef FY_HAS_FP16 + [FYTK_FLOAT16] = { + .kind = FYTK_FLOAT16, + .name = "__fp16", + .size = sizeof(__fp16), + .align = alignof(__fp16), + .enum_name = "FYTK_FLOAT16", + }, +#else + [FYTK_FLOAT16] = { + .kind = FYTK_INVALID, + .name = "__fp16", + .enum_name = "FYTK_FLOAT16", + }, +#endif +#ifdef FY_HAS_FLOAT128 + [FYTK_FLOAT128] = { + .kind = FYTK_FLOAT128, + .name = "__float128", + .size = sizeof(__float128), + .align = alignof(__float128), + .enum_name = "FYTK_FLOAT128", + }, +#else + [FYTK_FLOAT128] = { + .kind = FYTK_INVALID, + .name = "__float128", + .enum_name = "FYTK_FLOAT128", + }, +#endif + /* the explicitly sized types are not generated */ + /* they must be explicitly created */ + [FYTK_S8] = { + .kind = FYTK_S8, + .name = "int8_t", + .size = sizeof(int8_t), + .align = alignof(int8_t), + .enum_name = "FYTK_S8", + }, + [FYTK_U8] = { + .kind = FYTK_U8, + .name = "uint8_t", + .size = sizeof(uint8_t), + .align = alignof(uint8_t), + .enum_name = "FYTK_U8", + }, + [FYTK_S16] = { + .kind = FYTK_S16, + .name = "int16_t", + .size = sizeof(int16_t), + .align = alignof(int16_t), 
+ .enum_name = "FYTK_S16", + }, + [FYTK_U16] = { + .kind = FYTK_U16, + .name = "uint16_t", + .size = sizeof(uint16_t), + .align = alignof(uint16_t), + .enum_name = "FYTK_U16", + }, + [FYTK_S32] = { + .kind = FYTK_S32, + .name = "int32_t", + .size = sizeof(int32_t), + .align = alignof(int32_t), + .enum_name = "FYTK_S32", + }, + [FYTK_U32] = { + .kind = FYTK_U32, + .name = "uint32_t", + .size = sizeof(uint32_t), + .align = alignof(uint32_t), + .enum_name = "FYTK_U32", + }, + [FYTK_S64] = { + .kind = FYTK_S64, + .name = "int64_t", + .size = sizeof(int64_t), + .align = alignof(int64_t), + .enum_name = "FYTK_S64", + }, + [FYTK_U64] = { + .kind = FYTK_U64, + .name = "uint64_t", + .size = sizeof(uint64_t), + .align = alignof(uint64_t), + .enum_name = "FYTK_U64", + }, +#ifdef FY_HAS_INT128 + [FYTK_S128] = { + .kind = FYTK_S128, + .name = "__int128", + .size = sizeof(__int128), + .align = alignof(__int128), + .enum_name = "FYTK_S128", + }, + [FYTK_U128] = { + .kind = FYTK_U128, + .name = "unsigned __int128", + .size = sizeof(unsigned __int128), + .align = alignof(unsigned __int128), + .enum_name = "FYTK_U128", + }, +#else + [FYTK_S128] = { + .kind = FYTK_INVALID, + .name = "__int128", + .enum_name = "FYTK_S128", + }, + [FYTK_U128] = { + .kind = FYTK_INVALID, + .name = "unsigned __int128", + .enum_name = "FYTK_U128", + }, +#endif + + /* these are templates */ + [FYTK_RECORD] = { + .kind = FYTK_RECORD, + .name = "", + .enum_name = "FYTK_RECORD", + }, + [FYTK_STRUCT] = { + .kind = FYTK_STRUCT, + .name = "struct", + .enum_name = "FYTK_STRUCT", + }, + [FYTK_UNION] = { + .kind = FYTK_UNION, + .name = "union", + .enum_name = "FYTK_UNION", + }, + [FYTK_ENUM] = { + .kind = FYTK_ENUM, + .name = "enum", + .enum_name = "FYTK_ENUM", + }, + [FYTK_TYPEDEF] = { + .kind = FYTK_TYPEDEF, + .name = "typedef", + .enum_name = "FYTK_TYPEDEF", + }, + [FYTK_PTR] = { + .kind = FYTK_PTR, + .name = "ptr", + .enum_name = "FYTK_PTR", + }, + [FYTK_CONSTARRAY] = { + .kind = FYTK_CONSTARRAY, + .name = 
"carray", + .enum_name = "FYTK_CONSTARRAY", + }, + [FYTK_INCOMPLETEARRAY] = { + .kind = FYTK_INCOMPLETEARRAY, + .name = "iarray", + .enum_name = "FYTK_INCOMPLETEARRAY", + }, + [FYTK_FUNCTION] = { + .kind = FYTK_FUNCTION, + .name = "func", + .enum_name = "FYTK_FUNCTION", + .size = 1, // fake size and align numbers + .align = alignof(int), + }, +}; + +const struct fy_type_kind_info * +fy_type_kind_info_get(enum fy_type_kind type_kind) +{ + if (!fy_type_kind_is_valid(type_kind)) + return NULL; + return fy_type_kind_info_get_internal(type_kind); +} + +int fy_type_kind_signess(enum fy_type_kind type_kind) +{ + if (!fy_type_kind_is_numeric(type_kind)) + return 0; + + switch (type_kind) { + case FYTK_CHAR: + return CHAR_MIN < 0 ? -1 : 1; + case FYTK_SCHAR: + case FYTK_SHORT: + case FYTK_INT: + case FYTK_LONG: + case FYTK_LONGLONG: + case FYTK_INT128: + case FYTK_FLOAT: + case FYTK_DOUBLE: + case FYTK_LONGDOUBLE: + case FYTK_FLOAT16: + case FYTK_FLOAT128: + case FYTK_S8: + case FYTK_S16: + case FYTK_S32: + case FYTK_S64: + case FYTK_S128: + return -1; + case FYTK_BOOL: + case FYTK_UCHAR: + case FYTK_USHORT: + case FYTK_UINT: + case FYTK_ULONG: + case FYTK_ULONGLONG: + case FYTK_UINT128: + case FYTK_U8: + case FYTK_U16: + case FYTK_U32: + case FYTK_U64: + case FYTK_U128: + return 1; + default: + break; + } + return 0; +} + +const char *decl_type_txt[FYDT_COUNT] = { + [FYDT_NONE] = "none", + [FYDT_STRUCT] = "struct", + [FYDT_UNION] = "union", + [FYDT_CLASS] = "class", + [FYDT_ENUM] = "enum", + [FYDT_TYPEDEF] = "typedef", + [FYDT_FUNCTION] = "function", + [FYDT_FIELD] = "field", + [FYDT_BITFIELD] = "bit-field", + [FYDT_ENUM_VALUE]= "enum-value", +}; + +void fy_type_destroy(struct fy_type *ft) +{ + struct fy_reflection *rfl; + + if (!ft) + return; + + fy_type_reset_type_info(ft); + + rfl = ft->rfl; + + /* if we're deleting an unresolved type, decrease the counter */ + if (!fy_type_is_resolved(ft)) + rfl->unresolved_types_count--; + + backend_type_cleanup(ft); + + if 
(ft->fake_resolve_data) + free(ft->fake_resolve_data); + if (ft->fullname) + free(ft->fullname); + if (ft->normalized_name) + free(ft->normalized_name); + free(ft); +} + +struct fy_type *fy_type_create(struct fy_reflection *rfl, enum fy_type_kind type_kind, const char *name, struct fy_decl *decl, void *user) +{ + const char *pfx; + struct fy_type *ft; + int rc; + + /* guard against rollover */ + if (rfl->next_type_id + 1 <= 0) + return NULL; + + ft = malloc(sizeof(*ft)); + if (!ft) + goto err_out; + memset(ft, 0, sizeof(*ft)); + + ft->rfl = rfl; + ft->type_kind = type_kind; + + if (fy_type_kind_has_prefix(type_kind)) { + pfx = fy_type_kind_info_get_internal(type_kind)->name; + rc = asprintf(&ft->fullname, "%s %s", pfx, name); + if (rc < 0) + goto err_out; + ft->name = ft->fullname + strlen(pfx) + 1; + } else { + ft->fullname = strdup(name); + if (!ft->fullname) + goto err_out; + ft->name = ft->fullname; + } + if (!ft->name) + goto err_out; + ft->normalized_name = fy_type_name_normalize(ft->name); + if (!ft->normalized_name) + goto err_out; + + ft->decl = decl; + + rc = backend_type_setup(ft, user); + if (rc) + goto err_out; + + assert(rfl->next_type_id >= 0); + ft->id = rfl->next_type_id++; + + /* for the type to be anonymous it must be a non primitive + * type */ + if (decl && !fy_type_kind_is_primitive(ft->type_kind)) + ft->anonymous = decl->anonymous; + + return ft; +err_out: + fy_type_destroy(ft); + return NULL; +} + +void fy_type_clear_marker(struct fy_type *ft) +{ + if (!ft) + return; + + ft->marker = false; +} + +void fy_type_mark(struct fy_type *ft) +{ + if (!ft || ft->marker) + return; + + ft->marker = true; + + if (ft->decl) + fy_decl_mark(ft->decl); + if (ft->dependent_type) + fy_type_mark(ft->dependent_type); +} + +void fy_type_fixup_size_align(struct fy_type *ft) +{ + struct fy_type *ftc; + enum fy_type_kind type_kind; + struct fy_decl *declc, *decl; + size_t bit_offset, bit_size, bit_align, max_align, max_size, max_bit_offset, bit_width; + bool 
is_bitfield, last_was_bitfield, is_first_field; + + if (!ft || ft->is_fixed || ft->is_synthetic) + return; + + /* check for recursive fix, and break out */ + if (ft->fix_in_progress) + goto out; + + ft->fix_in_progress = true; + + type_kind = ft->type_kind; + /* invalid or function don't have sizes */ + if (type_kind == FYTK_INVALID || type_kind == FYTK_FUNCTION) + goto out; + + /* primitives have the primitive sizes */ + if (fy_type_kind_is_primitive(type_kind)) { + ft->size = fy_type_kind_info_get_internal(type_kind)->size; + ft->align = fy_type_kind_info_get_internal(type_kind)->align; + goto out; + } + + /* special handling for empty structs/unions */ + if (fy_type_kind_is_record(type_kind)) { + decl = ft->decl; + assert(decl); + if (fy_decl_list_empty(&decl->children)) + ; + } + + /* for the rest, if size, align are set don't try again */ + if (ft->size && ft->align) + goto out; + + switch (type_kind) { + case FYTK_ENUM: + case FYTK_TYPEDEF: + assert(ft->dependent_type); + fy_type_fixup_size_align(ft->dependent_type); + ft->size = ft->dependent_type->size; + ft->align = ft->dependent_type->align; + break; + + case FYTK_PTR: + assert(ft->dependent_type); + fy_type_fixup_size_align(ft->dependent_type); + /* the sizes are always the same as a void pointer */ + ft->size = sizeof(void *); + ft->align = alignof(void *); + break; + + case FYTK_INCOMPLETEARRAY: + assert(ft->dependent_type); + fy_type_fixup_size_align(ft->dependent_type); + /* size is 0, but align is that of the underlying type */ + ft->size = 0; + ft->align = ft->dependent_type->align; + break; + + case FYTK_CONSTARRAY: + assert(ft->dependent_type); + fy_type_fixup_size_align(ft->dependent_type); + /* size is the multiple of the element count */ + ft->size = ft->dependent_type->size * ft->element_count; + ft->align = ft->dependent_type->align; + break; + + case FYTK_STRUCT: + case FYTK_UNION: + decl = ft->decl; + assert(decl); + ft->size = ft->align = 0; + bit_offset = max_bit_offset = 0; + max_align 
= max_size = 0; + last_was_bitfield = false; + is_first_field = true; + + for (declc = fy_decl_list_head(&decl->children); declc; declc = fy_decl_next(&decl->children, declc)) { + + /* for unions we need to rewind each time */ + if (type_kind == FYTK_UNION) + bit_offset = 0; + + ftc = declc->type; + assert(ftc); + fy_type_fixup_size_align(ftc); + + bit_align = ftc->align * 8; + bit_size = ftc->size * 8; + + is_bitfield = declc->decl_type == FYDT_BITFIELD; + + if (!is_bitfield) { + + /* keep track of maximum alignment */ + if (max_align < ftc->align) + max_align = ftc->align; + + /* advance bit_offset to byte */ + if (last_was_bitfield) + bit_offset = (bit_offset + 8 - 1) & ~(8 - 1); + + /* align to given align */ + bit_offset = (bit_offset + bit_align - 1) & ~(bit_align - 1); + + /* store byte offset */ + if (is_first_field) { + /* first field must always have 0 offset */ + assert(decl->field_decl.byte_offset == 0); + decl->field_decl.byte_offset = 0; + } else { + /* if there is a configured offset check against it */ + /* it must always match */ + if (declc->field_decl.byte_offset) + assert(declc->field_decl.byte_offset == bit_offset / 8); + declc->field_decl.byte_offset = bit_offset / 8; + } + + /* advance and align */ + bit_offset += bit_size; + bit_offset = (bit_offset + bit_align - 1) & ~(bit_align - 1); + } else { + /* XXX probably needs a per target config here, everything is implementation defined */ + + bit_width = declc->bitfield_decl.bit_width; + + /* the byte width of the type must be less or equal of the underlying type */ + assert(bit_width <= bit_size); + + /* special unnamed bitfield, align to natural boundary */ + if (bit_width == 0) + bit_offset = (bit_offset + bit_align - 1) & ~(bit_align - 1); + + /* store the bit-offset */ + declc->bitfield_decl.bit_offset = bit_offset; + + /* advance */ + bit_offset += bit_width; + } + + /* update bit offset */ + if (max_bit_offset < bit_offset) + max_bit_offset = bit_offset; + + last_was_bitfield = 
is_bitfield; + is_first_field = false; + + } + + /* save max align */ + ft->align = max_align; + + /* align to byte at the end */ + if (last_was_bitfield) + bit_offset = (bit_offset + 8 - 1) & ~(8 - 1); + + /* align to maximum align */ + bit_align = ft->align * 8; + bit_offset = (bit_offset + bit_align - 1) & ~(bit_align - 1); + + /* save max bit offset */ + if (max_bit_offset < bit_offset) + max_bit_offset = bit_offset; + + /* the size is the maximum */ + ft->size = max_bit_offset / 8; + break; + + default: + break; + } + +out: + ft->is_fixed = true; + ft->fix_in_progress = false; +} + +void fy_reflection_fixup_size_align(struct fy_reflection *rfl) +{ + struct fy_type *ft; + + for (ft = fy_type_list_head(&rfl->types); ft != NULL; ft = fy_type_next(&rfl->types, ft)) { + ft->is_fixed = false; + ft->fix_in_progress = false; + } + + for (ft = fy_type_list_head(&rfl->types); ft != NULL; ft = fy_type_next(&rfl->types, ft)) + fy_type_fixup_size_align(ft); + + for (ft = fy_type_list_head(&rfl->types); ft != NULL; ft = fy_type_next(&rfl->types, ft)) { + ft->is_fixed = false; + ft->fix_in_progress = false; + } +} + +void fy_decl_destroy(struct fy_decl *decl) +{ + struct fy_decl *child; + + if (!decl) + return; + + if (decl->cooked_comment) { + free(decl->cooked_comment); + decl->cooked_comment = NULL; + } + + if (decl->fyd_yaml) { + fy_document_destroy(decl->fyd_yaml); + decl->fyd_yaml = NULL; + } + if (decl->yaml_comment) { + free(decl->yaml_comment); + decl->yaml_comment = NULL; + } + + while ((child = fy_decl_list_pop(&decl->children)) != NULL) + fy_decl_destroy(child); + + backend_decl_cleanup(decl); + + if (decl->name) + free(decl->name); + free(decl); +} + +struct fy_decl *fy_decl_create(struct fy_reflection *rfl, struct fy_import *imp, + struct fy_decl *parent, enum fy_decl_type decl_type, const char *name, void *user) +{ + struct fy_decl *decl; + int rc; + + if (!rfl) + return NULL; + + assert(!imp || imp->rfl == rfl); + + /* guard against rollover */ + if 
(rfl->next_decl_id + 1 <= 0) + return NULL; + + decl = malloc(sizeof(*decl)); + if (!decl) + goto err_out; + memset(decl, 0, sizeof(*decl)); + + decl->rfl = rfl; + decl->imp = imp; + decl->parent = parent; + decl->decl_type = decl_type; + decl->name = strdup(name); + if (!decl->name) + goto err_out; + decl->source_location = NULL; + + fy_decl_list_init(&decl->children); + + rc = backend_decl_setup(decl, user); + if (rc) + goto err_out; + + assert(rfl->next_decl_id >= 0); + decl->id = rfl->next_decl_id++; + + return decl; +err_out: + fy_decl_destroy(decl); + return NULL; +} + +bool fy_decl_enum_value_is_unsigned(struct fy_decl *decl) +{ + int signess; + + /* only for enum values */ + if (decl->decl_type != FYDT_ENUM_VALUE) + return false; + + signess = fy_type_kind_signess(decl->enum_value_decl.type_kind); + assert(signess != 0); + + return signess > 0; +} + +long long fy_decl_enum_value_signed(struct fy_decl *decl) +{ + /* only for enum values */ + if (!decl || decl->decl_type != FYDT_ENUM_VALUE) + return LLONG_MAX; + + return decl->enum_value_decl.val.s; +} + +unsigned long long fy_decl_enum_value_unsigned(struct fy_decl *decl) +{ + /* only for enum values */ + if (!decl || decl->decl_type != FYDT_ENUM_VALUE) + return ULLONG_MAX; + + return decl->enum_value_decl.val.u; +} + +bool fy_decl_field_is_bitfield(struct fy_decl *decl) +{ + return decl && decl->decl_type == FYDT_BITFIELD; +} + +size_t fy_decl_field_offsetof(struct fy_decl *decl) +{ + /* only for field values */ + if (!decl || decl->decl_type != FYDT_FIELD) + return SIZE_MAX; + + return decl->field_decl.byte_offset; +} + +size_t fy_decl_field_bit_offsetof(struct fy_decl *decl) +{ + /* only for bit-field values */ + if (!decl || decl->decl_type != FYDT_BITFIELD) + return SIZE_MAX; + + return decl->bitfield_decl.bit_offset; +} + +size_t fy_decl_field_sizeof(struct fy_decl *decl) +{ + /* only for field values */ + if (!decl || decl->decl_type != FYDT_FIELD) + return SIZE_MAX; + + assert(decl->type); + return 
decl->type->size; +} + +size_t fy_decl_field_bit_width(struct fy_decl *decl) +{ + /* only for bit-field values */ + if (!decl || decl->decl_type != FYDT_BITFIELD) + return SIZE_MAX; + + return decl->bitfield_decl.bit_width; +} + +bool fy_decl_is_in_system_header(struct fy_decl *decl) +{ + return decl && decl->in_system_header; +} + +bool fy_decl_is_from_main_file(struct fy_decl *decl) +{ + return decl && decl->from_main_file; +} + +const struct fy_source_location *fy_decl_get_location(struct fy_decl *decl) +{ + if (!decl) + return NULL; + + return decl->source_location; +} + +const char *fy_decl_get_spelling(struct fy_decl *decl) +{ + if (!decl) + return NULL; + return decl->spelling; +} + +const char *fy_decl_get_display_name(struct fy_decl *decl) +{ + if (!decl) + return NULL; + return decl->display_name; +} + +const char *fy_decl_get_signature(struct fy_decl *decl) +{ + if (!decl) + return NULL; + return decl->signature; +} + +void fy_decl_clear_marker(struct fy_decl *decl) +{ + struct fy_decl *declp; + + if (!decl) + return; + + decl->marker = false; + + for (declp = fy_decl_list_head(&decl->children); declp != NULL; declp = fy_decl_next(&decl->children, declp)) + fy_decl_clear_marker(declp); +} + +void fy_decl_mark(struct fy_decl *decl) +{ + struct fy_decl *declp; + + if (!decl || decl->marker) + return; + + decl->marker = true; + + if (decl->imp) + fy_import_mark(decl->imp); + if (decl->parent) + fy_decl_mark(decl->parent); + + for (declp = fy_decl_list_head(&decl->children); declp != NULL; declp = fy_decl_next(&decl->children, declp)) + fy_decl_mark(declp); + + if (decl->source_location && decl->source_location->source_file) + fy_source_file_mark(decl->source_location->source_file); + + if (decl->type) + fy_type_mark(decl->type); + +} + +const char *fy_decl_get_raw_comment(struct fy_decl *decl) +{ + if (!decl) + return NULL; + return decl->raw_comment; +} + +const char *fy_decl_get_cooked_comment(struct fy_decl *decl) +{ + if (!decl || !decl->raw_comment) + 
return NULL; + + if (!decl->cooked_comment) + decl->cooked_comment = fy_get_cooked_comment(decl->raw_comment, strlen(decl->raw_comment)); + + return decl->cooked_comment; +} + +struct fy_document *fy_decl_get_yaml_annotation(struct fy_decl *decl) +{ + const char *cooked_comment; + + if (!decl || !decl->raw_comment) + return NULL; + + /* if we tried to parse always return what we found */ + if (decl->fyd_yaml_parsed) + return decl->fyd_yaml; + + if (!decl->fyd_yaml) { + cooked_comment = fy_decl_get_cooked_comment(decl); + if (cooked_comment) + decl->fyd_yaml = get_yaml_document(cooked_comment); + } + decl->fyd_yaml_parsed = true; + + return decl->fyd_yaml; +} + +const char *fy_decl_get_yaml_comment(struct fy_decl *decl) +{ + struct fy_document *fyd; + char *s, *e; + + if (!decl) + return NULL; + + if (!decl->yaml_comment) { + fyd = fy_decl_get_yaml_annotation(decl); + if (fyd) { + decl->yaml_comment = fy_emit_document_to_string(fyd, FYECF_MODE_FLOW_ONELINE); + if (decl->yaml_comment) { + /* trim newlines at the end */ + s = decl->yaml_comment; + e = s + strlen(s); + while (s < e && e[-1] == '\n') + *--e = '\0'; + } + } + } + + return decl->yaml_comment; +} + +const char *fy_decl_get_yaml_name(struct fy_decl *decl) +{ + return fy_token_get_text0( + fy_node_get_scalar_token( + fy_node_by_path( + fy_document_root(fy_decl_get_yaml_annotation(decl)), "/name", FY_NT, FYNWF_DONT_FOLLOW))); +} + +void fy_import_destroy(struct fy_import *imp) +{ + if (!imp) + return; + + backend_import_cleanup(imp); + + free(imp); +} + +struct fy_import *fy_import_create(struct fy_reflection *rfl, const void *user) +{ + struct fy_import *imp = NULL; + int rc; + + imp = malloc(sizeof(*imp)); + if (!imp) + goto err_out; + memset(imp, 0, sizeof(*imp)); + + imp->rfl = rfl; + + rfl->imp_curr = imp; + rc = backend_import_setup(imp, user); + rfl->imp_curr = NULL; + if (rc) + goto err_out; + + return imp; + +err_out: + fy_import_destroy(imp); + return NULL; +} + +void fy_import_clear_marker(struct 
fy_import *imp) +{ + if (!imp) + return; + + imp->marker = false; +} + +void fy_import_mark(struct fy_import *imp) +{ + if (!imp || imp->marker) + return; + + imp->marker = true; +} + +void fy_source_file_destroy(struct fy_source_file *srcf) +{ + if (!srcf) + return; + + if (srcf->realpath) + free(srcf->realpath); + + if (srcf->filename) + free(srcf->filename); + + free(srcf); +} + +struct fy_source_file * +fy_reflection_lookup_source_file(struct fy_reflection *rfl, const char *filename) +{ + struct fy_source_file *srcf; + char *realname; + + if (!rfl || !filename) + return NULL; + + realname = realpath(filename, NULL); + if (!realname) + return NULL; + + /* TODO hash it */ + for (srcf = fy_source_file_list_head(&rfl->source_files); srcf != NULL; srcf = fy_source_file_next(&rfl->source_files, srcf)) { + if (!strcmp(srcf->realpath, realname)) + break; + } + + free(realname); + + return srcf; +} + +struct fy_source_file * +fy_source_file_create(struct fy_reflection *rfl, const char *filename) +{ + struct fy_source_file *srcf = NULL; + + if (!rfl || !filename) + return NULL; + + /* guard against rollover */ + if (rfl->next_source_file_id + 1 <= 0) + return NULL; + + srcf = malloc(sizeof(*srcf)); + if (!srcf) + goto err_out; + memset(srcf, 0, sizeof(*srcf)); + + srcf->filename = strdup(filename); + if (!srcf->filename) + goto err_out; + + srcf->realpath = realpath(filename, NULL); + if (!srcf->realpath) + goto err_out; + + assert(rfl->next_source_file_id >= 0); + srcf->id = rfl->next_source_file_id++; + + return srcf; + +err_out: + fy_source_file_destroy(srcf); + return NULL; +} + +void fy_source_file_clear_marker(struct fy_source_file *srcf) +{ + if (!srcf) + return; + + srcf->marker = false; +} + +void fy_source_file_mark(struct fy_source_file *srcf) +{ + if (!srcf || srcf->marker) + return; + + srcf->marker = true; +} + +void fy_source_file_dump(struct fy_source_file *srcf) +{ + if (!srcf) + return; + + printf("\t%c %s realpath='%s' system=%s main_file=%s\n", + 
srcf->marker ? '*' : ' ', + srcf->filename, + srcf->realpath, + srcf->system_header ? "true" : "false", + srcf->main_file ? "true" : "false"); +} + +static int fy_reflection_setup(struct fy_reflection *rfl, const struct fy_reflection_cfg *rflc); +static void fy_reflection_cleanup(struct fy_reflection *rfl); + +static int fy_reflection_setup(struct fy_reflection *rfl, const struct fy_reflection_cfg *rflc) +{ + const struct fy_reflection_backend_ops *ops; + int rc; + + /* basic checks */ + if (!rflc || !rflc->backend || !rflc->backend->ops) + goto err_out; + + ops = rflc->backend->ops; + /* all methods must be non NULL */ + if (!ops->reflection_setup || + !ops->reflection_cleanup || + !ops->import_setup || + !ops->import_cleanup || + !ops->type_setup || + !ops->type_cleanup || + !ops->decl_setup || + !ops->decl_cleanup) + goto err_out; + + memset(rfl, 0, sizeof(*rfl)); + rfl->cfg = *rflc; + fy_import_list_init(&rfl->imports); + fy_source_file_list_init(&rfl->source_files); + fy_type_list_init(&rfl->types); + fy_decl_list_init(&rfl->decls); + rfl->next_type_id = 0; + rfl->next_decl_id = 0; + rfl->next_source_file_id = 0; + + rc = backend_reflection_setup(rfl); + if (rc) + goto err_out; + + return 0; + +err_out: + return -1; +} + +void fy_reflection_update_type_info(struct fy_reflection *rfl) +{ + struct fy_type *ft; + + /* reset all type infos */ + for (ft = fy_type_list_head(&rfl->types); ft != NULL; ft = fy_type_next(&rfl->types, ft)) + fy_type_reset_type_info(ft); + + /* and generate them */ + for (ft = fy_type_list_head(&rfl->types); ft != NULL; ft = fy_type_next(&rfl->types, ft)) + (void)fy_type_get_type_info(ft); +} + +static void fy_reflection_cleanup(struct fy_reflection *rfl) +{ + struct fy_import *imp; + struct fy_source_file *srcf; + struct fy_type *ft; + struct fy_decl *decl; + + assert(rfl); + + while ((ft = fy_type_list_pop(&rfl->types)) != NULL) + fy_type_destroy(ft); + + while ((decl = fy_decl_list_pop(&rfl->decls)) != NULL) + fy_decl_destroy(decl); + 
+ while ((srcf = fy_source_file_list_pop(&rfl->source_files)) != NULL) + fy_source_file_destroy(srcf); + + while ((imp = fy_import_list_pop(&rfl->imports)) != NULL) + fy_import_destroy(imp); + + backend_reflection_cleanup(rfl); +} + +static void fy_type_reset_type_info(struct fy_type *ft) +{ + struct fy_type_info *ti; + + if (!ft) + return; + + if (ft->field_decls) { + free(ft->field_decls); + ft->field_decls = NULL; + } + + ti = &ft->type_info; + if (ti->fields) + free((void *)ti->fields); + memset(ti, 0, sizeof(*ti)); + ft->has_type_info = false; +} + +static struct fy_type_info *fy_type_get_type_info(struct fy_type *ft) +{ + struct fy_decl *decl, *declc; + struct fy_type_info *ti; + struct fy_field_info *fi = NULL; + struct fy_decl **field_decls = NULL, **fds; + + if (!ft) + return NULL; + + /* need to check if recursively producing */ + if (ft->has_type_info || ft->producing_type_info) + return &ft->type_info; + + ft->producing_type_info = true; + + fy_type_reset_type_info(ft); + + ti = &ft->type_info; + ti->kind = ft->type_kind; + if (ft->is_const) + ti->flags |= FYTIF_CONST; + if (ft->is_volatile) + ti->flags |= FYTIF_VOLATILE; + if (ft->is_restrict) + ti->flags |= FYTIF_RESTRICT; + if (ft->is_fake_resolved) + ti->flags |= FYTIF_UNRESOLVED_PTR; + if (ft->decl && ft->decl->from_main_file) + ti->flags |= FYTIF_MAIN_FILE; + if (ft->decl && ft->decl->in_system_header) + ti->flags |= FYTIF_SYSTEM_HEADER; + if (ft->anonymous) + ti->flags |= FYTIF_ANONYMOUS; + + ti->name = ft->name; + ti->fullname = ft->fullname; + ti->normalized_name = ft->normalized_name; + ti->size = ft->size; + ti->align = ft->align; + + /* primitive types, no more to do */ + if (fy_type_kind_is_primitive(ti->kind) || + ti->kind == FYTK_FUNCTION || ti->kind == FYTK_VOID) + goto out; + + /* if we have a type we're dependent on, pull it in */ + if (ft->dependent_type) { + ti->dependent_type = fy_type_get_type_info(ft->dependent_type); + if (!ti->dependent_type) + goto err_out; + } + + /* for 
pointers or typedef we' done now */ + if (ti->kind == FYTK_PTR || ti->kind == FYTK_TYPEDEF || ti->kind == FYTK_INCOMPLETEARRAY) + goto out; + + /* constant array? fill in and out */ + if (ti->kind == FYTK_CONSTARRAY) { + ti->count = ft->element_count; + goto out; + } + + /* only those from now on */ + assert(fy_type_kind_has_fields(ti->kind)); + + assert(ft->decl); + + decl = ft->decl; + + /* count the number of fields/enum_values */ + ti->count = 0; + for (declc = fy_decl_list_head(&decl->children); declc; declc = fy_decl_next(&decl->children, declc)) + ti->count++; + + field_decls = malloc(sizeof(*field_decls) * ti->count); + if (!field_decls) + goto err_out; + + fi = malloc(sizeof(*fi) * ti->count); + if (!fi) + goto err_out; + memset(fi, 0, sizeof(*fi) * ti->count); + ti->fields = fi; + ft->field_decls = field_decls; + + fds = field_decls; + for (declc = fy_decl_list_head(&decl->children); declc; declc = fy_decl_next(&decl->children, declc), fi++) { + *fds++ = declc; + fi->flags = 0; + if (declc->anonymous) + fi->flags |= FYFIF_ANONYMOUS; + fi->parent = ti; + fi->name = declc->name; + fi->type_info = fy_type_get_type_info(declc->type); /* may be null for enum values */ + + if (ft->type_kind == FYTK_ENUM) { + if (fy_decl_enum_value_is_unsigned(declc)) { + fi->flags |= FYFIF_ENUM_UNSIGNED; + fi->uval = fy_decl_enum_value_unsigned(declc); + } else { + fi->sval = fy_decl_enum_value_signed(declc); + } + } else { + if (declc->decl_type == FYDT_BITFIELD) { + fi->flags |= FYFIF_BITFIELD; + fi->bit_offset = fy_decl_field_bit_offsetof(declc); + fi->bit_width = fy_decl_field_bit_width(declc); + } else + fi->offset = fy_decl_field_offsetof(declc); + } + } + +out: + ft->producing_type_info = false; + ft->has_type_info = true; + return ti; + +err_out: + if (field_decls) + free(field_decls); + if (fi) + free(fi); + ft->producing_type_info = false; + ft->has_type_info = false; + return NULL; +} + +void fy_reflection_destroy(struct fy_reflection *rfl) +{ + if (!rfl) + return; + 
/*
 * get_yaml_document_at_keyword() - Parse the YAML that follows a "yaml:" keyword.
 *
 * @start:   text beginning with the "yaml:" keyword
 * @size:    number of bytes available at @start
 * @advance: out, number of bytes of @start that were consumed
 *
 * After the keyword and any blanks, a newline introduces a block document
 * and '{' or '[' introduces a flow document. Anything else (including
 * running out of text) yields no document.
 *
 * The text comes from source comments, so short or malformed input is
 * reported by returning NULL rather than by tripping an assert.
 *
 * Returns the parsed document or NULL; *@advance is always set and is at
 * least the length of the keyword.
 */
static struct fy_document *get_yaml_document_at_keyword(const char *start, size_t size, size_t *advance)
{
	const size_t kwlen = strlen("yaml:");
	const char *s, *e;
	struct fy_document *fyd = NULL;
	size_t skip = 0;

	/* caller contract: we are positioned on the keyword */
	assert(size >= kwlen);
	assert(!memcmp(start, "yaml:", kwlen));

	e = start + size;

	/* skip over yaml: */
	s = start + kwlen;

	/* skip over spaces and tabs */
	while (s < e && isblank((unsigned char)*s))
		s++;

	if (s < e && *s == '\n') {	/* block document */
		s++;
		if (s < e)
			fyd = fy_block_document_build_from_string(NULL, s, e - s, &skip);

	} else if (s < e && (*s == '{' || *s == '[')) {	/* flow document */
		fyd = fy_flow_document_build_from_string(NULL, s, e - s, &skip);
	}

	if (fyd)
		s += skip;
	*advance = (size_t)(s - start);

	return fyd;
}
fy_decl_get_raw_comment(decl); + if (comment) { + fy_comment_iter_begin(comment, strlen(comment), &iter); + while ((text = fy_comment_iter_next_line(&iter, &len)) != NULL) + printf("%s\t // %.*s\n", tabs, (int)len, text); + fy_comment_iter_end(&iter); + } + } + comment = fy_decl_get_yaml_comment(decl); + if (comment) { + printf("%s\t // yaml: %s\n", tabs, comment); + } + + printf("%s\t%c D#%u '%s':'%s'", tabs, + decl->marker ? '*' : ' ', + decl->id, + decl_type_txt[decl->decl_type], decl->name); + + assert(decl->type); + type_name = fy_type_kind_info_get_internal(decl->type->type_kind)->name; + printf(" -> T#%d %s:'%s'", decl->type->id, type_name, decl->type->name); + + switch (decl->decl_type) { + case FYDT_ENUM: + assert(decl->type->dependent_type); + printf(" \"%s\"", fy_type_kind_info_get_internal(decl->type->dependent_type->type_kind)->name); + break; + case FYDT_ENUM_VALUE: + if (!fy_decl_enum_value_is_unsigned(decl)) + printf(" %lld", fy_decl_enum_value_signed(decl)); + else + printf(" %llu", fy_decl_enum_value_unsigned(decl)); + break; + case FYDT_FIELD: + printf(" offset=%zu", fy_decl_field_offsetof(decl)); + break; + case FYDT_BITFIELD: + bitoff = fy_decl_field_bit_offsetof(decl); + printf(" bitfield offset=%zu (%zu/%zu) width=%zu", + bitoff, bitoff/8, bitoff%8, + fy_decl_field_bit_width(decl)); + break; + default: + break; + } + + if (!no_location) { + source_location = fy_decl_get_location(decl); + if (source_location) + printf(" %s@%u:%u", source_location->source_file->filename, source_location->line, source_location->column); + } + + if (decl->is_synthetic) + printf(" synthetic"); + + printf("\n"); + + for (declp = fy_decl_list_head(&decl->children); declp != NULL; declp = fy_decl_next(&decl->children, declp)) + fy_decl_dump(declp, start_level, no_location); +} + +bool fy_type_is_pointer(struct fy_type *ft) +{ + if (!ft) + return false; + return ft->type_kind == FYTK_PTR; +} + +bool fy_type_is_array(struct fy_type *ft) +{ + if (!ft) + return false; + 
return ft->type_kind == FYTK_CONSTARRAY || ft->type_kind == FYTK_INCOMPLETEARRAY; +} + +bool fy_type_is_constant_array(struct fy_type *ft) +{ + if (!ft) + return false; + return ft->type_kind == FYTK_CONSTARRAY; +} + +bool fy_type_is_incomplete_array(struct fy_type *ft) +{ + if (!ft) + return false; + return ft->type_kind == FYTK_INCOMPLETEARRAY; +} + +size_t fy_type_get_sizeof(struct fy_type *ft) +{ + if (!ft) + return 0; + return ft->size; +} + +size_t fy_type_get_alignof(struct fy_type *ft) +{ + if (!ft) + return 0; + return ft->align; +} + +int fy_type_get_constant_array_element_count(struct fy_type *ft) +{ + if (!fy_type_is_constant_array(ft)) + return -1; + return ft->element_count; +} + +struct fy_type *fy_type_get_dependent_type(struct fy_type *ft) +{ + if (!ft || !fy_type_kind_is_dependent(ft->type_kind)) + return NULL; + return ft->dependent_type; +} + +/* TODO optimize later */ +struct fy_decl *fy_type_get_field_decl_by_name(struct fy_type *ft, const char *field) +{ + struct fy_decl *decl, *declf; + + if (!ft || !field || !fy_type_kind_has_fields(ft->type_kind)) + return NULL; + + decl = ft->decl; + assert(decl); + + for (declf = fy_decl_list_head(&decl->children); declf != NULL; declf = fy_decl_next(&decl->children, declf)) { + assert(fy_decl_type_is_field(declf->decl_type)); + if (!strcmp(declf->name, field)) + return declf; + } + + return NULL; +} + +/* TODO optimize later */ +struct fy_decl *fy_type_get_field_decl_by_enum_value(struct fy_type *ft, long long val) +{ + struct fy_decl *decl, *declf; + + if (!ft || ft->type_kind != FYTK_ENUM) + return NULL; + + decl = ft->decl; + assert(decl); + + for (declf = fy_decl_list_head(&decl->children); declf != NULL; declf = fy_decl_next(&decl->children, declf)) { + assert(declf->decl_type == FYDT_ENUM_VALUE); + if (declf->enum_value_decl.val.s == val) + return declf; + } + + return NULL; +} + +/* TODO optimize later */ +struct fy_decl *fy_type_get_field_decl_by_unsigned_enum_value(struct fy_type *ft, unsigned 
long long val) +{ + struct fy_decl *decl, *declf; + + if (!ft || ft->type_kind != FYTK_ENUM) + return NULL; + + decl = ft->decl; + assert(decl); + + for (declf = fy_decl_list_head(&decl->children); declf != NULL; declf = fy_decl_next(&decl->children, declf)) { + assert(declf->decl_type == FYDT_ENUM_VALUE); + if (declf->enum_value_decl.val.u == val) + return declf; + } + + return NULL; +} + +/* TODO optimize later */ +int fy_type_get_field_count(struct fy_type *ft) +{ + struct fy_decl *decl, *declf; + int count; + + if (!ft || !fy_type_kind_has_fields(ft->type_kind)) + return -1; + + decl = ft->decl; + assert(decl); + + count = 0; + for (declf = fy_decl_list_head(&decl->children); declf != NULL; declf = fy_decl_next(&decl->children, declf)) { + assert(fy_decl_type_is_field(declf->decl_type)); + count++; + } + return count; +} + +/* TODO optimize later */ +int fy_type_get_field_index_by_name(struct fy_type *ft, const char *field) +{ + struct fy_decl *decl, *declf; + const char *field_name; + int idx; + + if (!ft || !field || !fy_type_kind_has_fields(ft->type_kind)) + return -1; + + decl = ft->decl; + assert(decl); + + idx = 0; + for (declf = fy_decl_list_head(&decl->children); declf != NULL; declf = fy_decl_next(&decl->children, declf)) { + assert(fy_decl_type_is_field(declf->decl_type)); + + field_name = fy_decl_get_yaml_name(declf); + if (!field_name) + field_name = declf->name; + + if (!strcmp(field_name, field)) + return idx; + idx++; + } + + return -1; +} + +/* TODO optimize later */ +struct fy_decl *fy_type_get_field_decl_by_idx(struct fy_type *ft, unsigned int idx) +{ + struct fy_decl *decl, *declf; + unsigned int i; + + if (!ft || !fy_type_kind_has_fields(ft->type_kind)) + return NULL; + + decl = ft->decl; + assert(decl); + + i = 0; + for (declf = fy_decl_list_head(&decl->children); declf != NULL; declf = fy_decl_next(&decl->children, declf)) { + assert(fy_decl_type_is_field(declf->decl_type)); + if (i++ == idx) + return declf; + } + + return NULL; +} + +int 
fy_type_get_field_idx_by_decl(struct fy_type *ft, struct fy_decl *decl) +{ + struct fy_decl *declf; + unsigned int i; + + if (!ft || !decl) + return -1; + + assert(ft->decl); + i = 0; + for (declf = fy_decl_list_head(&ft->decl->children); declf != NULL; declf = fy_decl_next(&ft->decl->children, declf)) { + if (declf == decl) + return i; + i++; + } + return -1; +} + +const struct fy_source_location * +fy_type_get_decl_location(struct fy_type *ft) +{ + /* cannot get location in those cases */ + if (!ft || !ft->decl || fy_type_kind_is_primitive(ft->type_kind)) + return NULL; + + return fy_decl_get_location(ft->decl); +} + +char *fy_type_generate_name_internal(struct fy_type *ft, const char *field, int genidx, bool normalized) +{ + const struct fy_type_kind_info *tki; + struct fy_decl *decl; + struct fy_type *ftd; + enum fy_type_kind tk; + const char *declname, *s; + char *depname = NULL; + int depname_size; + FILE *fp = NULL; + char *buf, *bufn; + size_t len; + int ret; + bool error = false; + bool has_field; + char marker[16]; + const char *ms, *me; + const char *punct_list; + bool dep_last_is_punct, field_first_is_punct; + + if (!ft) + return NULL; + + has_field = field && field[0]; + + tk = ft->type_kind; + tki = fy_type_kind_info_get_internal(tk); + + buf = NULL; + len = 0; + fp = open_memstream(&buf, &len); + if (!fp) + goto err_out; + +#undef FPRINTF +#define FPRINTF(_fmt, ...) \ + do { \ + ret = fprintf(fp, (_fmt), ##__VA_ARGS__); \ + if (ret < 0) \ + goto err_out; \ + } while(0) + + if (ft->is_const) + FPRINTF("const "); + + if (ft->is_volatile) + FPRINTF("volatile "); + + if (fy_type_kind_is_primitive(tk) || tk == FYTK_INVALID) { + FPRINTF("%s%s%s", tki->name, has_field ? " " : "", has_field ? field : ""); + goto out; + } + + decl = ft->decl; + if (decl && decl->name) + declname = decl->name; + else + declname = ""; + + if (tk == FYTK_TYPEDEF) { + FPRINTF("%s%s%s", declname, has_field ? " " : "", has_field ? 
field : ""); + goto out; + } + if (tk == FYTK_FUNCTION) { + s = strchr(declname, '('); + if (!s) + goto err_out; + + FPRINTF("%.*s%s%s", (int)(s - declname), declname, has_field ? field : "", s); + goto out; + } + + /* struct, union and enum */ + if (fy_type_kind_has_prefix(tk)) { + if (!has_field) { + FPRINTF("%s", declname); + } else { + FPRINTF("%s %s %s", tki->name, declname, field); + } + goto out; + } + + assert(fy_type_kind_is_dependent(tk)); + + ftd = ft->dependent_type; + if (!ftd) + goto err_out; + + snprintf(marker, sizeof(marker) - 1, "@%d@", genidx); + depname = fy_type_generate_name_internal(ftd, marker, genidx + 1, normalized); + if (!depname) + goto err_out; + + ms = strstr(depname, marker); + if (!ms) + goto err_out; + depname_size = (int)(ms - depname); + /* strip trailing spaces */ + while (depname_size > 0 && isspace(depname[depname_size-1])) + depname_size--; + + me = ms + strlen(marker); + + punct_list = "*()"; + field_first_is_punct = field && field[0] && strchr(punct_list, field[0]) != NULL; + dep_last_is_punct = depname_size <= 0 || strchr(punct_list, depname[depname_size-1]) != NULL; + + FPRINTF("%.*s", depname_size, depname); + + // fprintf(stderr, "%s:%d dep='%.*s' field='%s', me='%s' field_first_is_punct=%s dep_last_is_punct=%s\n", + // __FILE__, __LINE__, depname_size, depname, field, me, + // field_first_is_punct ? "true" : "false", + // dep_last_is_punct ? 
"true" : "false"); + + /* if neither the first of the field or the last of the dep is punct, put a space there */ + if (!field_first_is_punct && !dep_last_is_punct) + FPRINTF(" "); + + switch (tk) { + + case FYTK_PTR: + if (ftd->type_kind != FYTK_FUNCTION && ftd->type_kind != FYTK_CONSTARRAY && ftd->type_kind != FYTK_INCOMPLETEARRAY) + FPRINTF("*%s", field); + else + FPRINTF("(*%s)", field); + break; + + case FYTK_INCOMPLETEARRAY: + if (ftd->type_kind != FYTK_FUNCTION) + FPRINTF("%s[]", field); + else + FPRINTF("(%s[])", field); + break; + + case FYTK_CONSTARRAY: + if (ftd->type_kind != FYTK_FUNCTION) + FPRINTF("%s[%llu]", field, ft->element_count); + else + FPRINTF("(%s[%llu])", field, ft->element_count); + break; + + default: + abort(); + break; + } + FPRINTF("%s", me); + +out: + if (depname) + free(depname); + + if (fp) + fclose(fp); + + if (error) { + free(buf); + buf = NULL; + } else if (normalized && buf) { + bufn = fy_type_name_normalize(buf); + assert(bufn); + assert(bufn); + free(buf); + buf = bufn; + assert(buf); + } + + return buf; + +err_out: + error = true; + goto out; +} + +char *fy_type_generate_name(struct fy_type *ft, const char *field, bool normalized) +{ + return fy_type_generate_name_internal(ft, field ? 
field : "", 0, normalized); +} + +void fy_type_dump(struct fy_type *ft, bool no_location); + +struct fy_decl *fy_type_get_anonymous_parent_decl(struct fy_type *ft) +{ + struct fy_reflection *rfl; + struct fy_type *ftp; + struct fy_decl *decl, *declc; + + if (!ft || !ft->anonymous) + return NULL; + rfl = ft->rfl; + assert(rfl); + + for (ftp = fy_type_list_head(&rfl->types); ftp != NULL; ftp = fy_type_next(&rfl->types, ftp)) { + if (!ftp->decl) + continue; + decl = ftp->decl; + for (declc = fy_decl_list_head(&decl->children); declc; declc = fy_decl_next(&decl->children, declc)) { + if (declc->type == ft) + return declc; + } + } + return NULL; +} + +size_t fy_type_eponymous_offset(struct fy_type *ft) +{ + size_t offset; + struct fy_decl *decl; + + if (!ft) + return 0; + + offset = 0; + while (ft->anonymous && (decl = fy_type_get_anonymous_parent_decl(ft)) != NULL) { + assert(decl->decl_type == FYDT_FIELD); + offset += decl->field_decl.byte_offset; + ft = decl->parent->type; + } + + return offset; +} + +const char *fy_type_get_raw_comment(struct fy_type *ft) +{ + if (!ft || !ft->decl) + return NULL; + return fy_decl_get_raw_comment(ft->decl); +} + +const char *fy_type_get_cooked_comment(struct fy_type *ft) +{ + if (!ft || !ft->decl) + return NULL; + return fy_decl_get_cooked_comment(ft->decl); +} + +struct fy_document *fy_type_get_yaml_annotation(struct fy_type *ft) +{ + if (!ft || !ft->decl) + return NULL; + return fy_decl_get_yaml_annotation(ft->decl); +} + +const char *fy_type_get_yaml_comment(struct fy_type *ft) +{ + if (!ft || !ft->decl) + return NULL; + return fy_decl_get_yaml_comment(ft->decl); +} + +const char *fy_type_get_yaml_name(struct fy_type *ft) +{ + const char *name; + + if (!ft || !ft->decl) + return NULL; + + name = fy_decl_get_yaml_name(ft->decl); + if (name) + return name; + return ft->name; +} + + +void fy_type_dump(struct fy_type *ft, bool no_location) +{ + const struct fy_source_location *source_location; + const char *type_name; + struct fy_type 
*ftd; + char *ntn1, *ntn2; + + printf("\t%c T#%d", ft->marker ? '*' : ' ', ft->id); + type_name = fy_type_kind_info_get_internal(ft->type_kind)->name; + printf(" %s:'%s'", type_name, ft->name); + + /* compare the normalized names */ + ntn1 = fy_type_generate_name(ft, NULL, true); + assert(ntn1); + ntn2 = fy_type_name_normalize(ft->name); + assert(ntn2); + + /* display diffs in normalized names */ + if (strcmp(ntn1, ntn2)) + printf(":!'%s'-'%s'", ntn1, ntn2); + + free(ntn2); + free(ntn1); + + printf(" size=%zu align=%zu", fy_type_get_sizeof(ft), fy_type_get_alignof(ft)); + + if (fy_type_is_declared(ft)) { + printf(" -> D#%u", ft->decl->id); + if (!no_location) { + source_location = fy_type_get_decl_location(ft); + if (source_location) + printf(" %s@%u:%u", source_location->source_file->filename, source_location->line, source_location->column); + } + } + + if (fy_type_kind_is_dependent(ft->type_kind)) { + if (fy_type_is_resolved(ft)) { + ftd = fy_type_get_dependent_type(ft); + if (ftd) { + type_name = fy_type_kind_info_get_internal(ftd->type_kind)->name; + printf(" -> T#%d %s:'%s'", ftd->id, type_name, ftd->name); + } else { + printf(" -> T#"); + } + } else + printf(" unresolved"); + } + + if (ft->anonymous) + printf(" anonymous"); + + if (ft->is_synthetic) + printf(" synthetic"); + + if (ft->is_fake_resolved) + printf(" fake-resolved"); + + if (ft->is_const) + printf(" const"); + + if (ft->is_volatile) + printf(" volatile"); + + if (ft->is_restrict) + printf(" restrict"); + + printf("\n"); +} + +struct fy_reflection *fy_reflection_create(const struct fy_reflection_cfg *rflc) +{ + struct fy_reflection *rfl; + int rc; + + rfl = malloc(sizeof(*rfl)); + if (!rfl) + goto err_out; + memset(rfl, 0, sizeof(*rfl)); + + rc = fy_reflection_setup(rfl, rflc); + if (rc) + goto err_out; + + return rfl; + +err_out: + fy_reflection_destroy(rfl); + return NULL; +} + +int fy_reflection_import(struct fy_reflection *rfl, const void *user) +{ + struct fy_import *imp = NULL; + + 
assert(rfl); + + imp = fy_import_create(rfl, user); + if (!imp) + goto err_out; + + fy_import_list_add_tail(&rfl->imports, imp); + + return 0; + +err_out: + fy_import_destroy(imp); + return -1; +} + +bool fy_reflection_is_resolved(struct fy_reflection *rfl) +{ + return rfl && rfl->unresolved_types_count == 0; +} + +void fy_reflection_clear_all_markers(struct fy_reflection *rfl) +{ + struct fy_type *ft; + struct fy_decl *decl; + struct fy_import *imp; + struct fy_source_file *srcf; + + for (ft = fy_type_list_head(&rfl->types); ft != NULL; ft = fy_type_next(&rfl->types, ft)) + fy_type_clear_marker(ft); + + for (decl = fy_decl_list_head(&rfl->decls); decl != NULL; decl = fy_decl_next(&rfl->decls, decl)) + fy_decl_clear_marker(decl); + + for (imp = fy_import_list_head(&rfl->imports); imp != NULL; imp = fy_import_next(&rfl->imports, imp)) + fy_import_clear_marker(imp); + + for (srcf = fy_source_file_list_head(&rfl->source_files); srcf != NULL; srcf = fy_source_file_next(&rfl->source_files, srcf)) + fy_source_file_clear_marker(srcf); +} + +struct fy_type * +fy_reflection_lookup_type(struct fy_reflection *rfl, enum fy_type_kind type_kind, const char *name) +{ + struct fy_type *ft; + const char *type_name; + const char *s; + char *ntn1, *ntn2; + size_t len; + enum fy_type_kind type_kind_auto; + + if (!rfl || !name) + return NULL; + + ntn1 = ntn2 = NULL; + ft = NULL; + + ntn1 = fy_type_name_normalize(name); + if (!ntn1) + goto err_out; + + s = strchr(ntn1, ' '); + type_name = s ? 
s : ntn1; + while (isblank(*type_name)) + type_name++; + len = strlen(ntn1); + if (len > 7 && !strncmp(ntn1, "struct ", 7)) + type_kind_auto = FYTK_STRUCT; + else if (len > 6 && !strncmp(ntn1, "union ", 6)) + type_kind_auto = FYTK_UNION; + else + type_kind_auto = FYTK_INVALID; + + for (ft = fy_type_list_head(&rfl->types); ft != NULL; ft = fy_type_next(&rfl->types, ft)) { + + /* if type kind is valid, the type must match */ + if (type_kind != FYTK_INVALID && ft->type_kind != type_kind) + continue; + + /* type kind is invalid, but an auto type kind was found, match that */ + if (type_kind == FYTK_INVALID && type_kind_auto != FYTK_INVALID && ft->type_kind != type_kind_auto) + continue; + + /* check name match */ + if (!strcmp(ft->normalized_name, type_name)) + break; + } + +err_out: + if (ntn1) + free(ntn1); + if (ntn2) + free(ntn2); + + return ft; +} + +void fy_reflection_renumber(struct fy_reflection *rfl) +{ + struct fy_decl *decl, *decl2; + struct fy_type *ft; + struct fy_source_file *srcf; + + rfl->next_decl_id = 0; + for (decl = fy_decl_list_head(&rfl->decls); decl != NULL; decl = fy_decl_next(&rfl->decls, decl)) { + decl->id = rfl->next_decl_id++; + for (decl2 = fy_decl_list_head(&decl->children); decl2 != NULL; decl2 = fy_decl_next(&decl->children, decl2)) { + decl2->id = rfl->next_decl_id++; + /* note that there is no second level of decls */ + assert(fy_decl_list_empty(&decl2->children)); + } + } + + rfl->next_type_id = 0; + for (ft = fy_type_list_head(&rfl->types); ft != NULL; ft = fy_type_next(&rfl->types, ft)) + ft->id = rfl->next_type_id++; + + rfl->next_source_file_id = 0; + for (srcf = fy_source_file_list_head(&rfl->source_files); srcf != NULL; srcf = fy_source_file_next(&rfl->source_files, srcf)) + srcf->id = rfl->next_source_file_id++; +} + +void fy_reflection_prune_unmarked(struct fy_reflection *rfl) +{ + struct fy_import *imp, *impn; + struct fy_decl *decl, *decln; + struct fy_source_file *srcf, *srcfn; + struct fy_type *ft, *ftn; + + if (!rfl) + 
return; + + for (imp = fy_import_list_head(&rfl->imports); imp != NULL; imp = impn) { + impn = fy_import_next(&rfl->imports, imp); + + if (!imp->marker) { + fy_import_list_del(&rfl->imports, imp); + fy_import_destroy(imp); + continue; + } + } + + /* note second level decls are always fields */ + for (decl = fy_decl_list_head(&rfl->decls); decl != NULL; decl = decln) { + decln = fy_decl_next(&rfl->decls, decl); + + if (!decl->marker) { + fy_decl_list_del(&rfl->decls, decl); + fy_decl_destroy(decl); + continue; + } + } + + for (ft = fy_type_list_head(&rfl->types); ft != NULL; ft = ftn) { + ftn = fy_type_next(&rfl->types, ft); + + if (!ft->marker) { + fy_type_list_del(&rfl->types, ft); + fy_type_destroy(ft); + continue; + } + } + + for (srcf = fy_source_file_list_head(&rfl->source_files); srcf != NULL; srcf = srcfn) { + srcfn = fy_source_file_next(&rfl->source_files, srcf); + + if (!srcf->marker) { + fy_source_file_list_del(&rfl->source_files, srcf); + fy_source_file_destroy(srcf); + continue; + } + } + + fy_reflection_renumber(rfl); + fy_reflection_update_type_info(rfl); +} + +struct fy_type * +lookup_or_create_primitive_type(struct fy_reflection *rfl, enum fy_type_kind type_kind) +{ + const struct fy_type_kind_info *tki; + struct fy_type *ft; + + tki = fy_type_kind_info_get_internal(type_kind); + ft = fy_reflection_lookup_type(rfl, type_kind, tki->name); + if (ft) + return ft; + + ft = fy_type_create(rfl, type_kind, tki->name, NULL, NULL); + if (!ft) + return NULL; + ft->size = tki->size; + ft->align = tki->align; + fy_type_list_add_tail(&rfl->types, ft); + return ft; +} + +enum fy_type_kind +select_primitive_type_kind_for_array(struct fy_reflection *rfl, size_t size, size_t align) +{ + const struct fy_type_kind_info *tki; + bool use_ull; + static const enum fy_type_kind check[] = { + FYTK_ULONGLONG, + FYTK_ULONG, + FYTK_UINT, + FYTK_USHORT, + FYTK_UCHAR, + FYTK_INVALID, /* end */ + }; + static const enum fy_type_kind *p; + + assert(align > 0); + assert(size > 0); + 
+ /* otherwise it's a constant array with the required size and alignment */ + p = check; + use_ull = (sizeof(unsigned long long) != sizeof(unsigned long)) || + (alignof(unsigned long long) != sizeof(unsigned long)); + if (!use_ull) + p++; + for (; *p != FYTK_INVALID; p++) { + tki = fy_type_kind_info_get_internal(*p); + if (tki->align == align && size >= tki->size && (size % tki->size) == 0) + break; + } + /* this is impossible */ + assert(*p != FYTK_INVALID); + return *p; +} + +struct fy_type * +lookup_or_create_primitive_type_array(struct fy_reflection *rfl, enum fy_type_kind type_kind, unsigned long long element_count) +{ + const struct fy_type_kind_info *tki; + struct fy_type *ft, *ft_base; + char *name = NULL; + int ret; + + tki = fy_type_kind_info_get_internal(type_kind); + ft_base = lookup_or_create_primitive_type(rfl, type_kind); + if (!ft_base) + return NULL; + + ret = asprintf(&name, "%s [%llu]", tki->name, element_count); + if (ret < 0) + goto err_out; + + ft = fy_reflection_lookup_type(rfl, FYTK_CONSTARRAY, name); + if (ft) { + free(name); + return ft; + } + + ft = fy_type_create(rfl, FYTK_CONSTARRAY, name, NULL, NULL); + if (!ft) + goto err_out; + + ft->is_synthetic = true; + ft->size = ft_base->size * element_count; + ft->align = ft_base->align; + ft->element_count = element_count; + ft->dependent_type_kind = type_kind; + ft->dependent_type = ft_base; + ft->fake_resolve_data = name; + + fy_type_list_add_tail(&rfl->types, ft); + + return ft; + +err_out: + if (name) + free(name); + return NULL; +} + +static int +fix_unresolved_type(struct fy_type *ft) +{ + const struct fy_type_kind_info *tki_base = NULL; + struct fy_reflection *rfl = ft->rfl; + struct fy_type *ftt, *ftt_inner; + struct fy_import *imp; + struct fy_decl *decl, *declc; + enum fy_decl_type decl_type; + enum fy_type_kind base_type_kind; + char *synthetic_name = NULL; + const char *name; + unsigned long long element_count; + + assert(ft->dependent_type_name); + + /* fprintf(stderr, "%s:%d - 
ft->type_kind=%s ft->name=%s ft->dependent_type_kind=%s ft->dependent_type_name=%s\n", + __FILE__, __LINE__, + fy_type_kind_info_get_internal(ft->type_kind)->name, ft->name, + fy_type_kind_info_get_internal(ft->dependent_type_kind)->name, ft->dependent_type_name); */ + + /* for an unresolved pointer, first try if there's a type registered + * at the previous pass + */ + if (ft->type_kind == FYTK_PTR) { + ftt = fy_reflection_lookup_type(rfl, ft->dependent_type_kind, ft->dependent_type_name); + /* if found, very good, already resolved */ + if (ftt) + goto done; + } + + assert(ft->dependent_type_kind == FYTK_STRUCT || + ft->dependent_type_kind == FYTK_UNION || + ft->dependent_type_kind == FYTK_TYPEDEF); + + /* if we have concrete sizes, put something in */ + if (ft->type_kind != FYTK_PTR) { + base_type_kind = select_primitive_type_kind_for_array(rfl, ft->size, ft->align); + if (base_type_kind == FYTK_INVALID) + goto err_out; + tki_base = fy_type_kind_info_get_internal(base_type_kind); + element_count = ft->size / tki_base->size; + } else { + base_type_kind = FYTK_VOID; + tki_base = fy_type_kind_info_get_internal(base_type_kind); + element_count = 0; + } + + decl_type = ft->dependent_type_kind == FYTK_STRUCT ? FYDT_STRUCT : + ft->dependent_type_kind == FYTK_UNION ? 
FYDT_UNION : + FYDT_TYPEDEF; + + name = strchr(ft->dependent_type_name, ' '); + if (name) { + while (*name == ' ') + name++; + } + if (!name || !name[0]) + name = ft->dependent_type_name; + + if (ft->decl) + imp = ft->decl->imp; + else + imp = fy_import_list_head(&rfl->imports); + if (!imp) + goto err_out; + + /* create fake declaration */ + decl = fy_decl_create(rfl, imp, NULL, decl_type, name, NULL); + if (!decl) + goto err_out; + decl->is_synthetic = true; + decl->in_system_header = true; + + if (element_count > 0) { + ftt_inner = lookup_or_create_primitive_type_array(rfl, base_type_kind, element_count); + if (!ftt_inner) + goto err_out; + } else if (fy_type_kind_is_dependent(ft->dependent_type_kind)) { + ftt_inner = lookup_or_create_primitive_type(rfl, base_type_kind); + if (!ftt_inner) + goto err_out; + } else + ftt_inner = NULL; + + if (decl_type != FYDT_TYPEDEF && ftt_inner) { + declc = fy_decl_create(rfl, imp, decl, FYDT_FIELD, "", NULL); + if (!declc) + goto err_out; + declc->type = ftt_inner; + declc->field_decl.byte_offset = 0; + declc->is_synthetic = true; + declc->anonymous = true; + declc->in_system_header = true; + + fy_decl_list_add_tail(&decl->children, declc); + } + + fy_decl_list_add_tail(&rfl->decls, decl); + + ftt = fy_type_create(rfl, ft->dependent_type_kind, name, decl, NULL); + if (!ftt) + goto err_out; + + if (fy_type_kind_is_dependent(ftt->type_kind)) + ftt->dependent_type = ftt_inner; + else if (element_count > 0) { + ftt->size = ftt_inner->size; + ftt->align = ftt_inner->align; + } + + fy_type_list_add_tail(&rfl->types, ftt); + + decl->type = ftt; + +done: + assert(ftt); + + ft->dependent_type = ftt; + ft->is_synthetic = true; + + return 0; +err_out: + if (synthetic_name) + free(synthetic_name); + return -1; +} + +static int +fix_unresolved_types(struct fy_reflection *rfl, bool no_pointers) +{ + struct fy_type *ft, *ftn; + int ret; + + /* nothing to do in this case */ + if (!rfl || !rfl->unresolved_types_count) + return 0; + + for (ft = 
fy_type_list_head(&rfl->types); + rfl->unresolved_types_count > 0 && ft != NULL; ft = ftn) { + + ftn = fy_type_next(&rfl->types, ft); + + if (fy_type_is_resolved(ft)) + continue; + + if (no_pointers && ft->type_kind == FYTK_PTR) + continue; + + ret = fix_unresolved_type(ft); + if (ret < 0) + goto err_out; + + ft->unresolved = false; + rfl->unresolved_types_count--; + } + + return 0; + +err_out: + return -1; +} + +void fy_reflection_fix_unresolved(struct fy_reflection *rfl) +{ + int ret; + + /* first pass, don't fix pointers */ + ret = fix_unresolved_types(rfl, true); + if (ret) + goto err_out; + + /* second pass, fix pointers */ + ret = fix_unresolved_types(rfl, false); + if (ret) + goto err_out; + return; +err_out: + return; +} + +void fy_reflection_dump(struct fy_reflection *rfl, bool marked_only, bool no_location) +{ + struct fy_import *imp; + struct fy_decl *decl; + struct fy_source_file *srcf; + struct fy_type *ft; + + if (!rfl) + return; + + printf("Reflection imports:\n"); + for (imp = fy_import_list_head(&rfl->imports); imp != NULL; imp = fy_import_next(&rfl->imports, imp)) { + + if (marked_only && !imp->marker) + continue; + + printf("\t%c %s\n", + imp->marker ? '*' : ' ', + imp->name); + + } + + printf("Reflection decls:\n"); + for (decl = fy_decl_list_head(&rfl->decls); decl != NULL; decl = fy_decl_next(&rfl->decls, decl)) { + + if (marked_only && !decl->marker) + continue; + + fy_decl_dump(decl, 0, no_location); + } + + printf("Reflection types:\n"); + for (ft = fy_type_list_head(&rfl->types); ft != NULL; ft = fy_type_next(&rfl->types, ft)) { + + if (marked_only && !ft->marker) + continue; + + fy_type_dump(ft, no_location); + } + + printf("Reflection files:\n"); + for (srcf = fy_source_file_list_head(&rfl->source_files); srcf != NULL; srcf = fy_source_file_next(&rfl->source_files, srcf)) { + if (marked_only && !srcf->marker) + continue; + + fy_source_file_dump(srcf); + } +} + +const char *fy_import_get_name(struct fy_import *imp) +{ + return imp ? 
imp->name : NULL; +} + +struct fy_type * +fy_type_iterate(struct fy_reflection *rfl, void **prevp) +{ + if (!rfl || !prevp) + return NULL; + + return *prevp = *prevp ? fy_type_next(&rfl->types, *prevp) : fy_type_list_head(&rfl->types); +} + +struct fy_reflection * +fy_reflection_from_imports(const char *backend_name, const void *backend_cfg, + int num_imports, const void *import_cfgs[]) +{ + const struct fy_reflection_backend *backend; + struct fy_reflection_cfg rcfg; + struct fy_reflection *rfl = NULL; + int i, rc; + + if (!backend_name || num_imports <= 0) + return NULL; + + backend = fy_reflection_backend_lookup(backend_name); + if (!backend) + return NULL; + + memset(&rcfg, 0, sizeof(rcfg)); + rcfg.backend_cfg = backend_cfg; + rcfg.backend = backend; + + rfl = fy_reflection_create(&rcfg); + if (!rfl) + goto err_out; + + /* do the imports */ + for (i = 0; i < num_imports; i++) { + rc = fy_reflection_import(rfl, import_cfgs ? import_cfgs[i] : NULL); + if (rc) + goto err_out; + break; + } + + /* if the reflection is not resolve, try to resolve it */ + if (!fy_reflection_is_resolved(rfl)) + fy_reflection_fix_unresolved(rfl); + + fy_reflection_dump(rfl, false, false); + + /* if still unresolved, failure */ + if (!fy_reflection_is_resolved(rfl)) + goto err_out; + + /* renumber to better numbers */ + fy_reflection_renumber(rfl); + + /* update the type info */ + fy_reflection_update_type_info(rfl); + + return rfl; + +err_out: + fy_reflection_destroy(rfl); + return NULL; +} + +struct fy_reflection * +fy_reflection_from_import(const char *backend_name, const void *backend_cfg, const void *import_cfg) +{ + return fy_reflection_from_imports(backend_name, backend_cfg, + 1, import_cfg ? &import_cfg : NULL); +} + +const struct fy_type_info * +fy_type_info_iterate(struct fy_reflection *rfl, void **prevp) +{ + struct fy_type *ft; + + if (!rfl || !prevp) + return NULL; + + ft = *prevp = *prevp ? 
fy_type_next(&rfl->types, *prevp) : fy_type_list_head(&rfl->types); + if (!ft) + return NULL; + return fy_type_get_type_info(ft); +} + +const struct fy_type_info * +fy_type_info_reverse_iterate(struct fy_reflection *rfl, void **prevp) +{ + struct fy_type *ft; + + if (!rfl || !prevp) + return NULL; + + ft = *prevp = *prevp ? fy_type_prev(&rfl->types, *prevp) : fy_type_list_tail(&rfl->types); + if (!ft) + return NULL; + return fy_type_get_type_info(ft); +} + +struct fy_reflection * +fy_type_info_to_reflection(const struct fy_type_info *ti) +{ + struct fy_type *ft; + + ft = fy_type_from_info(ti); + if (!ft) + return NULL; + return ft->rfl; +} + +char * +fy_type_info_generate_name(const struct fy_type_info *ti, const char *field, bool normalized) +{ + struct fy_type *ft; + + ft = fy_type_from_info(ti); + if (!ft) + return NULL; + + return fy_type_generate_name(ft, field, normalized); +} + +char * +fy_type_name_normalize(const char *type_name) +{ + const char *s; + char *d; + char *buf; + char c, lastc; + + if (!type_name) + return NULL; + + /* the buffer will never grow, so worst case allocation */ + buf = malloc(strlen(type_name) + 1); + if (!buf) + return NULL; + + s = type_name; + d = buf; + + lastc = '\0'; + while ((c = *s++) != '\0') { + /* space, if last and next was alnum keep one, otherwise skip all */ + if (isspace((unsigned char)c)) { + /* lastc must keep the last emitted non-space char for the check below */ + while ((c = *s) != '\0' && isspace((unsigned char)c)) + s++; + if (isalnum((unsigned char)lastc) && isalnum((unsigned char)c)) + *d++ = ' '; + continue; + } + *d++ = c; + lastc = c; + } + *d = '\0'; + + return buf; +} + +void fy_type_info_clear_marker(const struct fy_type_info *ti) +{ + fy_type_clear_marker(fy_type_from_info(ti)); +} + +void fy_type_info_mark(const struct fy_type_info *ti) +{ + fy_type_mark(fy_type_from_info(ti)); +} + +bool fy_type_info_is_marked(const struct fy_type_info *ti) +{ + struct fy_type *ft; + + ft = fy_type_from_info(ti); + return ft && ft->marker; +} + +size_t fy_type_info_eponymous_offset(const struct fy_type_info *ti) +{ + return 
fy_type_eponymous_offset(fy_type_from_info(ti)); +} + +int fy_field_info_index(const struct fy_field_info *fi) +{ + const struct fy_type_info *ti; + int idx; + + if (!fi) + return -1; + ti = fi->parent; + assert(ti); + assert(ti->fields); + idx = fi - ti->fields; + assert((unsigned int)idx < ti->count); + return idx; +} + +const struct fy_field_info * +fy_type_info_lookup_field(const struct fy_type_info *ti, const char *name) +{ + int idx; + + idx = fy_type_get_field_index_by_name(fy_type_from_info(ti), name); + if (idx < 0) + return NULL; + assert((unsigned int)idx < ti->count); + return ti->fields + idx; +} + +const struct fy_field_info * +fy_type_info_lookup_field_by_enum_value(const struct fy_type_info *ti, long long val) +{ + struct fy_type *ft; + int idx; + + ft = fy_type_from_info(ti); + idx = fy_type_get_field_idx_by_decl(ft, fy_type_get_field_decl_by_enum_value(ft, val)); + if (idx < 0) + return NULL; + assert((unsigned int)idx < ti->count); + return ti->fields + idx; +} + +const struct fy_field_info * +fy_type_info_lookup_field_by_unsigned_enum_value(const struct fy_type_info *ti, unsigned long long val) +{ + struct fy_type *ft; + int idx; + + ft = fy_type_from_info(ti); + idx = fy_type_get_field_idx_by_decl(ft, fy_type_get_field_decl_by_unsigned_enum_value(ft, val)); + if (idx < 0) + return NULL; + assert((unsigned int)idx < ti->count); + return ti->fields + idx; +} + +void fy_type_info_set_userdata(const struct fy_type_info *ti, void *userdata) +{ + fy_type_set_userdata(fy_type_from_info(ti), userdata); +} + +void *fy_type_info_get_userdata(const struct fy_type_info *ti) +{ + return fy_type_get_userdata(fy_type_from_info(ti)); +} + +void fy_field_info_set_userdata(const struct fy_field_info *fi, void *userdata) +{ + fy_decl_set_userdata(fy_decl_from_field_info(fi), userdata); +} + +void *fy_field_info_get_userdata(const struct fy_field_info *fi) +{ + return fy_decl_get_userdata(fy_decl_from_field_info(fi)); +} + +const char 
*fy_type_info_get_comment(const struct fy_type_info *ti) +{ + return fy_type_get_cooked_comment(fy_type_from_info(ti)); +} + +const char *fy_field_info_get_comment(const struct fy_field_info *fi) +{ + return fy_decl_get_cooked_comment(fy_decl_from_field_info(fi)); +} + +struct fy_document *fy_type_info_get_yaml_annotation(const struct fy_type_info *ti) +{ + return fy_type_get_yaml_annotation(fy_type_from_info(ti)); +} + +struct fy_document *fy_field_info_get_yaml_annotation(const struct fy_field_info *fi) +{ + return fy_decl_get_yaml_annotation(fy_decl_from_field_info(fi)); +} + +const char *fy_type_info_get_yaml_name(const struct fy_type_info *ti) +{ + return fy_type_get_yaml_name(fy_type_from_info(ti)); +} + +const char *fy_field_info_get_yaml_name(const struct fy_field_info *fi) +{ + return fy_decl_get_yaml_name(fy_decl_from_field_info(fi)); +} diff --git a/src/reflection/fy-registry.c b/src/reflection/fy-registry.c new file mode 100644 index 00000000..58670f23 --- /dev/null +++ b/src/reflection/fy-registry.c @@ -0,0 +1,226 @@ +/* + * fy-registry - Backend registy methods + * + * Copyright (c) 2023 Pantelis Antoniou + * + * SPDX-License-Identifier: MIT + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include + +#include "fy-utf8.h" +#include "fy-blob.h" + +#include "fy-reflection-private.h" + +#include "fy-packed-backend.h" +#if defined(HAVE_LIBCLANG) && HAVE_LIBCLANG +#include "fy-clang-backend.h" +#endif + +extern const struct fy_reflection_backend fy_reflection_packed_backend; +#if defined(HAVE_LIBCLANG) && HAVE_LIBCLANG +extern const struct fy_reflection_backend fy_reflection_clang_backend; +#endif + +static const struct fy_reflection_backend *builtin_backends[] = { + /* the packed backend is always available */ + &fy_reflection_packed_backend, +#if defined(HAVE_LIBCLANG) && HAVE_LIBCLANG + /* the clang backend is optional */ + &fy_reflection_clang_backend, +#endif +}; + +const struct fy_reflection_backend * 
+fy_reflection_backend_lookup(const char *name) +{ + const struct fy_reflection_backend *be; + unsigned int i; + + for (i = 0; i < sizeof(builtin_backends)/sizeof(builtin_backends[0]); i++) { + be = builtin_backends[i]; + if (be && be->name && !strcmp(name, be->name)) + return be; + } + return NULL; +} + +#if defined(HAVE_LIBCLANG) && HAVE_LIBCLANG +struct fy_reflection * +fy_reflection_from_c_files(int filec, const char * const filev[], int argc, const char * const argv[], + bool display_diagnostics, bool include_comments) +{ + struct fy_clang_backend_reflection_cfg rcfg; + struct fy_clang_backend_import_cfg *icfgs; + const void **icfgps; + int i, num_imports; + + memset(&rcfg, 0, sizeof(rcfg)); + rcfg.display_diagnostics = display_diagnostics; + + num_imports = filec; + + icfgs = alloca(sizeof(*icfgs) * num_imports); + icfgps = alloca(sizeof(*icfgps) * num_imports); + memset(icfgs, 0, sizeof(*icfgs) * num_imports); + for (i = 0; i < num_imports; i++) { + icfgps[i] = &icfgs[i]; + icfgs[i].file = filev[i]; + icfgs[i].argc = argc; + icfgs[i].argv = argv; + } + + return fy_reflection_from_imports("clang", &rcfg, num_imports, icfgps); +} + +struct fy_reflection * +fy_reflection_from_c_file(const char *file, int argc, const char * const argv[], + bool display_diagnostics, bool include_comments) +{ + return fy_reflection_from_c_files(1, &file, argc, argv, + display_diagnostics, include_comments); +} + +struct fy_reflection * +fy_reflection_from_c_file_with_cflags(const char *file, const char *cflags, + bool display_diagnostics, bool include_comments) +{ + void *mem; + int argc; + const char * const *argv; + struct fy_reflection *rfl; + + mem = fy_utf8_split_posix(cflags, &argc, &argv); + if (!mem) + return NULL; + + rfl = fy_reflection_from_c_file(file, argc, argv, display_diagnostics, include_comments); + + free(mem); + + return rfl; +} + +#else + +struct fy_reflection * +fy_reflection_from_c_files(int filec, const char * const filev[], int argc, const char * const 
argv[], + bool display_diagnostics, bool include_comments) +{ + return NULL; +} + + +struct fy_reflection * +fy_reflection_from_c_file(const char *file, int argc, const char * const argv[], + bool display_diagnostics, bool include_comments) +{ + return NULL; +} + +struct fy_reflection * +fy_reflection_from_c_file_with_cflags(const char *file, const char *cflags, + bool display_diagnostics, bool include_comments) +{ + return NULL; +} + +#endif + +struct fy_reflection * +fy_reflection_from_packed_blob(const void *blob, size_t blob_size) +{ + struct fy_packed_backend_reflection_cfg packed_cfg; + + memset(&packed_cfg, 0, sizeof(packed_cfg)); + packed_cfg.type = FYPRT_BLOB; + packed_cfg.blob = blob; + packed_cfg.blob_size = blob_size; + packed_cfg.copy = true; /* always copy */ + + return fy_reflection_from_import("packed", &packed_cfg, NULL); +} + +void * +fy_reflection_to_packed_blob(struct fy_reflection *rfl, size_t *blob_sizep, + bool include_comments, bool include_location) +{ + struct fy_packed_generator pg_local, *pg = &pg_local; + void *blob = NULL; + size_t blob_size = 0; + int rc; + + memset(pg, 0, sizeof(*pg)); + + pg->rfl = rfl; + pg->type = FYPGT_BLOB; + pg->blobp = &blob; + pg->blob_sizep = &blob_size; + + rc = fy_packed_generate(pg); + if (rc) + return NULL; + + if (blob_sizep) + *blob_sizep = blob_size; + + return blob; +} + +struct fy_reflection *fy_reflection_from_packed_blob_file(const char *blob_file) +{ + struct fy_reflection *rfl; + size_t blob_size; + void *blob; + + if (!blob_file) + return NULL; + + blob = fy_blob_read(blob_file, &blob_size); + if (!blob) + goto err_out; + + rfl = fy_reflection_from_packed_blob(blob, blob_size); + if (!rfl) + goto err_out; + + free(blob); + return rfl; + +err_out: + if (blob) + free(blob); + return NULL; +} + +int fy_reflection_to_packed_blob_file(struct fy_reflection *rfl, const char *blob_file) +{ + int rc; + void *blob; + size_t blob_size; + + if (!rfl || !blob_file) + return -1; + + blob = 
fy_reflection_to_packed_blob(rfl, &blob_size, true, true); + if (!blob) + goto err_out; + + rc = fy_blob_write(blob_file, blob, blob_size); + if (rc < 0) + goto err_out; + + free(blob); + + return 0; + +err_out: + if (blob) + free(blob); + return -1; +} diff --git a/src/tool/fy-tool.c b/src/tool/fy-tool.c index 13d8b65c..68f2ff47 100644 --- a/src/tool/fy-tool.c +++ b/src/tool/fy-tool.c @@ -17,6 +17,11 @@ #include #include #include +#include +#include +#include +#include +#include #include @@ -66,6 +71,7 @@ #define OPT_PARSE_DUMP 1007 #define OPT_YAML_VERSION_DUMP 1008 #define OPT_COMPOSE 1009 +#define OPT_REFLECT 1010 #define OPT_STRIP_LABELS 2000 #define OPT_STRIP_TAGS 2001 @@ -88,6 +94,15 @@ #define OPT_STRIP_EMPTY_KV 2019 #define OPT_DISABLE_MMAP 2020 #define OPT_TSV_FORMAT 2021 +#define OPT_CFLAGS 2022 +#define OPT_TYPE_DUMP 2023 +#define OPT_IMPORT_BLOB 2024 +#define OPT_GENERATE_BLOB 2025 +#define OPT_PRUNE_SYSTEM 2026 +#define OPT_TYPE_INCLUDE 2027 +#define OPT_TYPE_EXCLUDE 2028 +#define OPT_IMPORT_C_FILE 2029 +#define OPT_ENTRY_TYPE 2030 #define OPT_DISABLE_DIAG 3000 #define OPT_ENABLE_DIAG 3001 @@ -152,6 +167,16 @@ static struct option lopts[] = { {"tsv-format", no_argument, 0, OPT_TSV_FORMAT }, {"to", required_argument, 0, 'T' }, {"from", required_argument, 0, 'F' }, + {"reflect", no_argument, 0, OPT_REFLECT }, + {"type-dump", no_argument, 0, OPT_TYPE_DUMP }, + {"entry-type", required_argument, 0, OPT_ENTRY_TYPE }, + {"cflags", required_argument, 0, OPT_CFLAGS }, + {"generate-blob", required_argument, 0, OPT_GENERATE_BLOB }, + {"import-blob", required_argument, 0, OPT_IMPORT_BLOB }, + {"import-c-file", required_argument, 0, OPT_IMPORT_C_FILE }, + {"prune-system", no_argument, 0, OPT_PRUNE_SYSTEM }, + {"type-include", required_argument, 0, OPT_TYPE_INCLUDE }, + {"type-exclude", required_argument, 0, OPT_TYPE_EXCLUDE }, {"quiet", no_argument, 0, 'q' }, {"help", no_argument, 0, 'h' }, {"version", no_argument, 0, 'v' }, @@ -290,6 +315,15 @@ static void 
display_usage(FILE *fp, char *progname, int tool_mode) fprintf(fp, "\t--dump-path : Dump the path while composing\n"); } + if (tool_mode == OPT_REFLECT) { + fprintf(fp, "\t--type-dump : Dump types from the reflection\n"); + fprintf(fp, "\t--generate-blob : Generate packed blob from C source files\n"); + fprintf(fp, "\t--import-blob : Import a packed blob as a reflection source\n"); + fprintf(fp, "\t--import-c-file : Import a C file as a reflection source\n"); + fprintf(fp, "\t--cflags : The C flags to use for the import\n"); + fprintf(fp, "\t--entry-type : The C type that is the entry point (i.e. the document)\n"); + } + if (tool_mode == OPT_TOOL) { fprintf(fp, "\t--dump : Dump mode, [arguments] are file names\n"); fprintf(fp, "\t--testsuite : Testsuite mode, [arguments] are s to output parse events\n"); @@ -376,6 +410,20 @@ static void display_usage(FILE *fp, char *progname, int tool_mode) fprintf(fp, "\t$ %s --compose -mjson \">foo: bar\"\n", progname); fprintf(fp, "\t{\n\t \"foo\": \"bar\"\n\t}\n"); break; + case OPT_REFLECT: + fprintf(fp, "\tReflection parsing a C header and dumping type info\n"); + fprintf(fp, "\t$ %s [--cflags=<>] header.h\n\t...\n", progname); + fprintf(fp, "\n"); + fprintf(fp, "\tReflection parsing a C header and dumping type info\n"); + fprintf(fp, "\t$ %s blob.bin\n\t...\n", progname); + fprintf(fp, "\n"); + fprintf(fp, "\tReflection convert C header files definition to a blob\n"); + fprintf(fp, "\t$ %s --reflect [--cflags=<>] --generate-blob=blob.bin header1.h header2.h\n\t...\n", progname); + fprintf(fp, "\n"); + fprintf(fp, "\tParse and dump generated YAML document from the input string\n"); + fprintf(fp, "\t$ %s --compose -mjson \">foo: bar\"\n", progname); + fprintf(fp, "\t{\n\t \"foo\": \"bar\"\n\t}\n"); + break; case OPT_YAML_VERSION_DUMP: fprintf(fp, "\tDisplay information about the YAML versions libfyaml supports)\n"); fprintf(fp, "\n"); @@ -1538,245 +1586,1826 @@ compose_process_event(struct fy_parser *fyp, struct fy_event *fye, 
struct fy_pat return FYCR_ERROR; } -int main(int argc, char *argv[]) +static void comment_dump(int level, const char *comment) { - struct fy_parse_cfg cfg = { - .search_path = INCLUDE_DEFAULT, - .flags = - (QUIET_DEFAULT ? FYPCF_QUIET : 0) | - (RESOLVE_DEFAULT ? FYPCF_RESOLVE_DOCUMENT : 0) | - (DISABLE_ACCEL_DEFAULT ? FYPCF_DISABLE_ACCELERATORS : 0) | - (DISABLE_BUFFERING_DEFAULT ? FYPCF_DISABLE_BUFFERING : 0) | - (DISABLE_DEPTH_LIMIT_DEFAULT ? FYPCF_DISABLE_DEPTH_LIMIT : 0) | - (SLOPPY_FLOW_INDENTATION_DEFAULT ? FYPCF_SLOPPY_FLOW_INDENTATION : 0) | - (PREFER_RECURSIVE_DEFAULT ? FYPCF_PREFER_RECURSIVE : 0) | - (YPATH_ALIASES_DEFAULT ? FYPCF_YPATH_ALIASES : 0), - }; - struct fy_emitter_cfg emit_cfg; - struct fy_parser *fyp = NULL; - struct fy_emitter *fye = NULL; - int rc, exitcode = EXIT_FAILURE, opt, lidx, i, j, step = 1; - enum fy_error_module errmod; - unsigned int errmod_mask; - bool show; - int indent = INDENT_DEFAULT; - int width = WIDTH_DEFAULT; - bool follow = FOLLOW_DEFAULT; - const char *to = TO_DEFAULT; - const char *from = FROM_DEFAULT; - const char *color = COLOR_DEFAULT; - const char *file = NULL, *trim = TRIM_DEFAULT; - char *tmp, *s, *progname; - struct fy_document *fyd, *fyd_join = NULL; - struct dump_userdata du; - enum fy_emitter_cfg_flags emit_flags = 0; - struct fy_node *fyn, *fyn_emit, *fyn_to, *fyn_from; - int count_ins = 0; - struct fy_document **fyd_ins = NULL; - int tool_mode = OPT_TOOL; - struct fy_event *fyev; - struct fy_token *fyt; - bool join_resolve = RESOLVE_DEFAULT; - struct fy_token_iter *iter; - bool streaming = STREAMING_DEFAULT; - struct fy_diag_cfg dcfg; - struct fy_diag *diag = NULL; - struct fy_path_parse_cfg pcfg; - struct fy_path_expr *expr = NULL; - struct fy_path_exec_cfg xcfg; - struct fy_path_exec *fypx = NULL; - struct fy_node *fyn_start; - bool dump_pathexpr = false; - bool noexec = false; - bool null_output = false; - bool stdin_input; - void *res_iter; - bool disable_flow_markers = DISABLE_FLOW_MARKERS_DEFAULT; - 
bool document_event_stream = DOCUMENT_EVENT_STREAM_DEFAULT; - bool collect_errors = COLLECT_ERRORS_DEFAULT; - bool allow_duplicate_keys = ALLOW_DUPLICATE_KEYS_DEFAULT; - bool tsv_format = TSV_FORMAT_DEFAULT; - struct composer_data cd; - bool dump_path = DUMP_PATH_DEFAULT; - const char *input_arg; + size_t len; + const char *s, *e, *le; - fy_valgrind_check(&argc, &argv); + if (!comment) + return; - /* select the appropriate tool mode */ - progname = argv[0]; - progname = strrchr(argv[0], '/'); - if (!progname) - progname = argv[0]; - else - progname++; + len = strlen(comment); + s = comment; + e = s + len; + while (s < e) { + le = strchr(s, '\n'); + len = le ? (size_t)(le - s) : strlen(s); + printf("%*s// %.*s\n", (int)(level * 4), "", (int)len, s); + s += len + 1; + } +} - /* default mode is dump */ - if (!strcmp(progname, "fy-filter")) - tool_mode = OPT_FILTER; - else if (!strcmp(progname, "fy-testsuite")) - tool_mode = OPT_TESTSUITE; - else if (!strcmp(progname, "fy-dump")) - tool_mode = OPT_DUMP; - else if (!strcmp(progname, "fy-join")) - tool_mode = OPT_JOIN; - else if (!strcmp(progname, "fy-ypath")) - tool_mode = OPT_YPATH; - else if (!strcmp(progname, "fy-scan-dump")) - tool_mode = OPT_SCAN_DUMP; - else if (!strcmp(progname, "fy-parse-dump")) - tool_mode = OPT_PARSE_DUMP; - else if (!strcmp(progname, "fy-compose")) - tool_mode = OPT_COMPOSE; - else if (!strcmp(progname, "fy-yaml-version-dump")) - tool_mode = OPT_YAML_VERSION_DUMP; - else - tool_mode = OPT_TOOL; +static void type_info_dump(const struct fy_type_info *ti, int level) +{ + const struct fy_field_info *fi; + size_t i; + + comment_dump(level, fy_type_info_get_comment(ti)); + printf("%s size=%zu align=%zu", ti->fullname, ti->size, ti->align); + if (ti->dependent_type) + printf(" -> %s", ti->dependent_type->fullname); + printf("\n"); + + if (fy_type_kind_has_fields(ti->kind)) { + for (i = 0, fi = ti->fields; i < ti->count; i++, fi++) { + comment_dump(level + 1, fy_field_info_get_comment(fi)); + 
printf("%*s%s %s", (level + 1) * 4, "", fi->type_info->fullname, fi->name); + if (!(fi->flags & FYFIF_BITFIELD)) + printf(" offset=%zu", fi->offset); + else + printf(" bit_offset=%zu bit_width=%zu", fi->bit_offset, fi->bit_width); + printf("\n"); + } + } +} - fy_diag_cfg_default(&dcfg); - /* XXX remember to modify this if you change COLOR_DEFAULT */ +void reflection_type_info_dump(struct fy_reflection *rfl) +{ + const struct fy_type_info *ti; + void *prev = NULL; - memset(&du, 0, sizeof(du)); - du.fp = stdout; - du.colorize = isatty(fileno(stdout)) == 1; - du.visible = VISIBLE_DEFAULT; + while ((ti = fy_type_info_iterate(rfl, &prev)) != NULL) { + type_info_dump(ti, 0); + } +} - emit_flags = (SORT_DEFAULT ? FYECF_SORT_KEYS : 0) | - (COMMENT_DEFAULT ? FYECF_OUTPUT_COMMENTS : 0) | - (STRIP_LABELS_DEFAULT ? FYECF_STRIP_LABELS : 0) | - (STRIP_TAGS_DEFAULT ? FYECF_STRIP_TAGS : 0) | - (STRIP_DOC_DEFAULT ? FYECF_STRIP_DOC : 0); - apply_mode_flags(MODE_DEFAULT, &emit_flags); +static void type_info_c_with_fields_dump(const struct fy_type_info *ti, int level, const char *field_name, bool no_first_pad) +{ + const struct fy_type_kind_info *tki; + const struct fy_field_info *fi; + char *name; + size_t i, e_offset;; - while ((opt = getopt_long_only(argc, argv, - "I:" "d:" "i:" "w:" "rsc" "C:" "m:" "V" "f:" "t:" "T:F:" "j:" "qhvl", - lopts, &lidx)) != -1) { - switch (opt) { - case 'I': - tmp = alloca(strlen(cfg.search_path) + 1 + strlen(optarg) + 1); - s = tmp; - strcpy(s, cfg.search_path); - if (cfg.search_path && cfg.search_path[0]) { - s += strlen(cfg.search_path); - *s++ = ':'; - } - strcpy(s, optarg); - s += strlen(optarg); - *s = '\0'; - cfg.search_path = tmp; - break; - case 'i': - indent = atoi(optarg); - if (indent < 0 || indent > FYECF_INDENT_MASK) { - fprintf(stderr, "bad indent option %s\n", optarg); - display_usage(stderr, progname, tool_mode); - return EXIT_FAILURE; - } + if (!ti || !fy_type_kind_has_fields(ti->kind)) + return; - break; - case 'w': - width = 
atoi(optarg); - if (width < 0 || width > FYECF_WIDTH_MASK) { - fprintf(stderr, "bad width option %s\n", optarg); - display_usage(stderr, progname, tool_mode); - return EXIT_FAILURE; - } - break; - case 'd': - dcfg.level = fy_string_to_error_type(optarg); - if (dcfg.level == FYET_MAX) { - fprintf(stderr, "bad debug level option %s\n", optarg); - display_usage(stderr, progname, tool_mode); - return EXIT_FAILURE; - } - break; - case OPT_DISABLE_DIAG: - case OPT_ENABLE_DIAG: - if (!strcmp(optarg, "all")) { - errmod_mask = FY_BIT(FYEM_MAX) - 1; + tki = fy_type_kind_info_get(ti->kind); + assert(tki); + + if (!no_first_pad) { + comment_dump(level, fy_type_info_get_comment(ti)); + printf("%*s", level * 4, ""); + } + printf("%s", tki->name); + if (!(ti->flags & FYTIF_ANONYMOUS)) + printf(" %s", ti->name); + printf(" {"); + printf("\t/* "); + if (ti->flags & FYTIF_ANONYMOUS) { + e_offset = fy_type_info_eponymous_offset(ti); + printf("offset=%zu, ", e_offset); + } else { + e_offset = 0; + } + printf("size=%zu, align=%zu */", ti->size, ti->align); + printf("\n"); + for (i = 0, fi = ti->fields; i < ti->count; i++, fi++) { + comment_dump(level+1, fy_field_info_get_comment(fi)); + if (!(fi->type_info->flags & FYTIF_ANONYMOUS)) { + + printf("%*s", (level + 1) * 4, ""); + if (ti->kind == FYTK_ENUM) { + printf("%s", fi->name); + if (fi->flags & FYFIF_ENUM_UNSIGNED) + printf(" = %llu", fi->uval); + else + printf(" = %lld", fi->sval); + printf(",\n"); } else { - errmod = fy_string_to_error_module(optarg); - if (errmod == FYEM_MAX) { - fprintf(stderr, "bad error module option %s\n", optarg); - display_usage(stderr, progname, tool_mode); - return EXIT_FAILURE; + name = fy_type_info_generate_name(fi->type_info, fi->name, false); + assert(name); + if (!(fi->flags & FYFIF_BITFIELD)) { + printf("%s;", name); + printf("\t/* offset=%zu, size=%zu */", e_offset + fi->offset, fi->type_info->size); + } else { + printf("%s ", name); + printf(": %zu;", fi->bit_width); + printf("\t/* bit_offset=%zu, 
byte_offset=%zu, byte_bit_offset=%zu */", + (e_offset * 8 + fi->bit_offset), + (e_offset * 8 + fi->bit_offset) / 8, + fi->bit_offset % 8); } - errmod_mask = FY_BIT(errmod); + free(name); + printf("\n"); } - if (opt == OPT_DISABLE_DIAG) - dcfg.module_mask &= ~errmod_mask; - else - dcfg.module_mask |= errmod_mask; - break; + } else { + type_info_c_with_fields_dump(fi->type_info, level + 1, fi->name, false); + } + } - case OPT_SHOW_DIAG: - case OPT_HIDE_DIAG: - show = opt == OPT_SHOW_DIAG; - if (!strcmp(optarg, "source")) { - dcfg.show_source = show; - } else if (!strcmp(optarg, "position")) { - dcfg.show_position = show; - } else if (!strcmp(optarg, "type")) { - dcfg.show_type = show; - } else if (!strcmp(optarg, "module")) { - dcfg.show_module = show; - } else { - fprintf(stderr, "bad %s option %s\n", - show ? "show" : "hide", optarg); - display_usage(stderr, progname, tool_mode); - return EXIT_FAILURE; - } - break; + printf("%*s", level * 4, ""); + if (!field_name || !field_name[0]) + printf("}"); + else + printf("} %s", field_name); - case 'r': - cfg.flags |= FYPCF_RESOLVE_DOCUMENT; - break; - case 's': - emit_flags |= FYECF_SORT_KEYS; - break; - case 'c': - cfg.flags |= FYPCF_PARSE_COMMENTS; - emit_flags |= FYECF_OUTPUT_COMMENTS; - break; - case 'C': - color = optarg; - if (!strcmp(color, "auto")) { - dcfg.colorize = isatty(fileno(stderr)) == 1; - du.colorize = isatty(fileno(stdout)) == 1; - } - else if (!strcmp(color, "yes") || !strcmp(color, "1") || !strcmp(color, "on")) { - dcfg.colorize = true; - du.colorize = true; - } else if (!strcmp(color, "no") || !strcmp(color, "0") || !strcmp(color, "off")) { - dcfg.colorize = false; - du.colorize = false; - } else { - fprintf(stderr, "bad color option %s\n", optarg); - display_usage(stderr, progname, tool_mode); - return EXIT_FAILURE; - } - break; - case 'm': - rc = apply_mode_flags(optarg, &emit_flags); - if (rc) { - fprintf(stderr, "bad mode option %s\n", optarg); - display_usage(stderr, progname, tool_mode); - 
return EXIT_FAILURE; - } - break; - case 'V': - du.visible = true; - break; - case 'l': - follow = true; - break; - case 'q': - cfg.flags |= FYPCF_QUIET; - dcfg.output_fn = no_diag_output_fn; - dcfg.fp = NULL; - dcfg.colorize = false; - break; - case 'f': - file = optarg; + printf(";"); + + if (ti->flags & FYTIF_ANONYMOUS) { + printf("\t/* anonymous */"); + } + printf("\n"); +} + +static void type_info_c_typedef_dump(const struct fy_type_info *ti, int level) +{ + char *name; + + if (!ti || ti->kind != FYTK_TYPEDEF) + return; + + comment_dump(level, fy_type_info_get_comment(ti)); + printf("%*stypedef ", level * 4, ""); + + assert(ti->dependent_type); + if (!(ti->dependent_type->flags & FYTIF_ANONYMOUS)) { + name = fy_type_info_generate_name(ti->dependent_type, ti->name, false); + assert(name); + printf("%s;", name); + free(name); + printf("\t/* size=%zu, align=%zu */", ti->size, ti->align); + printf("\n"); + } else { + type_info_c_with_fields_dump(ti->dependent_type, level, ti->name, true); + } +} + +void reflection_type_info_c_dump(struct fy_reflection *rfl) +{ + const struct fy_type_info *ti; + void *prev = NULL; + + prev = NULL; + while ((ti = fy_type_info_iterate(rfl, &prev)) != NULL) { + if (ti->flags & FYTIF_ANONYMOUS) + continue; + if (ti->kind == FYTK_TYPEDEF) { + type_info_c_typedef_dump(ti, 0); + } else if (fy_type_kind_has_fields(ti->kind)) { + type_info_c_with_fields_dump(ti, 0, NULL, false); + } + } +} + +void reflection_prune_system(struct fy_reflection *rfl) +{ + const struct fy_type_info *ti; + void *prev = NULL; + + fy_reflection_clear_all_markers(rfl); + + /* mark all non system and keep them */ + prev = NULL; + while ((ti = fy_type_info_iterate(rfl, &prev)) != NULL) { + if (ti->flags & FYTIF_SYSTEM_HEADER) + continue; + /* mark all non system structs, unions, enums and typedefs */ + if (fy_type_kind_has_fields(ti->kind) || ti->kind == FYTK_TYPEDEF) + fy_type_info_mark(ti); + } + fy_reflection_prune_unmarked(rfl); +} + +int 
reflection_type_filter(struct fy_reflection *rfl,
+		const char *type_include, const char *type_exclude)
+{
+	/*
+	 * Keep only the types whose fullname matches the type_include
+	 * pattern (when given) and does not match the type_exclude pattern
+	 * (when given); all other types are pruned from the reflection.
+	 * Both patterns are POSIX extended regular expressions.
+	 *
+	 * Returns 0 on success or when no filter was requested, and the
+	 * non-zero regcomp() error code when a pattern fails to compile.
+	 */
+	const struct fy_type_info *ti;
+	void *prev = NULL;
+	regex_t type_include_reg, type_exclude_reg;
+	bool type_include_reg_compiled = false, type_exclude_reg_compiled = false;
+	bool include_match, exclude_match;
+	int ret;
+
+	if (!type_include && !type_exclude)
+		return 0;
+
+	if (type_include) {
+		ret = regcomp(&type_include_reg, type_include, REG_EXTENDED | REG_NOSUB);
+		if (ret) {
+			fprintf(stderr, "Bad type-include regexp '%s'\n", type_include);
+			goto err_out;
+		}
+		type_include_reg_compiled = true;
+	}
+
+	if (type_exclude) {
+		ret = regcomp(&type_exclude_reg, type_exclude, REG_EXTENDED | REG_NOSUB);
+		if (ret) {
+			fprintf(stderr, "Bad type-exclude regexp '%s'\n", type_exclude);
+			goto err_out;
+		}
+		type_exclude_reg_compiled = true;
+	}
+
+	fy_reflection_clear_all_markers(rfl);
+	while ((ti = fy_type_info_iterate(rfl, &prev)) != NULL) {
+		/* no include pattern means every type is a candidate */
+		include_match = true;
+		if (type_include_reg_compiled)
+			include_match = regexec(&type_include_reg, ti->fullname, 0, NULL, 0) == 0;
+
+		/* the exclude test must use the exclude pattern, not the include one */
+		exclude_match = false;
+		if (type_exclude_reg_compiled)
+			exclude_match = regexec(&type_exclude_reg, ti->fullname, 0, NULL, 0) == 0;
+
+		if (include_match && !exclude_match)
+			fy_type_info_mark(ti);
+	}
+	fy_reflection_prune_unmarked(rfl);
+
+	ret = 0;
+err_out:
+	/* only release what was successfully compiled */
+	if (type_exclude_reg_compiled)
+		regfree(&type_exclude_reg);
+
+	if (type_include_reg_compiled)
+		regfree(&type_include_reg);
+
+	return ret;
+}
+
+/*
+ * Find the first type whose normalized_name equals the normalized form
+ * of name. Returns NULL when name cannot be normalized or when no type
+ * matches.
+ */
+const struct fy_type_info *
+reflection_lookup_type_by_name(struct fy_reflection *rfl, const char *name)
+{
+	const struct fy_type_info *ti;
+	void *prev = NULL;
+	char *nname;
+
+	nname = fy_type_name_normalize(name);
+	if (!nname)
+		return NULL;
+
+	/* ti is NULL when the iteration ends without a match */
+	while ((ti = fy_type_info_iterate(rfl, &prev)) != NULL) {
+		if (!strcmp(ti->normalized_name, nname))
+			break;
+	}
+	free(nname);
+	return ti;
+}
+
+const struct fy_type_info *
+reflection_type_resolve_type(const struct fy_type_info *ti) +{ + if (!ti) + return NULL; + while (ti && ti->kind == FYTK_TYPEDEF) + ti = ti->dependent_type; + return ti; +} + +struct reflection_type_data; +struct reflection_field_data; +struct reflection_decoder; +struct reflection_object; +struct reflection_encoder; + +struct reflection_object_ops { + int (*setup)(struct reflection_object *ro, struct fy_event *fye, struct fy_path *path); + void (*cleanup)(struct reflection_object *ro); + int (*finish)(struct reflection_object *ro); + struct reflection_object *(*create_child)(struct reflection_object *ro, struct fy_event *fye, struct fy_path *path); + int (*scalar_child)(struct reflection_object *ro, struct fy_event *fye, struct fy_path *path); +}; + +struct reflection_object { + struct reflection_decoder *rd; + struct reflection_object *parent; + struct reflection_type_data *rtd; + const struct reflection_object_ops *ops; + void *instance_data; + void *data; + size_t data_size; +}; + +struct reflection_field_data { + struct reflection_type_data *rtd; + const struct fy_field_info *fi; + size_t index; +}; + +struct reflection_type_ops { + const struct reflection_object_ops *(*object_ops)(struct reflection_type_data *rtd); + int (*emit)(struct reflection_type_data *rtd, struct fy_emitter *fye, const void *data, size_t data_size); +}; + +struct reflection_type_data { + const struct fy_type_info *ti; + const struct reflection_type_ops *ops; + void *data; + size_t fields_count; + struct reflection_field_data fields[]; +}; + +struct reflection_decoder { + bool null_output; + bool document_ready; + bool verbose; + bool single_document; + + /* bindable */ + struct reflection_type_data *entry; + void *data; + size_t data_size; + bool data_allocated; +}; + +struct reflection_type_data * +reflection_type_data_get_dependent(struct reflection_type_data *rtd) +{ + if (!rtd || !rtd->ti || !rtd->ti->dependent_type) + return NULL; + + return 
fy_type_info_get_userdata(rtd->ti->dependent_type); +} + +struct reflection_field_data * +reflection_type_data_lookup_field(struct reflection_type_data *rtd, const char *field) +{ + int idx; + + if (!rtd || !field) + return NULL; + + idx = fy_field_info_index(fy_type_info_lookup_field(rtd->ti, field)); + if (idx < 0) + return NULL; + + assert((unsigned int)idx < rtd->fields_count); + return &rtd->fields[idx]; +} + +struct reflection_field_data * +reflection_type_data_lookup_field_by_enum_value(struct reflection_type_data *rtd, long long val) +{ + int idx; + + if (!rtd) + return NULL; + + idx = fy_field_info_index(fy_type_info_lookup_field_by_enum_value(rtd->ti, val)); + if (idx < 0) + return NULL; + + assert((unsigned int)idx < rtd->fields_count); + return &rtd->fields[idx]; +} + +struct reflection_field_data * +reflection_type_data_lookup_field_by_unsigned_enum_value(struct reflection_type_data *rtd, unsigned long long val) +{ + int idx; + + if (!rtd) + return NULL; + + idx = fy_field_info_index(fy_type_info_lookup_field_by_unsigned_enum_value(rtd->ti, val)); + if (idx < 0) + return NULL; + + assert((unsigned int)idx < rtd->fields_count); + return &rtd->fields[idx]; +} + +struct reflection_object * +reflection_object_create_from_type(struct reflection_object *ro_parent, struct reflection_type_data *rtd, + struct fy_event *fye, struct fy_path *path, + void *data, size_t data_size); + +static int int_setup(struct reflection_object *ro, struct fy_event *fye, struct fy_path *path) +{ + int *valp; + const char *text0; + int rc; + + fprintf(stderr, "%s\n", __func__); + if (fye->type != FYET_SCALAR) + return -1; + + assert(ro->rtd->ti->kind == FYTK_INT); + + assert(ro->data); + assert(ro->data_size == sizeof(int)); + assert(((uintptr_t)ro->data & (alignof(int) - 1)) == 0); + valp = ro->data; + + text0 = fy_token_get_text0(fy_event_get_token(fye)); + assert(text0); + + rc = sscanf(text0, "%d", valp); + if (rc != 1) + return -1; + + fprintf(stderr, "%s: %d\n", __func__, 
*valp); + + return 0; +} + +const struct reflection_object_ops *int_object_ops(struct reflection_type_data *rtd) +{ + static const struct reflection_object_ops ops = { + .setup = int_setup, + }; + + return &ops; +} + +int int_emit(struct reflection_type_data *rtd, struct fy_emitter *fye, const void *data, size_t data_size) +{ + char buf[32]; /* maximum buffer space needed for 64 bit integer is 21, use 32 */ + int len; + + assert(data_size == sizeof(int)); + assert(((uintptr_t)data & (alignof(int) - 1)) == 0); + + len = snprintf(buf, sizeof(buf), "%d", *(const int *)data); + return fy_emit_event(fye, fy_emit_event_create(fye, FYET_SCALAR, FYSS_PLAIN, buf, (size_t)len, NULL, NULL)); +} + +static int const_array_setup(struct reflection_object *ro, struct fy_event *fye, struct fy_path *path) +{ + fprintf(stderr, "%s\n", __func__); + if (fye->type != FYET_SEQUENCE_START) { + assert(0); + return -1; + } + + assert(ro->data); + ro->instance_data = (void *)(uintptr_t)-1; /* last index of const array */ + + return 0; +} + +static int const_array_finish(struct reflection_object *ro) +{ + int last_idx; + + last_idx = (int)(uintptr_t)ro->instance_data; + if (last_idx != (int)(ro->rtd->ti->count - 1)) /* verify all filled */ + return -1; + + return 0; +} + +static void const_array_cleanup(struct reflection_object *ro) +{ + ro->instance_data = NULL; +} + +struct reflection_object *const_array_create_child(struct reflection_object *ro_parent, struct fy_event *fye, struct fy_path *path) +{ + struct reflection_object *ro; + struct reflection_type_data *rtd_dep; + size_t item_size; + int idx; + void *data; + + fprintf(stderr, "%s\n", __func__); + + assert(fy_path_in_sequence(path)); + idx = fy_path_component_sequence_get_index(fy_path_last_not_collection_root_component(path)); + fprintf(stderr, "%s: idx=%d\n", __func__, idx); + + if ((unsigned int)idx >= ro_parent->rtd->ti->count) { + assert(0); + return NULL; + } + + rtd_dep = reflection_type_data_get_dependent(ro_parent->rtd); + 
assert(rtd_dep); + + item_size = rtd_dep->ti->size; + data = ro_parent->data + item_size * idx; + + ro = reflection_object_create_from_type(ro_parent, rtd_dep, + fye, path, data, item_size); + if (!ro) + return NULL; + + ro_parent->instance_data = (void *)(uintptr_t)idx; /* last index of const array */ + + return ro; +} + +const struct reflection_object_ops *const_array_object_ops(struct reflection_type_data *rtd) +{ + static const struct reflection_object_ops ops = { + .setup = const_array_setup, + .cleanup = const_array_cleanup, + .finish = const_array_finish, + .create_child = const_array_create_child, + }; + + return &ops; +} + +int const_array_emit(struct reflection_type_data *rtd, struct fy_emitter *fye, const void *data, size_t data_size) +{ + struct reflection_type_data *rtd_dep; + size_t idx; + int rc; + + rtd_dep = reflection_type_data_get_dependent(rtd); + assert(rtd_dep); + + rc = fy_emit_event(fye, fy_emit_event_create(fye, FYET_SEQUENCE_START, FYNS_ANY, NULL, NULL)); + if (rc) + goto err_out; + + for (idx = 0; idx < rtd->ti->count; idx++, data += rtd_dep->ti->size) { + assert(rtd_dep->ops->emit); + rc = rtd_dep->ops->emit(rtd_dep, fye, data, rtd_dep->ti->size); + if (rc) + goto err_out; + } + + rc = fy_emit_event(fye, fy_emit_event_create(fye, FYET_SEQUENCE_END)); + if (rc) + goto err_out; + + return 0; +err_out: + return -1; +} + +struct struct_type_data { + uint8_t *required_map; + uint8_t *optional_map; + uint8_t maps[]; +}; + +struct struct_instance_data { + size_t present_map_size; + uint8_t present_map[]; +}; + +static int struct_setup(struct reflection_object *ro, struct fy_event *fye, struct fy_path *path) +{ + struct struct_instance_data *id; + size_t present_map_size, size; + + fprintf(stderr, "%s\n", __func__); + if (fye->type != FYET_MAPPING_START) { + assert(0); + return -1; + } + + assert(ro->data); + + present_map_size = (ro->rtd->ti->count + (8 - 1)) / 8; + size = sizeof(*id) + present_map_size; + id = malloc(size); + assert(id); + 
memset(id, 0, size); + id->present_map_size = present_map_size; + + ro->instance_data = id; + + return 0; +} + +static void struct_cleanup(struct reflection_object *ro) +{ + struct struct_instance_data *id; + + fprintf(stderr, "%s\n", __func__); + id = ro->instance_data; + if (id) + free(id); + ro->instance_data = NULL; +} + +static int struct_finish(struct reflection_object *ro) +{ + assert(ro->instance_data); + + return 0; +} + +struct reflection_object *struct_create_child(struct reflection_object *ro_parent, struct fy_event *fye, struct fy_path *path) +{ + struct reflection_type_data *rtd; + struct reflection_field_data *rfd; + const struct fy_field_info *fi; + const struct fy_type_info *ti; + struct fy_token *fyt_key; + const char *field; + + fprintf(stderr, "%s\n", __func__); + + assert(fy_path_in_mapping(path)); + assert(!fy_path_in_mapping_key(path)); + + fyt_key = fy_path_component_mapping_get_scalar_key(fy_path_last_not_collection_root_component(path)); + assert(fyt_key); + + field = fy_token_get_text0(fyt_key); + assert(field); + + fprintf(stderr, "field=%s\n", field); + + rtd = ro_parent->rtd; + rfd = reflection_type_data_lookup_field(rtd, field); + if (!rfd) { + return NULL; + } + fi = rfd->fi; + ti = fi->type_info; + /* no bitfields */ + assert((fi->flags & FYFIF_BITFIELD) == 0); + return reflection_object_create_from_type(ro_parent, fy_type_info_get_userdata(ti), + fye, path, ro_parent->data + rfd->fi->offset, ti->size); +} + +const struct reflection_object_ops *struct_object_ops(struct reflection_type_data *rtd) +{ + static const struct reflection_object_ops ops = { + .setup = struct_setup, + .cleanup = struct_cleanup, + .finish = struct_finish, + .create_child = struct_create_child, + }; + + return &ops; +} + +int struct_emit(struct reflection_type_data *rtd, struct fy_emitter *fye, const void *data, size_t data_size) +{ + struct reflection_field_data *rfd; + struct reflection_type_data *rtd_field; + const char *field_name; + size_t i; + int rc; + 
+ rc = fy_emit_event(fye, fy_emit_event_create(fye, FYET_MAPPING_START, FYNS_ANY, NULL, NULL)); + if (rc) + goto err_out; + + for (i = 0, rfd = &rtd->fields[0]; i < rtd->fields_count; i++, rfd++) { + + field_name = fy_field_info_get_yaml_name(rfd->fi); + if (!field_name) + field_name = rfd->fi->name; + + rc = fy_emit_event(fye, fy_emit_event_create(fye, FYET_SCALAR, FYSS_PLAIN, field_name, FY_NT, NULL, NULL)); + if (rc) + goto err_out; + + rtd_field = fy_type_info_get_userdata(rfd->fi->type_info); + assert(rtd_field); + + assert(rtd_field->ops->emit); + rc = rtd_field->ops->emit(rtd_field, fye, data + rfd->fi->offset, rtd_field->ti->size); + if (rc) + goto err_out; + } + + rc = fy_emit_event(fye, fy_emit_event_create(fye, FYET_MAPPING_END)); + if (rc) + goto err_out; + + return 0; + +err_out: + return -1; +} + +static int enum_setup(struct reflection_object *ro, struct fy_event *fye, struct fy_path *path) +{ + struct reflection_type_data *rtd_dep; + struct reflection_field_data *rfd; + size_t size, align; + const char *text0; + int signess; + union { unsigned long long u; signed long long s; } val; + + + fprintf(stderr, "%s\n", __func__); + if (fye->type != FYET_SCALAR) + return -1; + + assert(ro->rtd->ti->kind == FYTK_ENUM); + rtd_dep = reflection_type_data_get_dependent(ro->rtd); + assert(rtd_dep); + + assert(ro->data); + + /* verify alignment */ + size = rtd_dep->ti->size; + align = rtd_dep->ti->align; + assert(ro->data_size == size); + assert(((uintptr_t)ro->data & (align - 1)) == 0); + (void)size; + (void)align; + + text0 = fy_token_get_text0(fy_event_get_token(fye)); + assert(text0); + + rfd = reflection_type_data_lookup_field(ro->rtd, text0); + assert(rfd); + + /* weird dance, since base signess might differ (but doesn't matter) */ + signess = fy_type_kind_signess(rfd->fi->type_info->kind); + assert(signess != 0); + if (signess > 0) { + val.u = rfd->fi->uval; + // fprintf(stderr, "%llu\n", val.u); + } else { + val.s = rfd->fi->sval; + // fprintf(stderr, 
"%lld\n", val.s); + } + + switch (rtd_dep->ti->kind) { + case FYTK_CHAR: + if (CHAR_MIN < 0) { + assert(val.s >= CHAR_MIN && val.s <= CHAR_MAX); + *(char *)ro->data = (char)val.s; + } else { + assert(val.u <= CHAR_MAX); + *(char *)ro->data = (char)val.u; + } + break; + case FYTK_SCHAR: + assert(val.s >= SCHAR_MIN && val.s <= SCHAR_MAX); + *(signed char *)ro->data = (signed char)val.s; + break; + case FYTK_UCHAR: + assert(val.u <= UCHAR_MAX); + *(unsigned char *)ro->data = (unsigned char)val.u; + break; + case FYTK_SHORT: + assert(val.s >= SHRT_MIN && val.s <= SHRT_MAX); + *(short *)ro->data = (short)val.s; + break; + case FYTK_USHORT: + assert(val.u <= USHRT_MAX); + *(unsigned short *)ro->data = (unsigned short)val.u; + break; + case FYTK_INT: + assert(val.s >= INT_MIN && val.s <= INT_MAX); + *(int *)ro->data = (int)val.s; + break; + case FYTK_UINT: + assert(val.u <= UINT_MAX); + *(unsigned int *)ro->data = (unsigned int)val.u; + break; + case FYTK_LONG: + assert(val.s >= LONG_MIN && val.s <= LONG_MAX); + *(long *)ro->data = (long)val.s; + break; + case FYTK_ULONG: + assert(val.u <= ULONG_MAX); + *(unsigned long *)ro->data = (unsigned long)val.u; + break; + case FYTK_LONGLONG: + *(long long *)ro->data = val.s; + break; + case FYTK_ULONGLONG: + *(unsigned long long *)ro->data = val.u; + break; + + default: + assert(0); /* err, no more */ + abort(); + } + + return 0; +} + +const struct reflection_object_ops *enum_object_ops(struct reflection_type_data *rtd) +{ + static const struct reflection_object_ops ops = { + .setup = enum_setup, + }; + + return &ops; +} + +int enum_emit(struct reflection_type_data *rtd, struct fy_emitter *fye, const void *data, size_t data_size) +{ + struct reflection_type_data *rtd_dep; + struct reflection_field_data *rfd; + size_t size, align; + int signess; + union { unsigned long long u; signed long long s; } val; + const char *text; + size_t len; + + assert(rtd->ti->kind == FYTK_ENUM); + rtd_dep = reflection_type_data_get_dependent(rtd); + 
assert(rtd_dep); + + /* verify alignment */ + size = rtd_dep->ti->size; + align = rtd_dep->ti->align; + assert(data_size == size); + assert(((uintptr_t)data & (align - 1)) == 0); + (void)size; + (void)align; + + signess = fy_type_kind_signess(rtd_dep->ti->kind); + assert(signess != 0); + + switch (rtd_dep->ti->kind) { + case FYTK_CHAR: + if (CHAR_MIN < 0) + val.s = *(char *)data; + else + val.u = *(char *)data; + break; + case FYTK_SCHAR: + val.s = *(signed char *)data; + break; + case FYTK_UCHAR: + val.u = *(unsigned char *)data; + break; + case FYTK_SHORT: + val.s = *(short *)data; + break; + case FYTK_USHORT: + val.u = *(unsigned short *)data; + break; + case FYTK_INT: + val.s = *(int *)data; + break; + case FYTK_UINT: + val.u = *(unsigned int *)data; + break; + case FYTK_LONG: + val.s = *(long *)data; + break; + case FYTK_ULONG: + val.u = *(unsigned long *)data; + break; + case FYTK_LONGLONG: + val.s = *(long long *)data; + break; + case FYTK_ULONGLONG: + val.u = *(unsigned long long *)data; + break; + + default: + assert(0); /* err, no more */ + abort(); + } + + if (signess > 0) + rfd = reflection_type_data_lookup_field_by_unsigned_enum_value(rtd, val.u); + else + rfd = reflection_type_data_lookup_field_by_enum_value(rtd, val.s); + + assert(rfd); + text = rfd->fi->name; + len = strlen(text); + + return fy_emit_event(fye, fy_emit_event_create(fye, FYET_SCALAR, FYSS_ANY, text, len, NULL, NULL)); +} + +const struct reflection_type_ops reflection_ops_table[FYTK_COUNT] = { + [FYTK_INVALID] = { + }, + [FYTK_VOID] = { + }, + [FYTK_BOOL] = { + }, + [FYTK_CHAR] = { + }, + [FYTK_SCHAR] = { + }, + [FYTK_UCHAR] = { + }, + [FYTK_SHORT] = { + }, + [FYTK_USHORT] = { + }, + [FYTK_INT] = { + .object_ops = int_object_ops, + .emit = int_emit, + }, + [FYTK_UINT] = { + }, + [FYTK_LONG] = { + }, + [FYTK_ULONG] = { + }, + [FYTK_LONGLONG] = { + }, + [FYTK_ULONGLONG] = { + }, +#ifdef FY_HAS_INT128 + [FYTK_INT128] = { + }, + [FYTK_UINT128] = { + }, +#else + [FYTK_INT128] = { + }, + 
[FYTK_UINT128] = { + }, +#endif + [FYTK_FLOAT] = { + }, + [FYTK_DOUBLE] = { + }, + [FYTK_LONGDOUBLE] = { + }, +#ifdef FY_HAS_FP16 + [FYTK_FLOAT16] = { + }, +#else + [FYTK_FLOAT16] = { + }, +#endif +#ifdef FY_HAS_FLOAT128 + [FYTK_FLOAT128] = { + }, +#else + [FYTK_FLOAT128] = { + }, +#endif + /* the explicitly sized types are not generated */ + /* they must be explicitly created */ + [FYTK_S8] = { + }, + [FYTK_U8] = { + }, + [FYTK_S16] = { + }, + [FYTK_U16] = { + }, + [FYTK_S32] = { + }, + [FYTK_U32] = { + }, + [FYTK_S64] = { + }, + [FYTK_U64] = { + }, +#if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__ == 16 + [FYTK_S128] = { + }, + [FYTK_U128] = { + }, +#else + [FYTK_S128] = { + }, + [FYTK_U128] = { + }, +#endif + + /* these are templates */ + [FYTK_RECORD] = { + }, + [FYTK_STRUCT] = { + .object_ops = struct_object_ops, + .emit = struct_emit, + }, + [FYTK_UNION] = { + }, + [FYTK_ENUM] = { + .object_ops = enum_object_ops, + .emit = enum_emit, + }, + [FYTK_TYPEDEF] = { + }, + [FYTK_PTR] = { + }, + [FYTK_CONSTARRAY] = { + .object_ops = const_array_object_ops, + .emit = const_array_emit, + }, + [FYTK_INCOMPLETEARRAY] = { + }, + [FYTK_FUNCTION] = { + }, +}; + +struct reflection_object * +reflection_object_create_internal(struct reflection_object *parent, struct reflection_type_data *rtd, + struct fy_event *fye, struct fy_path *path, + const struct reflection_object_ops *ops, void *data, size_t data_size); + +struct root_instance_data { + struct reflection_decoder *rd; +}; + +static int root_setup(struct reflection_object *ro, struct fy_event *fye, struct fy_path *path) +{ + struct root_instance_data *id; + + fprintf(stderr, "%s\n", __func__); + + assert(ro); + + id = malloc(sizeof(*id)); + if (!id) { + assert(0); + return -1; + } + memset(id, 0, sizeof(*id)); + id->rd = ro->rd; + ro->instance_data = id; + + return 0; +} + +static void root_cleanup(struct reflection_object *ro) +{ + struct root_instance_data *id; + + id = ro->instance_data; + + if (ro->instance_data) { 
id = ro->instance_data;
+		ro->instance_data = NULL;
+		free(id);
+	}
+}
+
+/*
+ * Create the single child of the root object. A pointer entry type
+ * decodes into its dependent type, the other supported kinds decode
+ * into the entry type itself; any other kind aborts.
+ */
+struct reflection_object *root_create_child(struct reflection_object *ro_parent, struct fy_event *fye, struct fy_path *path)
+{
+	struct reflection_type_data *child_rtd;
+
+	fprintf(stderr, "%s\n", __func__);
+
+	switch (ro_parent->rtd->ti->kind) {
+	case FYTK_PTR:
+		/* pointer: the child is the pointed-to type */
+		child_rtd = reflection_type_data_get_dependent(ro_parent->rtd);
+		break;
+
+	case FYTK_INT:
+	case FYTK_CONSTARRAY:
+	case FYTK_STRUCT:
+	case FYTK_UNION:
+	case FYTK_ENUM:
+		child_rtd = ro_parent->rtd;
+		break;
+
+	default:
+		assert(0);
+		abort();
+		break;
+	}
+
+	return reflection_object_create_from_type(ro_parent, child_rtd,
+			fye, path, ro_parent->data, ro_parent->data_size);
+}
+
+/* object ops of the document root; it has no finish or scalar_child hook */
+static const struct reflection_object_ops root_ops = {
+	.setup = root_setup,
+	.cleanup = root_cleanup,
+	.create_child = root_create_child,
+};
+
+/* Run the cleanup hook (when there is one) and free the object; NULL is a no-op. */
+void
+reflection_object_destroy(struct reflection_object *ro)
+{
+	if (ro) {
+		if (ro->ops && ro->ops->cleanup)
+			ro->ops->cleanup(ro);
+		free(ro);
+	}
+}
+
+/* Run the finish hook; returns 0 for a NULL object or when there is no hook. */
+int
+reflection_object_finish(struct reflection_object *ro)
+{
+	if (!ro || !ro->ops->finish)
+		return 0;
+
+	return ro->ops->finish(ro);
+}
+
+/* Finish the object, destroy it, and return the result of the finish step. */
+int
+reflection_object_finish_and_destroy(struct reflection_object *ro)
+{
+	int finish_rc;
+
+	if (!ro)
+		return 0;
+
+	finish_rc = reflection_object_finish(ro);
+	reflection_object_destroy(ro);
+
+	return finish_rc;
+}
+
+struct reflection_object *
+reflection_object_create_internal(struct reflection_object *parent, struct reflection_type_data *rtd,
+		struct fy_event *fye, struct fy_path *path,
+		const struct reflection_object_ops *ops,
+		void *data, size_t data_size)
+{
+	struct reflection_object *ro = NULL;
+	int ret;
+
+	if (!fye || !path || !ops) {
+		assert(0);
+		return NULL;
+	}
+
+	ro = malloc(sizeof(*ro));
+	if (!ro) {
+		assert(0);
+		goto err_out;
+	}
+	memset(ro, 0, sizeof(*ro));
+	ro->rtd = rtd;
+	
ro->parent = parent;
+	ro->ops = ops;
+	ro->data = data;
+	ro->data_size = data_size;
+	assert(ro->ops->setup);
+	ret = ro->ops->setup(ro, fye, path);
+	if (ret) {
+		assert(0);
+		goto err_out;
+	}
+
+	return ro;
+err_out:
+	reflection_object_destroy(ro);
+	return NULL;
+}
+
+/*
+ * Create an object of type rtd as a child of ro_parent, decoding into
+ * data/data_size, with the object ops provided by the type's ops entry.
+ *
+ * Returns NULL when an argument is missing, when the type provides no
+ * object ops, or when the object cannot be created.
+ */
+struct reflection_object *
+reflection_object_create_from_type(struct reflection_object *ro_parent, struct reflection_type_data *rtd,
+		struct fy_event *fye, struct fy_path *path,
+		void *data, size_t data_size)
+{
+	const struct reflection_object_ops *ops;
+
+	if (!ro_parent || !fye || !path) {
+		assert(0);
+		return NULL;
+	}
+
+	/*
+	 * Fail explicitly rather than through assert(): asserts vanish with
+	 * NDEBUG, and most reflection_ops_table entries have no object_ops.
+	 */
+	if (!rtd || !rtd->ops || !rtd->ops->object_ops)
+		return NULL;
+
+	ops = rtd->ops->object_ops(rtd);
+	if (!ops)
+		return NULL;
+
+	return reflection_object_create_internal(ro_parent, rtd, fye, path, ops, data, data_size);
+}
+
+/*
+ * Ask parent to create the child object for this event. Returns NULL on
+ * a missing argument or when the parent cannot have children.
+ */
+struct reflection_object *
+reflection_object_create_child(struct reflection_object *parent, struct fy_event *fye, struct fy_path *path)
+{
+	if (!parent || !fye || !path)
+		return NULL;
+
+	/* scalar object ops (int, enum) define no create_child hook */
+	if (!parent->ops || !parent->ops->create_child)
+		return NULL;
+
+	return parent->ops->create_child(parent, fye, path);
+}
+
+/*
+ * Decode a scalar event under parent. Uses the scalar_child hook when
+ * the parent has one, otherwise creates the child object and
+ * immediately finishes and destroys it. Returns 0 on success, -1 on error.
+ */
+int
+reflection_object_scalar_child(struct reflection_object *parent, struct fy_event *fye, struct fy_path *path)
+{
+	struct reflection_object *ro;
+
+	if (!parent || !fye || !path || !parent->ops)
+		return -1;
+
+	/* shortcut exists */
+	if (parent->ops->scalar_child)
+		return parent->ops->scalar_child(parent, fye, path);
+
+	/* create and destroy cycle */
+	if (!parent->ops->create_child)
+		return -1;
+
+	ro = parent->ops->create_child(parent, fye, path);
+	if (!ro)
+		return -1;
+
+	return reflection_object_finish_and_destroy(ro);
+}
+
+/*
+ * Reset the userdata of the type and of each recorded field to NULL,
+ * then free rtd. NULL is a no-op.
+ */
+void reflection_type_data_cleanup(struct reflection_type_data *rtd)
+{
+	struct reflection_field_data *rfd;
+	size_t i;
+
+	if (!rtd)
+		return;
+
+	fy_type_info_set_userdata(rtd->ti, NULL);
+	for (i = 0, rfd = &rtd->fields[0]; i < rtd->fields_count; i++, rfd++)
+		fy_field_info_set_userdata(rfd->fi, NULL);
+
+	free(rtd);
+}
+
+int reflection_type_data_setup(const struct
fy_type_info *ti) +{ + struct reflection_type_data *rtd = NULL; + struct reflection_field_data *rfd; + const struct fy_field_info *fi; + size_t i; + size_t size; + + size = sizeof(*rtd); + if (fy_type_kind_has_fields(ti->kind)) + size += ti->count * sizeof(rtd->fields[0]); + + rtd = malloc(size); + if (!rtd) + goto err_out; + + memset(rtd, 0, size); + rtd->ti = ti; + rtd->ops = &reflection_ops_table[ti->kind]; + if (fy_type_kind_has_fields(ti->kind)) { + rtd->fields_count = ti->count; + for (i = 0, fi = ti->fields, rfd = &rtd->fields[0]; i < ti->count; i++, fi++, rfd++) { + rfd->rtd = rtd; + rfd->fi = fi; + rfd->index = 0; + fy_field_info_set_userdata(fi, rfd); + } + } + + fy_type_info_set_userdata(ti, rtd); + + return 0; +err_out: + reflection_type_data_cleanup(rtd); + return -1; +} + +void reflection_cleanup_type_system(struct fy_reflection *rfl) +{ + const struct fy_type_info *ti; + void *prev = NULL; + + prev = NULL; + while ((ti = fy_type_info_reverse_iterate(rfl, &prev)) != NULL) + reflection_type_data_cleanup(fy_type_info_get_userdata(ti)); +} + +int reflection_setup_type_system(struct fy_reflection *rfl) +{ + const struct fy_type_info *ti; + void *prev = NULL; + int ret; + + prev = NULL; + while ((ti = fy_type_info_iterate(rfl, &prev)) != NULL) { + ret = reflection_type_data_setup(ti); + if (ret) + goto err_out; + } + + return 0; +err_out: + reflection_cleanup_type_system(rfl); + return -1; +} + +void +reflection_decoder_destroy(struct reflection_decoder *rd) +{ + if (!rd) + return; + + if (rd->data && rd->data_allocated) + free(rd->data); + + free(rd); +} + +struct reflection_decoder * +reflection_decoder_create(bool verbose) +{ + struct reflection_decoder *rd = NULL; + + rd = malloc(sizeof(*rd)); + if (!rd) + goto err_out; + + memset(rd, 0, sizeof(*rd)); + rd->verbose = verbose; + + return rd; + +err_out: + reflection_decoder_destroy(rd); + return NULL; +} + +static enum fy_composer_return +reflection_compose_process_event(struct fy_parser *fyp, struct 
fy_event *fye, struct fy_path *path, void *userdata) +{ + struct reflection_decoder *rd = userdata; + struct reflection_object *ro, *rop; + enum fy_composer_return ret; + int rc; + + assert(rd); + if (rd->verbose) { + fprintf(stderr, "%s: %c%c%c%c%c %3d - %-32s\n", + fy_event_type_get_text(fye->type), + fy_path_in_root(path) ? 'R' : '-', + fy_path_in_sequence(path) ? 'S' : '-', + fy_path_in_mapping(path) ? 'M' : '-', + fy_path_in_mapping_key(path) ? 'K' : + fy_path_in_mapping_value(path) ? 'V' : '-', + fy_path_in_collection_root(path) ? '/' : '-', + fy_path_depth(path), + fy_path_get_text_alloca(path)); + } + + /* if we're in mapping key wait until we get the whole of the key */ + if (fy_path_in_mapping_key(path)) + return FYCR_OK_CONTINUE; + + switch (fye->type) { + /* nothing to do for those */ + case FYET_NONE: + ret = FYCR_ERROR; + break; + + case FYET_STREAM_START: + case FYET_STREAM_END: + ret = FYCR_OK_CONTINUE; + break; + + case FYET_SCALAR: + rop = fy_path_get_parent_user_data(path); + assert(rop); + + rc = reflection_object_scalar_child(rop, fye, path); + assert(!rc); + ret = FYCR_OK_CONTINUE; + break; + + /* alias not supported yet */ + case FYET_ALIAS: + ret = FYCR_ERROR; + break; + + case FYET_DOCUMENT_START: + ro = reflection_object_create_internal(NULL, rd->entry, fye, path, &root_ops, rd->data, rd->data_size); + assert(ro); + + fy_path_set_root_user_data(path, ro); + ret = FYCR_OK_CONTINUE; + break; + + case FYET_SEQUENCE_START: + case FYET_MAPPING_START: + rop = fy_path_get_parent_user_data(path); + assert(rop); + + ro = reflection_object_create_child(rop, fye, path); + assert(ro); + + fy_path_set_last_user_data(path, ro); + ret = FYCR_OK_CONTINUE; + break; + + case FYET_DOCUMENT_END: + ro = fy_path_get_root_user_data(path); + assert(ro); + fy_path_set_root_user_data(path, NULL); + + rc = reflection_object_finish_and_destroy(ro); + if (rc) { + ret = FYCR_ERROR; + break; + } + + rd->document_ready = true; + /* on single document mode we stop here */ 
+ if (rd->single_document) + ret = FYCR_OK_STOP; + else + ret = FYCR_OK_CONTINUE; + break; + + case FYET_SEQUENCE_END: + case FYET_MAPPING_END: + ro = fy_path_get_last_user_data(path); + assert(ro); + fy_path_set_last_user_data(path, NULL); + + rc = reflection_object_finish_and_destroy(ro); + if (rc) { + ret = FYCR_ERROR; + break; + } + + ret = FYCR_OK_CONTINUE; + break; + + default: + assert(0); + abort(); + } + + assert(ret == FYCR_OK_CONTINUE || ret == FYCR_OK_STOP); + return ret; +} + +int +reflection_decoder_parse(struct reflection_decoder *rd, struct fy_parser *fyp, const struct fy_type_info *ti, void *data, size_t data_size) +{ + ; + struct reflection_type_data *rtd, *rtd_dep; + size_t dep_size, type_size; + int rc; + + if (!rd || !fyp || !ti) + return -1; + + rtd = fy_type_info_get_userdata(ti); + + /* verify it's a pointer (always) */ + if (rtd->ti->kind == FYTK_PTR) { + /* get the dependent type (if rtd = "int *" rtd_dep = "int") */ + rtd_dep = reflection_type_data_get_dependent(rtd); + if (!rtd_dep) + return -1; + + dep_size = rtd_dep->ti->size; + if (rtd->ti->kind == FYTK_CONSTARRAY) + dep_size *= rtd_dep->ti->count; + + type_size = dep_size; + } else { + type_size = rtd->ti->size; + } + + if (data) { + /* verify size and alignment */ + if (type_size < data_size) + return -1; + rd->data = data; + rd->data_size = data_size; + rd->data_allocated = false; + } else { + rd->data_size = type_size; + rd->data = malloc(rd->data_size); + if (!rd->data) + return -1; + rd->data_allocated = true; + } + + /* we're good to go */ + memset(rd->data, 0, rd->data_size); + rd->entry = rtd; + + /* ignore errors for now */ + rc = fy_parse_compose(fyp, reflection_compose_process_event, rd); + if (rc) + return rc; + + if (fy_parser_get_stream_error(fyp)) + return -1; + + return 0; +} + +struct reflection_encoder { + bool emitted_stream_start; + bool emitted_stream_end; + bool verbose; + /* bindable */ + struct fy_emitter *fye; + struct reflection_type_data *entry; + const 
void *data; + size_t data_size; +}; + +void +reflection_encoder_destroy(struct reflection_encoder *re) +{ + if (!re) + return; + + free(re); +} + +struct reflection_encoder * +reflection_encoder_create(bool verbose) +{ + struct reflection_encoder *re = NULL; + + re = malloc(sizeof(*re)); + if (!re) + return NULL; + + memset(re, 0, sizeof(*re)); + re->verbose = verbose; + + return re; +} + +int +reflection_encoder_emit(struct reflection_encoder *re, struct fy_emitter *fye, struct reflection_type_data *rtd, const void *data, size_t data_size) +{ + int rc; + + rc = fy_emit_event(fye, fy_emit_event_create(fye, FYET_STREAM_START)); + if (rc) + goto err_out; + + rc = fy_emit_event(fye, fy_emit_event_create(fye, FYET_DOCUMENT_START, 0, NULL, NULL)); + if (rc) + goto err_out; + + assert(rtd->ops->emit); + rc = rtd->ops->emit(rtd, fye, data, data_size); + if (rc) + goto err_out; + + rc = fy_emit_event(fye, fy_emit_event_create(fye, FYET_DOCUMENT_END, 0)); + if (rc) + goto err_out; + + rc = fy_emit_event(fye, fy_emit_event_create(fye, FYET_STREAM_END)); + if (rc) + goto err_out; + + return 0; +err_out: + return -1; +} + +int main(int argc, char *argv[]) +{ + struct fy_parse_cfg cfg = { + .search_path = INCLUDE_DEFAULT, + .flags = + (QUIET_DEFAULT ? FYPCF_QUIET : 0) | + (RESOLVE_DEFAULT ? FYPCF_RESOLVE_DOCUMENT : 0) | + (DISABLE_ACCEL_DEFAULT ? FYPCF_DISABLE_ACCELERATORS : 0) | + (DISABLE_BUFFERING_DEFAULT ? FYPCF_DISABLE_BUFFERING : 0) | + (DISABLE_DEPTH_LIMIT_DEFAULT ? FYPCF_DISABLE_DEPTH_LIMIT : 0) | + (SLOPPY_FLOW_INDENTATION_DEFAULT ? FYPCF_SLOPPY_FLOW_INDENTATION : 0) | + (PREFER_RECURSIVE_DEFAULT ? FYPCF_PREFER_RECURSIVE : 0) | + (YPATH_ALIASES_DEFAULT ? 
FYPCF_YPATH_ALIASES : 0), + }; + struct fy_emitter_cfg emit_cfg; + struct fy_parser *fyp = NULL; + struct fy_emitter *fye = NULL; + int rc, exitcode = EXIT_FAILURE, opt, lidx, i, j, step = 1; + enum fy_error_module errmod; + unsigned int errmod_mask; + bool show; + int indent = INDENT_DEFAULT; + int width = WIDTH_DEFAULT; + bool follow = FOLLOW_DEFAULT; + const char *to = TO_DEFAULT; + const char *from = FROM_DEFAULT; + const char *color = COLOR_DEFAULT; + const char *file = NULL, *trim = TRIM_DEFAULT; + char *tmp, *s, *progname; + struct fy_document *fyd, *fyd_join = NULL; + struct dump_userdata du; + enum fy_emitter_cfg_flags emit_flags = 0; + struct fy_node *fyn, *fyn_emit, *fyn_to, *fyn_from; + int count_ins = 0; + struct fy_document **fyd_ins = NULL; + int tool_mode = OPT_TOOL; + struct fy_event *fyev; + struct fy_token *fyt; + bool join_resolve = RESOLVE_DEFAULT; + struct fy_token_iter *iter; + bool streaming = STREAMING_DEFAULT; + struct fy_diag_cfg dcfg; + struct fy_diag *diag = NULL; + struct fy_path_parse_cfg pcfg; + struct fy_path_expr *expr = NULL; + struct fy_path_exec_cfg xcfg; + struct fy_path_exec *fypx = NULL; + struct fy_node *fyn_start; + bool dump_pathexpr = false; + bool noexec = false; + bool null_output = false; + bool stdin_input; + void *res_iter; + bool disable_flow_markers = DISABLE_FLOW_MARKERS_DEFAULT; + bool document_event_stream = DOCUMENT_EVENT_STREAM_DEFAULT; + bool collect_errors = COLLECT_ERRORS_DEFAULT; + bool allow_duplicate_keys = ALLOW_DUPLICATE_KEYS_DEFAULT; + bool tsv_format = TSV_FORMAT_DEFAULT; + struct composer_data cd; + bool dump_path = DUMP_PATH_DEFAULT; + const char *input_arg; + struct fy_reflection *rfl = NULL; + const char *cflags = ""; + const char *import_blob = NULL; + const char *generate_blob = NULL; + bool type_dump = false, prune_system = false; + const char *type_include = NULL, *type_exclude = NULL; + const char *import_c_file = NULL; + const char *entry_type = NULL; + struct reflection_decoder *rd = NULL; 
+ struct reflection_encoder *re = NULL; + const struct fy_type_info *ti = NULL; + + fy_valgrind_check(&argc, &argv); + + /* select the appropriate tool mode */ + progname = argv[0]; + progname = strrchr(argv[0], '/'); + if (!progname) + progname = argv[0]; + else + progname++; + + /* default mode is dump */ + if (!strcmp(progname, "fy-filter")) + tool_mode = OPT_FILTER; + else if (!strcmp(progname, "fy-testsuite")) + tool_mode = OPT_TESTSUITE; + else if (!strcmp(progname, "fy-dump")) + tool_mode = OPT_DUMP; + else if (!strcmp(progname, "fy-join")) + tool_mode = OPT_JOIN; + else if (!strcmp(progname, "fy-ypath")) + tool_mode = OPT_YPATH; + else if (!strcmp(progname, "fy-scan-dump")) + tool_mode = OPT_SCAN_DUMP; + else if (!strcmp(progname, "fy-parse-dump")) + tool_mode = OPT_PARSE_DUMP; + else if (!strcmp(progname, "fy-compose")) + tool_mode = OPT_COMPOSE; + else if (!strcmp(progname, "fy-yaml-version-dump")) + tool_mode = OPT_YAML_VERSION_DUMP; + else if (!strcmp(progname, "fy-reflect")) + tool_mode = OPT_REFLECT; + else + tool_mode = OPT_TOOL; + + fy_diag_cfg_default(&dcfg); + /* XXX remember to modify this if you change COLOR_DEFAULT */ + + memset(&du, 0, sizeof(du)); + du.fp = stdout; + du.colorize = isatty(fileno(stdout)) == 1; + du.visible = VISIBLE_DEFAULT; + + emit_flags = (SORT_DEFAULT ? FYECF_SORT_KEYS : 0) | + (COMMENT_DEFAULT ? FYECF_OUTPUT_COMMENTS : 0) | + (STRIP_LABELS_DEFAULT ? FYECF_STRIP_LABELS : 0) | + (STRIP_TAGS_DEFAULT ? FYECF_STRIP_TAGS : 0) | + (STRIP_DOC_DEFAULT ? 
FYECF_STRIP_DOC : 0); + apply_mode_flags(MODE_DEFAULT, &emit_flags); + + while ((opt = getopt_long_only(argc, argv, + "I:" "d:" "i:" "w:" "rsc" "C:" "m:" "V" "f:" "t:" "T:F:" "j:" "qhvl", + lopts, &lidx)) != -1) { + switch (opt) { + case 'I': + tmp = alloca(strlen(cfg.search_path) + 1 + strlen(optarg) + 1); + s = tmp; + strcpy(s, cfg.search_path); + if (cfg.search_path && cfg.search_path[0]) { + s += strlen(cfg.search_path); + *s++ = ':'; + } + strcpy(s, optarg); + s += strlen(optarg); + *s = '\0'; + cfg.search_path = tmp; + break; + case 'i': + indent = atoi(optarg); + if (indent < 0 || indent > FYECF_INDENT_MASK) { + fprintf(stderr, "bad indent option %s\n", optarg); + display_usage(stderr, progname, tool_mode); + return EXIT_FAILURE; + } + + break; + case 'w': + width = atoi(optarg); + if (width < 0 || width > FYECF_WIDTH_MASK) { + fprintf(stderr, "bad width option %s\n", optarg); + display_usage(stderr, progname, tool_mode); + return EXIT_FAILURE; + } + break; + case 'd': + dcfg.level = fy_string_to_error_type(optarg); + if (dcfg.level == FYET_MAX) { + fprintf(stderr, "bad debug level option %s\n", optarg); + display_usage(stderr, progname, tool_mode); + return EXIT_FAILURE; + } + break; + case OPT_DISABLE_DIAG: + case OPT_ENABLE_DIAG: + if (!strcmp(optarg, "all")) { + errmod_mask = FY_BIT(FYEM_MAX) - 1; + } else { + errmod = fy_string_to_error_module(optarg); + if (errmod == FYEM_MAX) { + fprintf(stderr, "bad error module option %s\n", optarg); + display_usage(stderr, progname, tool_mode); + return EXIT_FAILURE; + } + errmod_mask = FY_BIT(errmod); + } + if (opt == OPT_DISABLE_DIAG) + dcfg.module_mask &= ~errmod_mask; + else + dcfg.module_mask |= errmod_mask; + break; + + case OPT_SHOW_DIAG: + case OPT_HIDE_DIAG: + show = opt == OPT_SHOW_DIAG; + if (!strcmp(optarg, "source")) { + dcfg.show_source = show; + } else if (!strcmp(optarg, "position")) { + dcfg.show_position = show; + } else if (!strcmp(optarg, "type")) { + dcfg.show_type = show; + } else if 
(!strcmp(optarg, "module")) { + dcfg.show_module = show; + } else { + fprintf(stderr, "bad %s option %s\n", + show ? "show" : "hide", optarg); + display_usage(stderr, progname, tool_mode); + return EXIT_FAILURE; + } + break; + + case 'r': + cfg.flags |= FYPCF_RESOLVE_DOCUMENT; + break; + case 's': + emit_flags |= FYECF_SORT_KEYS; + break; + case 'c': + cfg.flags |= FYPCF_PARSE_COMMENTS; + emit_flags |= FYECF_OUTPUT_COMMENTS; + break; + case 'C': + color = optarg; + if (!strcmp(color, "auto")) { + dcfg.colorize = isatty(fileno(stderr)) == 1; + du.colorize = isatty(fileno(stdout)) == 1; + } + else if (!strcmp(color, "yes") || !strcmp(color, "1") || !strcmp(color, "on")) { + dcfg.colorize = true; + du.colorize = true; + } else if (!strcmp(color, "no") || !strcmp(color, "0") || !strcmp(color, "off")) { + dcfg.colorize = false; + du.colorize = false; + } else { + fprintf(stderr, "bad color option %s\n", optarg); + display_usage(stderr, progname, tool_mode); + return EXIT_FAILURE; + } + break; + case 'm': + rc = apply_mode_flags(optarg, &emit_flags); + if (rc) { + fprintf(stderr, "bad mode option %s\n", optarg); + display_usage(stderr, progname, tool_mode); + return EXIT_FAILURE; + } + break; + case 'V': + du.visible = true; + break; + case 'l': + follow = true; + break; + case 'q': + cfg.flags |= FYPCF_QUIET; + dcfg.output_fn = no_diag_output_fn; + dcfg.fp = NULL; + dcfg.colorize = false; + break; + case 'f': + file = optarg; break; case 't': trim = optarg; @@ -1797,6 +3426,7 @@ int main(int argc, char *argv[]) case OPT_PARSE_DUMP: case OPT_COMPOSE: case OPT_YAML_VERSION_DUMP: + case OPT_REFLECT: tool_mode = opt; break; case OPT_STRIP_LABELS: @@ -1888,6 +3518,33 @@ int main(int argc, char *argv[]) case OPT_TSV_FORMAT: tsv_format = true; break; + case OPT_GENERATE_BLOB: + generate_blob = optarg; + break; + case OPT_IMPORT_BLOB: + import_blob = optarg; + break; + case OPT_TYPE_DUMP: + type_dump = true; + break; + case OPT_PRUNE_SYSTEM: + prune_system = true; + break; + 
case OPT_CFLAGS: + cflags = optarg; + break; + case OPT_TYPE_INCLUDE: + type_include = optarg; + break; + case OPT_TYPE_EXCLUDE: + type_exclude = optarg; + break; + case OPT_IMPORT_C_FILE: + import_c_file = optarg; + break; + case OPT_ENTRY_TYPE: + entry_type = optarg; + break; case 'h' : default: if (opt != 'h') @@ -2442,11 +4099,123 @@ int main(int argc, char *argv[]) break; + case OPT_REFLECT: + rfl = NULL; + + if (import_blob) { + rfl = fy_reflection_from_packed_blob_file(import_blob); + if (!rfl) { + fprintf(stderr, "unable to get reflection from blob file %s\n", import_blob); + goto cleanup; + } + + } else if (import_c_file) { + file = import_c_file; + rfl = fy_reflection_from_c_file_with_cflags(file, cflags, true, true); + if (!rfl) { + fprintf(stderr, "unable to perform reflection from file %s\n", file); + goto cleanup; + } + } else + rfl = NULL; + + if (!rfl) { + fprintf(stderr, "No reflection; provide either --import-blob or --import-c-file option\n"); + goto cleanup; + } + + if (prune_system) + reflection_prune_system(rfl); + + if (type_include || type_exclude) { + rc = reflection_type_filter(rfl, type_include, type_exclude); + if (rc) + goto cleanup; + } + + if (type_dump) + reflection_type_info_c_dump(rfl); + else { + rc = reflection_setup_type_system(rfl); + if (rc) { + fprintf(stderr, "reflection_setup_type_system() failed!\n"); + goto cleanup; + } + + if (!entry_type) { + fprintf(stderr, "No entry point type; supply an --entry-type\n"); + goto cleanup; + } + + if (optind >= argc) { + fprintf(stderr, "missing yaml file to dump\n"); + goto cleanup; + } + + rd = reflection_decoder_create(dump_path); + if (!rd) { + fprintf(stderr, "failed to create the decoder\n"); + goto cleanup; + } + + re = reflection_encoder_create(dump_path); + if (!re) { + fprintf(stderr, "failed to create the encoder\n"); + goto cleanup; + } + + for (i = optind; i < argc; i++) { + rc = set_parser_input(fyp, argv[i], false); + if (rc) { + fprintf(stderr, "failed to set parser 
input to '%s' for dump\n", argv[i]); + goto cleanup; + } + } + + ti = reflection_lookup_type_by_name(rfl, entry_type); + if (!ti) { + fprintf(stderr, "Unable to lookup type info for entry_type '%s'\n", entry_type); + goto cleanup; + } + + rc = reflection_decoder_parse(rd, fyp, ti, NULL, 0); + if (rc) { + fprintf(stderr, "unable to parse with the decoder\n"); + goto cleanup; + } + + rc = reflection_encoder_emit(re, fye, rd->entry, rd->data, rd->data_size); + if (rc) { + fprintf(stderr, "unable to emit with the encoder\n"); + goto cleanup; + } + } + + if (generate_blob) { + rc = fy_reflection_to_packed_blob_file(rfl, generate_blob); + if (rc) { + fprintf(stderr, "unable to generate blob to file %s\n", generate_blob); + goto cleanup; + } + } + /* cleanup will take care of rfl cleanup */ + break; } exitcode = EXIT_SUCCESS; cleanup: + if (re) + reflection_encoder_destroy(re); + + if (rd) + reflection_decoder_destroy(rd); + + if (rfl) { + reflection_cleanup_type_system(rfl); + fy_reflection_destroy(rfl); + } + if (fypx) fy_path_exec_destroy(fypx); diff --git a/src/util/fy-allocator-auto.c b/src/util/fy-allocator-auto.c new file mode 100644 index 00000000..803e1e1e --- /dev/null +++ b/src/util/fy-allocator-auto.c @@ -0,0 +1,337 @@ +/* + * fy-allocator-auto.c - automatic allocator + * + * Copyright (c) 2023 Pantelis Antoniou + * + * SPDX-License-Identifier: MIT + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include +#include +#include + +#include + +#include "fy-utils.h" +#include "fy-allocator-mremap.h" +#include "fy-allocator-dedup.h" +#include "fy-allocator-linear.h" +#include "fy-allocator-auto.h" + +static const struct fy_auto_setup_data default_setup_data = { + .scenario = FYAST_BALANCED, + .estimated_max_size = 1048576, /* 1MB */ +}; + +static void fy_auto_cleanup(struct fy_allocator *a); + +static int fy_auto_setup(struct fy_allocator *a, const void *data) +{ + size_t pagesz; + struct fy_auto_allocator *aa; + const struct 
fy_auto_setup_data *d; + struct fy_mremap_setup_data mrsetupdata; + struct fy_dedup_setup_data dsetupdata; + struct fy_allocator *mra = NULL, *da = NULL; + + if (!a) + return -1; + + pagesz = sysconf(_SC_PAGESIZE); + d = data ? data : &default_setup_data; + + aa = container_of(a, struct fy_auto_allocator, a); + memset(aa, 0, sizeof(*aa)); + aa->a.name = "auto"; + aa->a.ops = &fy_auto_allocator_ops; + + memset(&mrsetupdata, 0, sizeof(mrsetupdata)); + mrsetupdata.big_alloc_threshold = SIZE_MAX; + mrsetupdata.empty_threshold = 64; + mrsetupdata.grow_ratio = 1.5; + mrsetupdata.balloon_ratio = 8.0; + mrsetupdata.arena_type = FYMRAT_MMAP; + + if (d->estimated_max_size && d->estimated_max_size != SIZE_MAX) + mrsetupdata.minimum_arena_size = fy_size_t_align(d->estimated_max_size, pagesz); + else + mrsetupdata.minimum_arena_size = fy_size_t_align(16 << 20, pagesz); /* 16 MB */ + + fprintf(stderr, "mrsetupdata.minimum_arena_size=%zu\n", mrsetupdata.minimum_arena_size); + + mra = fy_allocator_create("mremap", &mrsetupdata); + if (!mra) + goto err_out; + + /* TODO switch to malloc for valgrind and asan check mode */ + if (d->scenario == FYAST_FASTEST) { + aa->parent_allocator = mra; + aa->sub_parent_allocator = NULL; + mra = NULL; + } else { + memset(&dsetupdata, 0, sizeof(dsetupdata)); + dsetupdata.parent_allocator = mra; + dsetupdata.bloom_filter_bits = 0; /* use default */ + dsetupdata.bucket_count_bits = 0; + dsetupdata.estimated_content_size = mrsetupdata.minimum_arena_size; + + da = fy_allocator_create("dedup", &dsetupdata); + if (!da) + goto err_out; + + aa->parent_allocator = da; + aa->sub_parent_allocator = mra; + + da = NULL; + mra = NULL; + } + + return 0; +err_out: + if (da) + fy_allocator_destroy(da); + if (mra) + fy_allocator_destroy(mra); + fy_auto_cleanup(a); + return -1; +} + +static void fy_auto_cleanup(struct fy_allocator *a) +{ + struct fy_auto_allocator *aa; + + if (!a) + return; + + aa = container_of(a, struct fy_auto_allocator, a); + + if 
(aa->sub_parent_allocator) { + fy_allocator_destroy(aa->sub_parent_allocator); + aa->sub_parent_allocator = NULL; + } + + if (aa->parent_allocator) { + fy_allocator_destroy(aa->parent_allocator); + aa->parent_allocator = NULL; + } +} + +struct fy_allocator *fy_auto_create(const void *setupdata) +{ + struct fy_auto_allocator *aa = NULL; + int rc; + + aa = malloc(sizeof(*aa)); + if (!aa) + goto err_out; + + rc = fy_auto_setup(&aa->a, setupdata); + if (rc) + goto err_out; + + return &aa->a; + +err_out: + if (aa) + free(aa); + + return NULL; +} + +void fy_auto_destroy(struct fy_allocator *a) +{ + struct fy_auto_allocator *aa; + + if (!a) + return; + + aa = container_of(a, struct fy_auto_allocator, a); + + fy_auto_cleanup(a); + + free(aa); +} + +void fy_auto_dump(struct fy_allocator *a) +{ + struct fy_auto_allocator *aa; + + if (!a) + return; + + aa = container_of(a, struct fy_auto_allocator, a); + + fy_allocator_dump(aa->parent_allocator); +} + +static void *fy_auto_alloc(struct fy_allocator *a, fy_alloc_tag tag, size_t size, size_t align) +{ + struct fy_auto_allocator *aa; + + if (!a) + return NULL; + + aa = container_of(a, struct fy_auto_allocator, a); + + return fy_allocator_alloc(aa->parent_allocator, tag, size, align); +} + +static void fy_auto_free(struct fy_allocator *a, fy_alloc_tag tag, void *data) +{ + struct fy_auto_allocator *aa; + + if (!a) + return; + + aa = container_of(a, struct fy_auto_allocator, a); + + fy_allocator_free(aa->parent_allocator, tag, data); +} + +static int fy_auto_update_stats(struct fy_allocator *a, fy_alloc_tag tag, struct fy_allocator_stats *stats) +{ + struct fy_auto_allocator *aa; + + if (!a) + return -1; + + aa = container_of(a, struct fy_auto_allocator, a); + + return fy_allocator_update_stats(aa->parent_allocator, tag, stats); +} + +static const void *fy_auto_store(struct fy_allocator *a, fy_alloc_tag tag, const void *data, size_t size, size_t align) +{ + struct fy_auto_allocator *aa; + + if (!a) + return NULL; + + aa = 
container_of(a, struct fy_auto_allocator, a); + + return fy_allocator_store(aa->parent_allocator, tag, data, size, align); +} + +static const void *fy_auto_storev(struct fy_allocator *a, fy_alloc_tag tag, const struct fy_iovecw *iov, unsigned int iovcnt, size_t align) +{ + struct fy_auto_allocator *aa; + + if (!a) + return NULL; + + aa = container_of(a, struct fy_auto_allocator, a); + + return fy_allocator_storev(aa->parent_allocator, tag, iov, iovcnt, align); +} + +static void fy_auto_release(struct fy_allocator *a, fy_alloc_tag tag, const void *data, size_t size) +{ + struct fy_auto_allocator *aa; + + if (!a) + return; + + aa = container_of(a, struct fy_auto_allocator, a); + + fy_allocator_release(aa->parent_allocator, tag, data, size); +} + +static fy_alloc_tag fy_auto_get_tag(struct fy_allocator *a, const void *tag_config) +{ + struct fy_auto_allocator *aa; + + if (!a) + return FY_ALLOC_TAG_ERROR; + + /* TODO, convert tag config? */ + + aa = container_of(a, struct fy_auto_allocator, a); + + return fy_allocator_get_tag(aa->parent_allocator, NULL); +} + +static void fy_auto_release_tag(struct fy_allocator *a, fy_alloc_tag tag) +{ + struct fy_auto_allocator *aa; + + if (!a) + return; + + aa = container_of(a, struct fy_auto_allocator, a); + + fy_allocator_release_tag(aa->parent_allocator, tag); +} + +static void fy_auto_trim_tag(struct fy_allocator *a, fy_alloc_tag tag) +{ + struct fy_auto_allocator *aa; + + if (!a) + return; + + aa = container_of(a, struct fy_auto_allocator, a); + + fy_allocator_trim_tag(aa->parent_allocator, tag); +} + +static void fy_auto_reset_tag(struct fy_allocator *a, fy_alloc_tag tag) +{ + struct fy_auto_allocator *aa; + + if (!a) + return; + + aa = container_of(a, struct fy_auto_allocator, a); + + fy_allocator_reset_tag(aa->parent_allocator, tag); +} + +static ssize_t fy_auto_get_areas(struct fy_allocator *a, fy_alloc_tag tag, struct fy_iovecw *iov, size_t maxiov) +{ + struct fy_auto_allocator *aa; + + if (!a) + return -1; + + aa = 
container_of(a, struct fy_auto_allocator, a); + + return fy_allocator_get_areas(aa->parent_allocator, tag, iov, maxiov); +} + +static const void *fy_auto_get_single_area(struct fy_allocator *a, fy_alloc_tag tag, size_t *sizep, size_t *startp, size_t *allocp) +{ + struct fy_auto_allocator *aa; + + if (!a) + return NULL; + + aa = container_of(a, struct fy_auto_allocator, a); + + return fy_allocator_get_single_area(aa->parent_allocator, tag, sizep, startp, allocp); +} + +const struct fy_allocator_ops fy_auto_allocator_ops = { + .setup = fy_auto_setup, + .cleanup = fy_auto_cleanup, + .create = fy_auto_create, + .destroy = fy_auto_destroy, + .dump = fy_auto_dump, + .alloc = fy_auto_alloc, + .free = fy_auto_free, + .update_stats = fy_auto_update_stats, + .store = fy_auto_store, + .storev = fy_auto_storev, + .release = fy_auto_release, + .get_tag = fy_auto_get_tag, + .release_tag = fy_auto_release_tag, + .trim_tag = fy_auto_trim_tag, + .reset_tag = fy_auto_reset_tag, + .get_areas = fy_auto_get_areas, + .get_single_area = fy_auto_get_single_area, +}; diff --git a/src/util/fy-allocator-auto.h b/src/util/fy-allocator-auto.h new file mode 100644 index 00000000..951ec647 --- /dev/null +++ b/src/util/fy-allocator-auto.h @@ -0,0 +1,37 @@ +/* + * fy-allocator-auto.h - the auto allocator + * + * Copyright (c) 2023 Pantelis Antoniou + * + * SPDX-License-Identifier: MIT + */ +#ifndef FY_ALLOCATOR_AUTO_H +#define FY_ALLOCATOR_AUTO_H + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "fy-allocator.h" + +enum fy_auto_scenario_type { + FYAST_FASTEST, /* fast, don't care about memory */ + FYAST_CONSERVE_MEMORY, /* conserve memory */ + FYAST_BALANCED, /* balance between allocs and frees */ +}; + +struct fy_auto_setup_data { + enum fy_auto_scenario_type scenario; + size_t estimated_max_size; +}; + +struct fy_auto_allocator { + struct fy_allocator a; + struct fy_auto_setup_data d; + struct fy_allocator *parent_allocator; + struct fy_allocator *sub_parent_allocator; +}; + 
+extern const struct fy_allocator_ops fy_auto_allocator_ops; + +#endif diff --git a/src/util/fy-allocator-dedup.c b/src/util/fy-allocator-dedup.c new file mode 100644 index 00000000..08cf82dd --- /dev/null +++ b/src/util/fy-allocator-dedup.c @@ -0,0 +1,1033 @@ +/* + * fy-allocator-dedup.c - dedup allocator + * + * Copyright (c) 2023 Pantelis Antoniou + * + * SPDX-License-Identifier: MIT + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include +#include +#include +#include + +#include + +/* for container_of */ +#include "fy-list.h" +#include "fy-utils.h" + +#include "fy-allocator-dedup.h" + +#define DEBUG_GROWS + +#undef BEFORE +#define BEFORE() \ + do { \ + clock_gettime(CLOCK_MONOTONIC, &before); \ + } while(0) + +#undef AFTER +#define AFTER() \ + ({ \ + clock_gettime(CLOCK_MONOTONIC, &after); \ + (int64_t)(after.tv_sec - before.tv_sec) * (int64_t)1000000000UL + (int64_t)(after.tv_nsec - before.tv_nsec); \ + }) + +static const unsigned int bit_to_chain_length_map[] = { + [0] = 1, /* 1 */ + [1] = 1, /* 2 */ + [2] = 1, /* 4 */ + [3] = 1, /* 8 */ + [4] = 1, /* 16 */ + [5] = 1, /* 32 */ + [6] = 2, /* 64 */ + [7] = 2, /* 128 */ + [8] = 2, /* 256 */ + [9] = 2, /* 512 */ + [10] = 3, /* 1024 */ + [11] = 3, /* 2048 */ + [12] = 3, /* 2048 */ + [13] = 3, /* 2048 */ + [14] = 4, /* 4096 */ + [15] = 4, /* 8192 */ + [16] = 5, /* 16384 */ + [17] = 5, /* 32768 */ + [18] = 6, /* 65536 */ + [19] = 7, /* 65536 */ + [20] = 8, /* 131072 */ + [21] = 9, /* 262144*/ + [22] = 10, /* 524288 */ + [23] = INT_MAX /* infinite from now on */ +}; + +static inline struct fy_dedup_tag * +fy_dedup_tag_from_tag(struct fy_dedup_allocator *da, fy_alloc_tag tag) +{ + if (!da) + return NULL; + + if ((unsigned int)tag >= ARRAY_SIZE(da->tags)) + return NULL; + + if (!fy_id_is_used(da->ids, ARRAY_SIZE(da->ids), (int)tag)) + return NULL; + + return &da->tags[tag]; +} + +static void fy_dedup_tag_data_cleanup(struct fy_dedup_allocator *da, struct fy_dedup_tag_data *dtd) 
+{ + if (dtd->buckets) + free(dtd->buckets); + if (dtd->bloom_id) + free(dtd->bloom_id); + if (dtd->buckets_in_use) + free(dtd->buckets_in_use); + memset(dtd, 0, sizeof(*dtd)); +} + +static int fy_dedup_tag_data_setup(struct fy_dedup_allocator *da, struct fy_dedup_tag_data *dtd, + unsigned int bloom_filter_bits, unsigned int bucket_count_bits, + size_t dedup_threshold, unsigned int chain_length_grow_trigger) +{ + size_t buckets_size; + + assert(da); + assert(dtd); + assert(dtd); + + memset(dtd, 0, sizeof(*dtd)); + + dtd->bloom_filter_bits = bloom_filter_bits; + dtd->bucket_count_bits = bucket_count_bits; + dtd->dedup_threshold = dedup_threshold; + dtd->chain_length_grow_trigger = chain_length_grow_trigger; + if (!dtd->chain_length_grow_trigger) { + if (bucket_count_bits >= ARRAY_SIZE(bit_to_chain_length_map)) + dtd->chain_length_grow_trigger = bit_to_chain_length_map[ARRAY_SIZE(bit_to_chain_length_map) - 1]; + else + dtd->chain_length_grow_trigger = bit_to_chain_length_map[bucket_count_bits]; + + if (dtd->chain_length_grow_trigger == 0) + dtd->chain_length_grow_trigger++; + } + + dtd->bloom_filter_mask = (1U << dtd->bloom_filter_bits) - 1; + dtd->bucket_count_mask = (1U << dtd->bucket_count_bits) - 1; + + dtd->bloom_id_count = (1U << dtd->bloom_filter_bits) / FY_ID_BITS_BITS; + dtd->bloom_id = malloc(2 * dtd->bloom_id_count * sizeof(*dtd->bloom_id)); + if (!dtd->bloom_id) + goto err_out; + dtd->bloom_update_id = dtd->bloom_id + dtd->bloom_id_count; + fy_id_reset(dtd->bloom_id, dtd->bloom_id_count); + fy_id_reset(dtd->bloom_update_id, dtd->bloom_id_count); + + dtd->bucket_count = 1U << dtd->bucket_count_bits; + + buckets_size = sizeof(*dtd->buckets) * dtd->bucket_count; + dtd->buckets = malloc(buckets_size); + if (!dtd->buckets) + goto err_out; + dtd->buckets_end = dtd->buckets + dtd->bucket_count; + + dtd->bucket_id_count = (1U << dtd->bucket_count_bits) / FY_ID_BITS_BITS; + dtd->buckets_in_use = malloc(2 * dtd->bucket_id_count * sizeof(*dtd->buckets_in_use)); + if 
(!dtd->buckets_in_use) + goto err_out; + dtd->buckets_collision = dtd->buckets_in_use + dtd->bucket_id_count; + fy_id_reset(dtd->buckets_in_use, dtd->bucket_id_count); + fy_id_reset(dtd->buckets_collision, dtd->bucket_id_count); + + return 0; + +err_out: + fy_dedup_tag_data_cleanup(da, dtd); + return -1; +} + +static void fy_dedup_tag_cleanup(struct fy_dedup_allocator *da, struct fy_dedup_tag *dt) +{ + struct fy_dedup_tag_data *dtd; + int id; + + if (!da || !dt) + return; + + /* get the id from the pointer */ + id = dt - da->tags; + assert((unsigned int)id < ARRAY_SIZE(da->tags)); + + /* already clean? */ + if (fy_id_is_free(da->ids, ARRAY_SIZE(da->ids), id)) + return; + + fy_id_free(da->ids, ARRAY_SIZE(da->ids), id); + + dtd = &dt->data[dt->data_active]; + +#ifdef DEBUG_GROWS + fprintf(stderr, "%s: dump of state at close\n", __func__); + fprintf(stderr, "%s: bloom count=%u used=%lu\n", + __func__, 1 << dtd->bloom_filter_bits, + fy_id_count_used(dtd->bloom_id, dtd->bloom_id_count)); + fprintf(stderr, "%s: bucket count=%u used=%lu collision=%lu\n", + __func__, 1 << dtd->bucket_count_bits, + fy_id_count_used(dtd->buckets_in_use, dtd->bucket_id_count), + fy_id_count_used(dtd->buckets_collision, dtd->bucket_id_count)); +#endif + + fy_dedup_tag_data_cleanup(da, dtd); + + /* we just release the tags, the underlying allocator should free everything */ + if (dt->entries_tag != FY_ALLOC_TAG_NONE) + fy_allocator_release_tag(da->entries_allocator, dt->entries_tag); + if (dt->content_tag != FY_ALLOC_TAG_NONE) + fy_allocator_release_tag(da->parent_allocator, dt->content_tag); + + memset(dt, 0, sizeof(*dt)); + dt->entries_tag = FY_ALLOC_TAG_NONE; + dt->content_tag = FY_ALLOC_TAG_NONE; +} + +static int fy_dedup_tag_setup(struct fy_dedup_allocator *da, struct fy_dedup_tag *dt) +{ + struct fy_dedup_tag_data *dtd; + int rc; + + assert(da); + assert(dt); + + memset(dt, 0, sizeof(*dt)); + + dt->entries_tag = FY_ALLOC_TAG_NONE; + dt->content_tag = FY_ALLOC_TAG_NONE; + + dt->entries_tag 
= fy_allocator_get_tag(da->entries_allocator, NULL); + if (dt->entries_tag == FY_ALLOC_TAG_ERROR) + goto err_out; + + dt->content_tag = fy_allocator_get_tag(da->parent_allocator, NULL); + if (dt->content_tag == FY_ALLOC_TAG_ERROR) + goto err_out; + + dt->data_active = 0; + dtd = &dt->data[dt->data_active]; + + rc = fy_dedup_tag_data_setup(da, dtd, da->bloom_filter_bits, da->bucket_count_bits, da->dedup_threshold, da->chain_length_grow_trigger); + if (rc) + goto err_out; + + return 0; + +err_out: + fy_dedup_tag_cleanup(da, dt); + return -1; +} + +static int fy_dedup_tag_adjust(struct fy_dedup_allocator *da, struct fy_dedup_tag *dt, int bloom_filter_adjust_bits, int bucket_adjust_bits) +{ + struct fy_dedup_tag_data *dtd, *new_dtd; + unsigned int bit_shift, new_bucket_count_bits, new_bloom_filter_bits; + unsigned int bloom_pos, bucket_pos; + struct fy_dedup_entry *de; + struct fy_dedup_entry_list *del, *new_del; + struct fy_id_iter iter; + int rc, id; + +#ifdef DEBUG_GROWS + int64_t ns; + struct timespec before, after; + BEFORE(); +#endif + + bit_shift = (unsigned int)fy_id_ffs(FY_ID_BITS_BITS); + + assert(da); + assert(dt); + dtd = &dt->data[dt->data_active]; + new_dtd = &dt->data[!dt->data_active]; + + new_bucket_count_bits = (unsigned int)((int)dtd->bucket_count_bits + bucket_adjust_bits); + if (new_bucket_count_bits > (sizeof(int) * 8 - 1)) + new_bucket_count_bits = (sizeof(int) * 8) - 1; + else if (new_bucket_count_bits < bit_shift) + new_bucket_count_bits = bit_shift; + + new_bloom_filter_bits = (unsigned int)((int)dtd->bloom_filter_bits + bloom_filter_adjust_bits); + if (new_bloom_filter_bits > (sizeof(int) * 8 - 1)) + new_bloom_filter_bits = (sizeof(int) * 8) - 1; + else if (new_bloom_filter_bits < new_bucket_count_bits) + new_bloom_filter_bits = new_bucket_count_bits; + + /* setup the new data */ + rc = fy_dedup_tag_data_setup(da, new_dtd, new_bloom_filter_bits, new_bucket_count_bits, da->dedup_threshold, da->chain_length_grow_trigger); + if (rc) + goto 
err_out; + + fy_id_iter_begin(dtd->buckets_in_use, dtd->bucket_id_count, &iter); + while ((id = fy_id_iter_next(dtd->buckets_in_use, dtd->bucket_id_count, &iter)) >= 0) { + + assert(fy_id_is_used(dtd->buckets_in_use, dtd->bucket_id_count, id)); + + del = dtd->buckets + id; + + while ((de = fy_dedup_entry_list_pop(del)) != NULL) { + + bloom_pos = (unsigned int)de->hash & new_dtd->bloom_filter_mask; + assert((int)bloom_pos >= 0); + fy_id_set_used(new_dtd->bloom_id, new_dtd->bloom_id_count, bloom_pos); + + bucket_pos = (unsigned int)de->hash & new_dtd->bucket_count_mask; + assert((int)bucket_pos >= 0); + + new_del = new_dtd->buckets + bucket_pos; + if (!fy_id_is_used(new_dtd->buckets_in_use, new_dtd->bucket_id_count, bucket_pos)) { + assert(FY_ID_OFFSET(bucket_pos) < new_dtd->bucket_id_count); + fy_id_set_used(new_dtd->buckets_in_use, new_dtd->bucket_id_count, bucket_pos); + fy_dedup_entry_list_init(new_del); + } else { + assert(FY_ID_OFFSET(bucket_pos) < new_dtd->bucket_id_count); + fy_id_set_used(new_dtd->buckets_collision, new_dtd->bucket_id_count, bucket_pos); + } + fy_dedup_entry_list_add(new_del, de); + } + } + fy_id_iter_end(dtd->buckets_in_use, dtd->bucket_id_count, &iter); + +#ifdef DEBUG_GROWS + + ns = AFTER(); + fprintf(stderr, "%s: operation took place in %"PRId64"ns\n", __func__, ns); + + { + const char *ban[2] = { "old", "new" }; + struct fy_dedup_tag_data *arr[2] = { dtd, new_dtd }; + struct fy_dedup_tag_data *d; + size_t bloom_count, bloom_used; + size_t bucket_count, bucket_used, bucket_collision; + unsigned int i; + + for (i = 0; i < 2; i++) { + d = arr[i]; + bloom_count = 1U << d->bloom_filter_bits; + bloom_used = fy_id_count_used(d->bloom_id, d->bloom_id_count); + bucket_count = 1U << d->bucket_count_bits; + bucket_used = fy_id_count_used(d->buckets_in_use, d->bucket_id_count); + bucket_collision = fy_id_count_used(d->buckets_collision, d->bucket_id_count); + + + fprintf(stderr, "%s: bloom %zu used %zu (%2.2f%%) ", ban[i], bloom_count, + 
bloom_used, 100.0*(double)bloom_used/(double)bloom_count); + fprintf(stderr, "bucket %zu used %zu (%2.2f%%) coll %zu (%2.2f%%)\n", bucket_count, + bucket_used, 100.0*(double)bucket_used/(double)bucket_count, + bucket_collision, 100.0*(double)bucket_collision/(double)bucket_count); + } + } +#endif + + /* cleanup the old data */ + fy_dedup_tag_data_cleanup(da, dtd); + + /* switch to the new one */ + dt->data_active = !dt->data_active; + + return 0; + +err_out: + return -1; +} + +static void fy_dedup_tag_trim(struct fy_dedup_allocator *da, struct fy_dedup_tag *dt) +{ + if (!da || !dt) + return; + + /* just pass them trim down to the parent */ + fy_allocator_trim_tag(da->entries_allocator, dt->entries_tag); + fy_allocator_trim_tag(da->parent_allocator, dt->content_tag); +} + +static void fy_dedup_tag_reset(struct fy_dedup_allocator *da, struct fy_dedup_tag *dt) +{ + struct fy_dedup_tag_data *dtd; + + if (!da || !dt) + return; + + /* just pass them reset down to the parent */ + fy_allocator_reset_tag(da->entries_allocator, dt->entries_tag); + fy_allocator_reset_tag(da->parent_allocator, dt->content_tag); + + dtd = &dt->data[dt->data_active]; + + fy_id_reset(dtd->bloom_id, dtd->bloom_id_count); + fy_id_reset(dtd->bloom_update_id, dtd->bloom_id_count); + fy_id_reset(dtd->buckets_in_use, dtd->bucket_id_count); + fy_id_reset(dtd->buckets_collision, dtd->bucket_id_count); +} + +static int fy_dedup_tag_update_stats(struct fy_dedup_allocator *da, struct fy_dedup_tag *dt, struct fy_allocator_stats *stats) +{ + unsigned int i; + + if (!da || !dt) + return -1; + + /* collect the underlying stats */ + fy_allocator_update_stats(da->entries_allocator, dt->entries_tag, stats); + fy_allocator_update_stats(da->parent_allocator, dt->content_tag, stats); + + /* and update with this ones */ + for (i = 0; i < ARRAY_SIZE(dt->stats.counters); i++) { + stats->counters[i] += dt->stats.counters[i]; + dt->stats.counters[i] = 0; + } + + return 0; +} + +static void fy_dedup_cleanup(struct 
fy_allocator *a); + +#define BUCKET_ESTIMATE_DIV 1024 +#define BLOOM_ESTIMATE_DIV 128 + +static int fy_dedup_setup(struct fy_allocator *a, const void *data) +{ + struct fy_dedup_allocator *da = NULL; + const struct fy_dedup_setup_data *d; + unsigned int bloom_filter_bits, bucket_count_bits; + unsigned int bit_shift, chain_length_grow_trigger; + size_t dedup_threshold; + bool has_estimate; + + if (!a || !data) + return -1; + + d = data; + if (!d->parent_allocator) + return -1; + + has_estimate = d->estimated_content_size && d->estimated_content_size != SIZE_MAX; + + /* power of two so ffs = log2 */ + bit_shift = (unsigned int)fy_id_ffs(FY_ID_BITS_BITS); + + bucket_count_bits = d->bucket_count_bits; + if (!bucket_count_bits && has_estimate) { + bucket_count_bits = 1; + while ((1LU << bucket_count_bits) < (d->estimated_content_size / BUCKET_ESTIMATE_DIV)) + bucket_count_bits++; + fprintf(stderr, "bucket_count_bits %u\n", bucket_count_bits); + } + /* at least that amount */ + if (bucket_count_bits < bit_shift) + bucket_count_bits = bit_shift; + /* keep the bucket count bits in signed int range */ + if (bucket_count_bits > (sizeof(int) * 8 - 1)) + bucket_count_bits = (sizeof(int) * 8) - 1; + + bloom_filter_bits = d->bloom_filter_bits; + if (!bloom_filter_bits && has_estimate) { + bloom_filter_bits = 1; + while ((1LU << bloom_filter_bits) < (d->estimated_content_size / BLOOM_ESTIMATE_DIV)) + bloom_filter_bits++; + fprintf(stderr, "bloom_filter_bits %u\n", bloom_filter_bits); + } + /* must be more than bucket count bits */ + if (bloom_filter_bits < bucket_count_bits) + bloom_filter_bits = bucket_count_bits + 3; /* minimum fanout */ + /* keep the bloom filter bits in signed int range */ + if (bloom_filter_bits > (sizeof(int) * 8 - 1)) + bloom_filter_bits = (sizeof(int) * 8) - 1; + + dedup_threshold = d->dedup_threshold; + chain_length_grow_trigger = d->chain_length_grow_trigger; + + da = container_of(a, struct fy_dedup_allocator, a); + memset(da, 0, sizeof(*da)); + + 
da->a.name = "dedup"; + da->a.ops = &fy_dedup_allocator_ops; + + da->parent_allocator = d->parent_allocator; + + /* just create a default mremap allocator for the entries */ + /* we don't care if they are contiguous */ + da->entries_allocator = fy_allocator_create("mremap", NULL); + if (!da->entries_allocator) + goto err_out; + + da->bloom_filter_bits = bloom_filter_bits; + da->bucket_count_bits = bucket_count_bits; + da->dedup_threshold = dedup_threshold; + da->chain_length_grow_trigger = chain_length_grow_trigger; + + /* just a seed, perhaps it should be configurable? */ + da->xxseed = ((uint64_t)rand() << 32) | (uint32_t)rand(); + /* start with the state already initialized */ + XXH64_reset(&da->xxstate_template, da->xxseed); + + fy_id_reset(da->ids, ARRAY_SIZE(da->ids)); + + return 0; +err_out: + fy_dedup_cleanup(a); + return -1; +} + +static void fy_dedup_cleanup(struct fy_allocator *a) +{ + struct fy_dedup_allocator *da; + struct fy_dedup_tag *dt; + unsigned int i; + + if (!a) + return; + + da = container_of(a, struct fy_dedup_allocator, a); + + if (da->entries_allocator) + fy_allocator_destroy(da->entries_allocator); + + for (i = 0, dt = da->tags; i < ARRAY_SIZE(da->tags); i++, dt++) + fy_dedup_tag_cleanup(da, dt); +} + +struct fy_allocator *fy_dedup_create(const void *setupdata) +{ + struct fy_dedup_allocator *da = NULL; + int rc; + + da = malloc(sizeof(*da)); + if (!da) + goto err_out; + + rc = fy_dedup_setup(&da->a, setupdata); + if (rc) + goto err_out; + + return &da->a; + +err_out: + if (da) + free(da); + + return NULL; +} + +void fy_dedup_destroy(struct fy_allocator *a) +{ + struct fy_dedup_allocator *da; + + if (!a) + return; + + da = container_of(a, struct fy_dedup_allocator, a); + fy_dedup_cleanup(a); + free(da); +} + +void fy_dedup_dump(struct fy_allocator *a) +{ + struct fy_dedup_allocator *da; + struct fy_dedup_tag *dt; + unsigned int i; + + if (!a) + return; + + da = container_of(a, struct fy_dedup_allocator, a); + + fprintf(stderr, "dedup: "); 
/*
 * fy_dedup_storev() - store the concatenation of an iovec, deduplicating it
 *
 * @a: the dedup allocator
 * @tag: a tag previously obtained from this allocator
 * @iov: array of @iovcnt segments; the payload is their concatenation
 * @iovcnt: number of segments
 * @align: alignment requested for the stored copy
 *
 * Payloads smaller than the tag's dedup_threshold are simply copied into
 * memory from the parent allocator. Anything else is hashed (XXH64,
 * starting from the allocator's pre-seeded state) and looked up: the
 * bloom filter is consulted first, then the bucket chain is walked
 * comparing hash and content. On a match the entry's reference count is
 * bumped and the already stored memory is returned; otherwise a new
 * content area and a new entry are allocated and linked in.
 *
 * Returns a pointer to the stored data, or NULL on a bad argument or an
 * allocation failure.
 */
static const void *fy_dedup_storev(struct fy_allocator *a, fy_alloc_tag tag, const struct fy_iovecw *iov, unsigned int iovcnt, size_t align)
{
	XXH64_state_t xxstate;
	struct fy_dedup_allocator *da;
	struct fy_dedup_tag *dt;
	struct fy_dedup_tag_data *dtd;
	struct fy_dedup_entry *de;
	uint64_t hash;
	unsigned int bloom_pos, bucket_pos;
	bool bloom_hit;
	struct fy_dedup_entry_list *del;
	unsigned int i, chain_length;
	void *s, *e, *p, *mem = NULL;
	size_t size, total_size;

	if (!a)
		return NULL;

	da = container_of(a, struct fy_dedup_allocator, a);

	/* mem is still NULL here, so err_out does not touch the unset dt */
	if (!iov)
		goto err_out;

	dt = fy_dedup_tag_from_tag(da, tag);
	if (!dt)
		goto err_out;

	dtd = &dt->data[dt->data_active];

	/* calculate data total size */
	total_size = 0;
	for (i = 0; i < iovcnt; i++)
		total_size += iov[i].size;

	/* if it's under the dedup threshold just allocate and copy */
	if (total_size < dtd->dedup_threshold) {

		/* just pass to the parent allocator using the content tag */
		p = fy_allocator_alloc(da->parent_allocator, dt->content_tag, total_size, align);
		if (!p)
			goto err_out;

		/* gather the segments back to back (no entry, no stats on this path) */
		for (i = 0, s = p; i < iovcnt; i++) {
			size = iov[i].size;
			memcpy(s, iov[i].data, size);
			s += size;
		}

		return p;
	}

	/* hash all segments as one stream, starting from the seeded template */
	xxstate = da->xxstate_template;
	for (i = 0; i < iovcnt; i++)
		XXH64_update(&xxstate, iov[i].data, iov[i].size);
	hash = XXH64_digest(&xxstate);

	/* first check in the bloom filter */
	bloom_pos = (unsigned int)hash & dtd->bloom_filter_mask;
	assert((int)bloom_pos >= 0);
	bloom_hit = fy_id_is_used(dtd->bloom_id, dtd->bloom_id_count, (int)bloom_pos);

	/* the bucket is selected by the low bits of the same hash */
	bucket_pos = (unsigned int)hash & dtd->bucket_count_mask;
	assert((int)bucket_pos >= 0);

	del = dtd->buckets + bucket_pos;
	assert(del < dtd->buckets_end);

	/* number of non-matching entries walked; drives the grow decision below */
	chain_length = 0;
	if (bloom_hit) {

		if (!fy_id_is_used(dtd->buckets_in_use, dtd->bucket_id_count, (int)bucket_pos)) {
			/* this is possible when there was a delete and bloom filter is not updated */
			goto new_entry;
		}

		for (de = fy_dedup_entry_list_head(del); de; de = fy_dedup_entry_next(del, de)) {

			/* match hash */
			if (de->hash != hash) {
				/* mark that we had a collision here */
				fy_id_set_used(dtd->buckets_collision, dtd->bucket_id_count, (int)bucket_pos);
			} else {
				/* match content, segment by segment against the stored copy */
				s = de->mem;
				e = s + de->size;
				for (i = 0; i < iovcnt; i++) {
					size = iov[i].size;
					if ((s + size) > e || memcmp(iov[i].data, s, size))
						break;
					s += size;
				}
				/* match: every segment compared equal and the sizes agree */
				if (i == iovcnt && s == e)
					break;
			}
			chain_length++;
		}

		if (de) {
			/* increase the reference count */
			de->refs++;

			/* update stats */
			dt->stats.dup_stores++;
			dt->stats.dup_saved += total_size;

			return de->mem;
		}
	}

new_entry:
	mem = fy_allocator_alloc(da->parent_allocator, dt->content_tag, total_size, align);
	if (!mem)
		goto err_out;

	/* verify it's aligned correctly */
	assert(((uintptr_t)mem & (align - 1)) == 0);

	de = fy_allocator_alloc(da->entries_allocator, dt->entries_tag, sizeof(*de), alignof(struct fy_dedup_entry));
	if (!de)
		goto err_out;

	de->hash = hash;
	de->refs = 1;
	de->size = total_size;
	de->mem = mem;
	/* ownership moved to the entry, err_out must not free it anymore */
	mem = NULL;

	/* and copy the data */
	s = de->mem;
	for (i = 0; i < iovcnt; i++) {
		size = iov[i].size;
		memcpy(s, iov[i].data, size);
		s += size;
	}

	/* the bucket list is initialized lazily, on its first entry */
	if (!fy_id_is_used(dtd->buckets_in_use, dtd->bucket_id_count, (int)bucket_pos)) {
		fy_id_set_used(dtd->buckets_in_use, dtd->bucket_id_count, (int)bucket_pos);
		fy_dedup_entry_list_init(del);
	}

	/* and add to the bucket */
	fy_dedup_entry_list_add(del, de);

	/* turn the update bit for the bloom position */
	if (!bloom_hit)
		fy_id_set_used(dtd->bloom_id, dtd->bloom_id_count, (int)bloom_pos);

	/*
	 * adjust by one bit, if we've hit the trigger
	 * NOTE(review): the return value is ignored, so a failed grow is
	 * silent; dtd and del are not used after this call.
	 */
	if (chain_length > dtd->chain_length_grow_trigger)
		fy_dedup_tag_adjust(da, dt, 1, 1);

	/* update stats */
	dt->stats.stores++;
	dt->stats.stored += total_size;

	return de->mem;

err_out:
	/* only non-NULL when the content was allocated but the entry was not */
	if (mem)
		fy_allocator_free(da->parent_allocator, dt->content_tag, mem);

	return NULL;
}
found */ + if (!de) + goto err_out; + + /* take reference */ + assert(de->refs > 0); + de->refs--; + if (de->refs > 0) + goto ok_out; + + /* remove from the bucket */ + fy_dedup_entry_list_del(del, de); + + /* turn off the in use bit if last */ + if (fy_dedup_entry_list_empty(del)) + fy_id_set_free(dtd->buckets_in_use, dtd->bucket_id_count, (int)bucket_pos); + + /* XXX find whether this was the last in bloom filter cluster + * XXX and turn off the bloom filter bit + */ + + /* we need to update the bloom filter */ + dt->bloom_filter_needs_update = true; + + bloom_pos = (unsigned int)de->hash & dtd->bloom_filter_mask; + assert((int)bloom_pos >= 0); + fy_id_set_used(dtd->bloom_update_id, dtd->bloom_id_count, (int)bloom_pos); + + /* and free content and entry */ + fy_allocator_free(da->parent_allocator, dt->content_tag, de->mem); + fy_allocator_free(da->entries_allocator, dt->entries_tag, de); + +ok_out: + dt->stats.releases++; + dt->stats.released += size; + +err_out: + return; +} + +static fy_alloc_tag fy_dedup_get_tag(struct fy_allocator *a, const void *tag_config) +{ + struct fy_dedup_allocator *da; + struct fy_dedup_tag *dt = NULL; + fy_alloc_tag tag; + int id, rc; + + if (!a) + return FY_ALLOC_TAG_ERROR; + + da = container_of(a, struct fy_dedup_allocator, a); + + /* and one from us */ + id = fy_id_alloc(da->ids, ARRAY_SIZE(da->ids)); + if (id < 0) + goto err_out; + + tag = (fy_alloc_tag)id; + + dt = fy_dedup_tag_from_tag(da, tag); + assert(dt); + + rc = fy_dedup_tag_setup(da, dt); + if (rc) + goto err_out; + + return tag; + +err_out: + fy_dedup_tag_cleanup(da, dt); + return FY_ALLOC_TAG_ERROR; +} + +static void fy_dedup_release_tag(struct fy_allocator *a, fy_alloc_tag tag) +{ + struct fy_dedup_allocator *da; + struct fy_dedup_tag *dt; + + if (!a) + return; + + da = container_of(a, struct fy_dedup_allocator, a); + + dt = fy_dedup_tag_from_tag(da, tag); + if (!dt) + return; + + fy_dedup_tag_cleanup(da, dt); +} + +static void fy_dedup_trim_tag(struct fy_allocator 
*a, fy_alloc_tag tag) +{ + struct fy_dedup_allocator *da; + struct fy_dedup_tag *dt; + + if (!a) + return; + + da = container_of(a, struct fy_dedup_allocator, a); + + dt = fy_dedup_tag_from_tag(da, tag); + if (!dt) + return; + + fy_dedup_tag_trim(da, dt); +} + +static void fy_dedup_reset_tag(struct fy_allocator *a, fy_alloc_tag tag) +{ + struct fy_dedup_allocator *da; + struct fy_dedup_tag *dt; + + if (!a) + return; + + da = container_of(a, struct fy_dedup_allocator, a); + + dt = fy_dedup_tag_from_tag(da, tag); + if (!dt) + return; + + fy_dedup_tag_reset(da, dt); +} + +static ssize_t fy_dedup_get_areas(struct fy_allocator *a, fy_alloc_tag tag, struct fy_iovecw *iov, size_t maxiov) +{ + return -1; +} + +static const void *fy_dedup_get_single_area(struct fy_allocator *a, fy_alloc_tag tag, size_t *sizep, size_t *startp, size_t *allocp) +{ + struct fy_dedup_allocator *da; + struct fy_dedup_tag *dt; + + if (!a) + return NULL; + + da = container_of(a, struct fy_dedup_allocator, a); + + dt = fy_dedup_tag_from_tag(da, tag); + if (!dt) + return NULL; + + /* pass it down */ + return fy_allocator_get_single_area(da->parent_allocator, dt->content_tag, sizep, startp, allocp); +} + +const struct fy_allocator_ops fy_dedup_allocator_ops = { + .setup = fy_dedup_setup, + .cleanup = fy_dedup_cleanup, + .create = fy_dedup_create, + .destroy = fy_dedup_destroy, + .dump = fy_dedup_dump, + .alloc = fy_dedup_alloc, + .free = fy_dedup_free, + .update_stats = fy_dedup_update_stats, + .store = fy_dedup_store, + .storev = fy_dedup_storev, + .release = fy_dedup_release, + .get_tag = fy_dedup_get_tag, + .release_tag = fy_dedup_release_tag, + .trim_tag = fy_dedup_trim_tag, + .reset_tag = fy_dedup_reset_tag, + .get_areas = fy_dedup_get_areas, + .get_single_area = fy_dedup_get_single_area, +}; diff --git a/src/util/fy-allocator-dedup.h b/src/util/fy-allocator-dedup.h new file mode 100644 index 00000000..ae2a604d --- /dev/null +++ b/src/util/fy-allocator-dedup.h @@ -0,0 +1,85 @@ +/* + * 
/*
 * One generation of a tag's lookup tables: a bloom filter in front of a
 * hash table of entry lists. Each tag carries two of these so that the
 * tables can be rebuilt at a different size and then swapped.
 */
struct fy_dedup_tag_data {
	unsigned int bloom_filter_bits;		/* log2 of the bloom filter size */
	unsigned int bloom_filter_mask;		/* ANDed with the hash to get the bloom position */
	size_t bloom_id_count;			/* count passed to the fy_id_* calls on both bloom bitmaps */
	fy_id_bits *bloom_id;			/* bloom bitmap, bit set when an entry is stored */
	fy_id_bits *bloom_update_id;		/* bit set when an entry at that bloom position is released */
	unsigned int bucket_count_bits;		/* log2 of the number of buckets */
	unsigned int bucket_count_mask;		/* ANDed with the hash to get the bucket position */
	size_t bucket_count;			/* presumably 1 << bucket_count_bits - TODO confirm in the data setup */
	struct fy_dedup_entry_list *buckets;	/* bucket array, indexed by bucket position */
	struct fy_dedup_entry_list *buckets_end; /* upper bound of the bucket array (used in bounds asserts) */
	size_t bucket_id_count;			/* count passed to the fy_id_* calls on both bucket bitmaps */
	fy_id_bits *buckets_in_use;		/* bit set while the bucket list is initialized and non-empty */
	fy_id_bits *buckets_collision;		/* bit set when a bucket was seen holding differing entries */
	size_t dedup_threshold;			/* stores smaller than this bypass deduplication */
	unsigned int chain_length_grow_trigger;	/* walking a longer chain on store triggers a grow */
};
fy_dedup_tag tags[FY_DEDUP_TAG_MAX]; +}; + +extern const struct fy_allocator_ops fy_dedup_allocator_ops; + +#endif diff --git a/src/util/fy-allocator-linear.c b/src/util/fy-allocator-linear.c new file mode 100644 index 00000000..ad953fc2 --- /dev/null +++ b/src/util/fy-allocator-linear.c @@ -0,0 +1,330 @@ +/* + * fy-allocator-linear.c - linear allocator + * + * Copyright (c) 2023 Pantelis Antoniou + * + * SPDX-License-Identifier: MIT + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include +#include +#include + +#include + +#include "fy-utils.h" +#include "fy-allocator-linear.h" + +static int fy_linear_setup(struct fy_allocator *a, const void *data) +{ + struct fy_linear_allocator *la; + const struct fy_linear_setup_data *d; + void *buf, *alloc = NULL; + + if (!a || !data) + return -1; + + d = data; + if (!d->size) + return -1; + + if (!d->buf) { + alloc = malloc(d->size); + if (!alloc) + goto err_out; + buf = alloc; + } else + buf = d->buf; + + la = container_of(a, struct fy_linear_allocator, a); + memset(la, 0, sizeof(*la)); + la->a.name = "linear"; + la->a.ops = &fy_linear_allocator_ops; + la->alloc = alloc; + la->start = buf; + la->next = buf; + la->end = buf + d->size; + + return 0; +err_out: + if (alloc) + free(alloc); + return -1; +} + +static void fy_linear_cleanup(struct fy_allocator *a) +{ + struct fy_linear_allocator *la; + + if (!a) + return; + + la = container_of(a, struct fy_linear_allocator, a); + if (la->alloc) { + free(la->alloc); + la->alloc = NULL; + } +} + +struct fy_allocator *fy_linear_create(const void *setupdata) +{ + struct fy_linear_allocator *la; + const struct fy_linear_setup_data *d; + struct fy_linear_setup_data newsd; + void *s, *e, *buf, *alloc = NULL; + int rc; + + if (!setupdata) + return NULL; + + d = setupdata; + if (!d->size) + return NULL; + + if (!d->buf) { + alloc = malloc(d->size); + if (!alloc) + goto err_out; + buf = alloc; + } else + buf = d->buf; + + s = buf; + e = s + d->size; + 
+ s = fy_ptr_align(s, alignof(struct fy_linear_allocator)); + if ((size_t)(e - s) < sizeof(*la)) + goto err_out; + + la = s; + s += sizeof(*la); + + memset(&newsd, 0, sizeof(newsd)); + newsd.buf = s; + newsd.size = (size_t)(e - s); + + rc = fy_linear_setup(&la->a, &newsd); + if (rc) + goto err_out; + + assert(!la->alloc); + la->alloc = alloc; + + return &la->a; + +err_out: + if (alloc) + free(alloc); + + return NULL; +} + +void fy_linear_destroy(struct fy_allocator *a) +{ + struct fy_linear_allocator *la; + void *alloc; + + if (!a) + return; + + la = container_of(a, struct fy_linear_allocator, a); + + /* take out the allocation of create */ + alloc = la->alloc; + la->alloc = NULL; + + fy_linear_cleanup(a); + + if (alloc) + free(alloc); +} + +void fy_linear_dump(struct fy_allocator *a) +{ + struct fy_linear_allocator *la; + + if (!a) + return; + + la = container_of(a, struct fy_linear_allocator, a); + + fprintf(stderr, "linear: total %zu used %zu free %zu\n", + (size_t)(la->end - la->start), + (size_t)(la->next - la->start), + (size_t)(la->end - la->next)); +} + +static void *fy_linear_alloc(struct fy_allocator *a, fy_alloc_tag tag, size_t size, size_t align) +{ + struct fy_linear_allocator *la; + void *s; + + assert(a); + + la = container_of(a, struct fy_linear_allocator, a); + if (la->next >= la->end) + goto err_out; + + s = fy_ptr_align(la->next, align); + if ((size_t)(la->end - s) < size) + goto err_out; + + la->stats.allocations++; + la->stats.allocated += size; + + memset(s, 0, size); + + la->next = s + size; + + return s; + +err_out: + return NULL; +} + +static void fy_linear_free(struct fy_allocator *a, fy_alloc_tag tag, void *data) +{ + /* linear allocator does not free anything */ +} + +static int fy_linear_update_stats(struct fy_allocator *a, fy_alloc_tag tag, struct fy_allocator_stats *stats) +{ + struct fy_linear_allocator *la; + unsigned int i; + + if (!a || !stats) + return -1; + + la = container_of(a, struct fy_linear_allocator, a); + + /* and update 
with this ones */ + for (i = 0; i < ARRAY_SIZE(la->stats.counters); i++) { + stats->counters[i] += la->stats.counters[i]; + la->stats.counters[i] = 0; + } + + return 0; +} + +static const void *fy_linear_store(struct fy_allocator *a, fy_alloc_tag tag, const void *data, size_t size, size_t align) +{ + void *p; + + if (!a) + return NULL; + + p = fy_linear_alloc(a, tag, size, align); + if (!p) + goto err_out; + + memcpy(p, data, size); + + return p; + +err_out: + return NULL; +} + +static const void *fy_linear_storev(struct fy_allocator *a, fy_alloc_tag tag, const struct fy_iovecw *iov, unsigned int iovcnt, size_t align) +{ + void *p, *start; + unsigned int i; + size_t size; + + if (!a) + return NULL; + + size = 0; + for (i = 0; i < iovcnt; i++) + size += iov[i].size; + + start = fy_linear_alloc(a, tag, size, align); + if (!start) + goto err_out; + + for (i = 0, p = start; i < iovcnt; i++, p += size) { + size = iov[i].size; + memcpy(p, iov[i].data, size); + } + + return start; + +err_out: + return NULL; +} + +static void fy_linear_release(struct fy_allocator *a, fy_alloc_tag tag, const void *data, size_t size) +{ + /* nothing */ +} + +static fy_alloc_tag fy_linear_get_tag(struct fy_allocator *a, const void *tag_config) +{ + if (!a) + return FY_ALLOC_TAG_ERROR; + + /* always return 0, we don't do tags for linear */ + return 0; +} + +static void fy_linear_release_tag(struct fy_allocator *a, fy_alloc_tag tag) +{ + struct fy_linear_allocator *la; + + if (!a) + return; + + /* we only give out 0 as a tag */ + assert(tag == 0); + + la = container_of(a, struct fy_linear_allocator, a); + + /* we just rewind */ + la->next = la->start; +} + +static void fy_linear_trim_tag(struct fy_allocator *a, fy_alloc_tag tag) +{ + /* nothing */ +} + +static void fy_linear_reset_tag(struct fy_allocator *a, fy_alloc_tag tag) +{ + /* nothing */ +} + +static ssize_t fy_linear_get_areas(struct fy_allocator *a, fy_alloc_tag tag, struct fy_iovecw *iov, size_t maxiov) +{ + return -1; +} + +static 
/*
 * Linear (bump) allocator state: allocations advance `next` through the
 * [start, end) arena and are never freed individually.
 */
struct fy_linear_allocator {
	struct fy_allocator a;			/* embedded base, the containing struct is recovered with container_of */
	struct fy_allocator_stats stats;	/* counters bumped on alloc, drained and zeroed by update_stats */
	void *alloc;				/* malloc'ed backing buffer owned by the allocator, NULL when the caller supplied it */
	void *start;				/* first byte of the arena */
	void *next;				/* where the next allocation is carved from */
	void *end;				/* one past the last byte of the arena */
};
container_of */ +#include "fy-list.h" +#include "fy-utils.h" + +#include "fy-allocator-malloc.h" + +static inline struct fy_malloc_tag * +fy_malloc_tag_from_tag(struct fy_malloc_allocator *ma, fy_alloc_tag tag) +{ + if (!ma) + return NULL; + + if ((unsigned int)tag >= ARRAY_SIZE(ma->tags)) + return NULL; + + if (!fy_id_is_used(ma->ids, ARRAY_SIZE(ma->ids), (int)tag)) + return NULL; + + return &ma->tags[tag]; +} + +static int fy_malloc_setup(struct fy_allocator *a, const void *data) +{ + struct fy_malloc_allocator *ma; + struct fy_malloc_tag *mt; + unsigned int i; + + if (!a) + return -1; + + ma = container_of(a, struct fy_malloc_allocator, a); + memset(ma, 0, sizeof(*ma)); + ma->a.name = "malloc"; + ma->a.ops = &fy_malloc_allocator_ops; + + fy_id_reset(ma->ids, ARRAY_SIZE(ma->ids)); + for (i = 0, mt = ma->tags; i < ARRAY_SIZE(ma->tags); i++, mt++) + fy_malloc_entry_list_init(&mt->entries); + + return 0; +} + +static void fy_malloc_cleanup(struct fy_allocator *a) +{ + struct fy_malloc_allocator *ma; + struct fy_malloc_entry *me; + struct fy_malloc_tag *mt; + unsigned int i; + + if (!a) + return; + + ma = container_of(a, struct fy_malloc_allocator, a); + + for (i = 0, mt = ma->tags; i < ARRAY_SIZE(ma->tags); i++, mt++) { + while ((me = fy_malloc_entry_list_pop(&mt->entries)) != NULL) + free(me); + } +} + +struct fy_allocator *fy_malloc_create(const void *setupdata) +{ + struct fy_malloc_allocator *ma = NULL; + int rc; + + ma = malloc(sizeof(*ma)); + if (!ma) + goto err_out; + + rc = fy_malloc_setup(&ma->a, setupdata); + if (rc) + goto err_out; + + return &ma->a; + +err_out: + if (ma) + free(ma); + + return NULL; +} + +void fy_malloc_destroy(struct fy_allocator *a) +{ + struct fy_malloc_allocator *ma; + + if (!a) + return; + + ma = container_of(a, struct fy_malloc_allocator, a); + fy_malloc_cleanup(a); + free(ma); +} + +void fy_malloc_dump(struct fy_allocator *a) +{ + struct fy_malloc_allocator *ma; + struct fy_malloc_tag *mt; + struct fy_malloc_entry *me; + unsigned 
int i; + size_t count, total, system_total; + + if (!a) + return; + + ma = container_of(a, struct fy_malloc_allocator, a); + + fprintf(stderr, "malloc: "); + for (i = 0, mt = ma->tags; i < ARRAY_SIZE(ma->tags); i++, mt++) + fprintf(stderr, "%c", fy_malloc_entry_list_empty(&mt->entries) ? '.' : 'x'); + fprintf(stderr, "\n"); + + for (i = 0, mt = ma->tags; i < ARRAY_SIZE(ma->tags); i++, mt++) { + if (fy_malloc_entry_list_empty(&mt->entries)) + continue; + count = total = system_total = 0; + for (me = fy_malloc_entry_list_head(&mt->entries); me; me = fy_malloc_entry_next(&mt->entries, me)) { + count++; + total += me->size; + system_total += sizeof(*me) + me->size; + } + fprintf(stderr, " %d: count %zu total %zu system %zu overhead %zu (%2.2f%%)\n", i, + count, total, system_total, system_total - total, + 100.0 * (double)(system_total - total) / (double)system_total); + } +} + +static void *fy_malloc_tag_alloc(struct fy_malloc_allocator *ma, struct fy_malloc_tag *mt, size_t size, size_t align) +{ + struct fy_malloc_entry *me; + void *p; + int ret; + + assert(align <= 16); + + size = size + sizeof(*me); + ret = posix_memalign(&p, 16, size); + if (ret) + goto err_out; + + me = p; + assert(((uintptr_t)me & 15) == 0); + assert(((uintptr_t)(&me->mem[0]) & 15) == 0); + + me->size = size; + fy_malloc_entry_list_add(&mt->entries, me); + + return &me->mem[0]; + +err_out: + return NULL; +} + +static void fy_malloc_tag_free(struct fy_malloc_allocator *ma, struct fy_malloc_tag *mt, void *data) +{ + struct fy_malloc_entry *me; + + me = container_of(data, struct fy_malloc_entry, mem); + + fy_malloc_entry_list_del(&mt->entries, me); + free(me); +} + +static void *fy_malloc_alloc(struct fy_allocator *a, fy_alloc_tag tag, size_t size, size_t align) +{ + struct fy_malloc_allocator *ma; + struct fy_malloc_tag *mt; + void *p; + + if (!a || tag < 0) + return NULL; + + /* maximum align is 16 TODO */ + if (align > 16) + goto err_out; + + ma = container_of(a, struct fy_malloc_allocator, a); + 
+ mt = fy_malloc_tag_from_tag(ma, tag); + if (!mt) + goto err_out; + + p = fy_malloc_tag_alloc(ma, mt, size, align); + if (!p) + goto err_out; + + mt->stats.allocations++; + mt->stats.allocated += size; + + return p; + +err_out: + return NULL; +} + +static void fy_malloc_free(struct fy_allocator *a, fy_alloc_tag tag, void *data) +{ + struct fy_malloc_allocator *ma; + struct fy_malloc_tag *mt; + + if (!a || tag < 0 || !data) + return; + + ma = container_of(a, struct fy_malloc_allocator, a); + + mt = fy_malloc_tag_from_tag(ma, tag); + if (!mt) + return; + + fy_malloc_tag_free(ma, mt, data); + + mt->stats.frees++; +} + +static int fy_malloc_update_stats(struct fy_allocator *a, fy_alloc_tag tag, struct fy_allocator_stats *stats) +{ + struct fy_malloc_allocator *ma; + struct fy_malloc_tag *mt; + unsigned int i; + + if (!a || !stats) + return -1; + + ma = container_of(a, struct fy_malloc_allocator, a); + + mt = fy_malloc_tag_from_tag(ma, tag); + if (!mt) + goto err_out; + + /* and update with this ones */ + for (i = 0; i < ARRAY_SIZE(mt->stats.counters); i++) { + stats->counters[i] += mt->stats.counters[i]; + mt->stats.counters[i] = 0; + } + + return 0; + +err_out: + return -1; +} + +static const void *fy_malloc_storev(struct fy_allocator *a, fy_alloc_tag tag, const struct fy_iovecw *iov, unsigned int iovcnt, size_t align) +{ + struct fy_malloc_allocator *ma; + struct fy_malloc_tag *mt; + void *p, *start; + unsigned int i; + size_t size, total_size; + + if (!a || !iov) + return NULL; + + ma = container_of(a, struct fy_malloc_allocator, a); + + mt = fy_malloc_tag_from_tag(ma, tag); + if (!mt) + goto err_out; + + total_size = 0; + for (i = 0; i < iovcnt; i++) + total_size += iov[i].size; + + start = fy_malloc_tag_alloc(ma, mt, total_size, align); + if (!start) + goto err_out; + + for (i = 0, p = start; i < iovcnt; i++, p += size) { + size = iov[i].size; + memcpy(p, iov[i].data, size); + } + + mt->stats.stores++; + mt->stats.stored += total_size; + + return start; + 
+err_out: + return NULL; +} + +static const void *fy_malloc_store(struct fy_allocator *a, fy_alloc_tag tag, const void *data, size_t size, size_t align) +{ + struct fy_iovecw iov[1]; + + if (!a) + return NULL; + + /* just call the storev */ + iov[0].data = data; + iov[0].size = size; + return fy_malloc_storev(a, tag, iov, 1, align); +} + +static void fy_malloc_release(struct fy_allocator *a, fy_alloc_tag tag, const void *data, size_t size) +{ + struct fy_malloc_allocator *ma; + struct fy_malloc_tag *mt; + + if (!a || !data || !size) + return; + + ma = container_of(a, struct fy_malloc_allocator, a); + + mt = fy_malloc_tag_from_tag(ma, tag); + if (!mt) + return; + + /* the malloc's release is just a free */ + fy_malloc_tag_free(ma, mt, (void *)data); + + mt->stats.releases++; + mt->stats.released += size; +} + +static void fy_malloc_release_tag(struct fy_allocator *a, fy_alloc_tag tag) +{ + struct fy_malloc_allocator *ma; + struct fy_malloc_tag *mt; + struct fy_malloc_entry *me; + + if (!a) + return; + + ma = container_of(a, struct fy_malloc_allocator, a); + + mt = fy_malloc_tag_from_tag(ma, tag); + if (!mt) + return; + + while ((me = fy_malloc_entry_list_pop(&mt->entries)) != NULL) + free(me); + + fy_id_free(ma->ids, ARRAY_SIZE(ma->ids), tag); +} + +static fy_alloc_tag fy_malloc_get_tag(struct fy_allocator *a, const void *tag_config) +{ + struct fy_malloc_allocator *ma; + int id; + + if (!a) + return FY_ALLOC_TAG_ERROR; + + ma = container_of(a, struct fy_malloc_allocator, a); + + id = fy_id_alloc(ma->ids, ARRAY_SIZE(ma->ids)); + if (id < 0) + goto err_out; + + return (fy_alloc_tag)id; + +err_out: + return FY_ALLOC_TAG_ERROR; +} + +static void fy_malloc_trim_tag(struct fy_allocator *a, fy_alloc_tag tag) +{ + /* nothing */ +} + +static void fy_malloc_reset_tag(struct fy_allocator *a, fy_alloc_tag tag) +{ + struct fy_malloc_allocator *ma; + struct fy_malloc_tag *mt; + struct fy_malloc_entry *me; + + if (!a) + return; + + ma = container_of(a, struct fy_malloc_allocator, 
a); + + mt = fy_malloc_tag_from_tag(ma, tag); + if (!mt) + return; + + while ((me = fy_malloc_entry_list_pop(&mt->entries)) != NULL) + free(me); +} + +static ssize_t fy_malloc_get_areas(struct fy_allocator *a, fy_alloc_tag tag, struct fy_iovecw *iov, size_t maxiov) +{ + return -1; +} + +static const void *fy_malloc_get_single_area(struct fy_allocator *a, fy_alloc_tag tag, size_t *sizep, size_t *startp, size_t *allocp) +{ + /* can never get a single area */ + return NULL; +} + +const struct fy_allocator_ops fy_malloc_allocator_ops = { + .setup = fy_malloc_setup, + .cleanup = fy_malloc_cleanup, + .create = fy_malloc_create, + .destroy = fy_malloc_destroy, + .dump = fy_malloc_dump, + .alloc = fy_malloc_alloc, + .free = fy_malloc_free, + .update_stats = fy_malloc_update_stats, + .store = fy_malloc_store, + .storev = fy_malloc_storev, + .release = fy_malloc_release, + .get_tag = fy_malloc_get_tag, + .release_tag = fy_malloc_release_tag, + .trim_tag = fy_malloc_trim_tag, + .reset_tag = fy_malloc_reset_tag, + .get_areas = fy_malloc_get_areas, + .get_single_area = fy_malloc_get_single_area, +}; diff --git a/src/util/fy-allocator-malloc.h b/src/util/fy-allocator-malloc.h new file mode 100644 index 00000000..4f5856ee --- /dev/null +++ b/src/util/fy-allocator-malloc.h @@ -0,0 +1,46 @@ +/* + * fy-allocator-malloc.h - the malloc allocator + * + * Copyright (c) 2023 Pantelis Antoniou + * + * SPDX-License-Identifier: MIT + */ +#ifndef FY_ALLOCATOR_MALLOC_H +#define FY_ALLOCATOR_MALLOC_H + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "fy-typelist.h" +#include "fy-id.h" + +#include "fy-allocator.h" + +struct fy_malloc_tag; + +FY_TYPE_FWD_DECL_LIST(malloc_entry); +struct fy_malloc_entry { + struct list_head node; + size_t size; + uint64_t mem[] __attribute__((aligned(16))); +}; +FY_TYPE_DECL_LIST(malloc_entry); + +#define FY_MALLOC_TAG_MAX 32 + +struct fy_malloc_tag { + struct fy_malloc_entry_list entries; + struct fy_allocator_stats stats; +}; + +struct 
fy_malloc_allocator { + struct fy_allocator a; + struct fy_malloc_entry_list entries; + fy_id_bits ids[FY_ID_BITS_ARRAY_COUNT_BITS(FY_MALLOC_TAG_MAX)]; + struct fy_malloc_tag tags[FY_MALLOC_TAG_MAX]; +}; + +extern const struct fy_allocator_ops fy_malloc_allocator_ops; + +#endif diff --git a/src/util/fy-allocator-mremap.c b/src/util/fy-allocator-mremap.c new file mode 100644 index 00000000..39e0a489 --- /dev/null +++ b/src/util/fy-allocator-mremap.c @@ -0,0 +1,794 @@ +/* + * fy-allocator-mremap.c - mremap allocator + * + * Copyright (c) 2023 Pantelis Antoniou + * + * SPDX-License-Identifier: MIT + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include +#include +#include +#include + +#include + +/* for container_of */ +#include "fy-list.h" +#include "fy-utils.h" + +#include "fy-allocator-mremap.h" + +#define DEBUG_ARENA + +static struct fy_mremap_arena * +fy_mremap_arena_create(struct fy_mremap_allocator *mra, struct fy_mremap_tag *mrt, size_t size) +{ + struct fy_mremap_arena *mran = NULL; + void *mem; + size_t size_page_align, balloon_size; + + if (size < mra->minimum_arena_size) + size = mra->minimum_arena_size; + + size_page_align = fy_size_t_align(size + FY_MREMAP_ARENA_OVERHEAD, mra->pagesz); + switch (mra->arena_type) { + case FYMRAT_MALLOC: + mran = calloc(1, size_page_align); + break; + case FYMRAT_MMAP: + /* allocate an initial ballooned size */ + balloon_size = fy_size_t_align((size_t)(size_page_align * mra->balloon_ratio), mra->pagesz); + if (balloon_size == size_page_align) + balloon_size = size_page_align + mra->pagesz; + mem = mmap(NULL, balloon_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (mem == MAP_FAILED) { + /* first allocation failed, that's ok, try again */ + mran = mmap(NULL, size_page_align, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + } else { + mran = mremap(mem, balloon_size, size_page_align, 0); + assert(mran == MAP_FAILED || mran == mem); + } + if (mran == 
MAP_FAILED) + return NULL; + break; + } + + if (!mran) + return NULL; + mran->size = size_page_align; + mran->next = FY_MREMAP_ARENA_OVERHEAD; + +#ifdef DEBUG_ARENA + fprintf(stderr, "%s: #%zu created arena %p size=%zu (%zuMB)\n", __func__, mrt - mra->tags, mran, mran->size, mran->size >> 20); +#endif + + return mran; +} + +static void fy_mremap_arena_destroy(struct fy_mremap_allocator *mra, struct fy_mremap_tag *mrt, struct fy_mremap_arena *mran) +{ + if (!mran) + return; + +#ifdef DEBUG_ARENA + fprintf(stderr, "%s: #%zu destroy arena %p size=%zu (%zuMB)\n", __func__, mrt - mra->tags, mran, mran->size, mran->size >> 20); +#endif + switch (mra->arena_type) { + case FYMRAT_MALLOC: + free(mran); + break; + + case FYMRAT_MMAP: + (void)munmap(mran, mran->size); + break; + } +} + +static int fy_mremap_arena_grow(struct fy_mremap_allocator *mra, struct fy_mremap_tag *mrt, struct fy_mremap_arena *mran, + size_t size, size_t align) +{ + void *mem; + + if (!mran || !size) + return -1; + + switch (mra->arena_type) { + case FYMRAT_MALLOC: + /* can't grow malloc without moving the pointer */ + break; + + case FYMRAT_MMAP: + + /* if the grow needs to be larger than double, don't bother */ + if (fy_size_t_align(mran->next, align) + size > 2 * mran->size) + break; + + /* double the arena */ + mem = mremap(mran, mran->size, mran->size * 2, 0); + if (mem == MAP_FAILED) + break; + + assert(mem == mran); + mran->size *= 2; + +#ifdef DEBUG_ARENA + fprintf(stderr, "%s: #%zu grew arena %p size=%zu (%zuMB)\n", __func__, mrt - mra->tags, mran, mran->size, mran->size >> 20); +#endif + + /* verify that we grow right */ + assert(fy_size_t_align(mran->next, align) + size <= mran->size); + return 0; + } + + return -1; +} + +static int fy_mremap_arena_trim(struct fy_mremap_allocator *mra, struct fy_mremap_tag *mrt, struct fy_mremap_arena *mran) +{ + size_t new_size; + void *mem FY_DEBUG_UNUSED; + + if (!mran) + return -1; + + switch (mra->arena_type) { + case FYMRAT_MALLOC: + /* trim not 
possible (or is it? should be possible to probe) */ + break; + + case FYMRAT_MMAP: + /* check the page size */ + new_size = fy_size_t_align(mran->next, mra->pagesz); + if (new_size >= mran->size) + break; + +#ifdef DEBUG_ARENA + fprintf(stderr, "trim: %zu -> %zu\n", mran->size, new_size); +#endif + /* failure to shrink a mapping is unthinkable */ + mem = mremap(mran, mran->size, new_size, 0); + assert(mem != MAP_FAILED); + assert(mem == mran); + mran->size = new_size; + return 0; + } + + return -1; +} + +static inline struct fy_mremap_tag * +fy_mremap_tag_from_tag(struct fy_mremap_allocator *mra, fy_alloc_tag tag) +{ + if (!mra) + return NULL; + + if ((unsigned int)tag >= ARRAY_SIZE(mra->tags)) + return NULL; + + if (!fy_id_is_used(mra->ids, ARRAY_SIZE(mra->ids), (int)tag)) + return NULL; + + return &mra->tags[tag]; +} + +static void fy_mremap_tag_cleanup(struct fy_mremap_allocator *mra, struct fy_mremap_tag *mrt) +{ + struct fy_mremap_arena *mran; + size_t total_sys_alloc, total_wasted; + struct fy_mremap_arena_list *mranl, *arena_lists[2]; + unsigned int j; + int id; + + if (!mra || !mrt) + return; + + /* get the id from the pointer */ + id = mrt - mra->tags; + assert((unsigned int)id < ARRAY_SIZE(mra->tags)); + + /* already clean? 
*/ + if (fy_id_is_free(mra->ids, ARRAY_SIZE(mra->ids), id)) + return; + + fy_id_free(mra->ids, ARRAY_SIZE(mra->ids), id); + + total_sys_alloc = 0; + total_wasted = 0; + arena_lists[0] = &mrt->arenas; + arena_lists[1] = &mrt->full_arenas; + for (j = 0; j < ARRAY_SIZE(arena_lists); j++) { + mranl = arena_lists[j]; + for (mran = fy_mremap_arena_list_head(mranl); mran; mran = fy_mremap_arena_next(mranl, mran)) { + total_sys_alloc += mran->size; + total_wasted += (mran->size - mran->next); + } + } + /* keep the variables without warning */ + (void)total_sys_alloc; + (void)total_wasted; + // fprintf(stderr, "total_sys_alloc=%zu total_wasted=%zu\n", total_sys_alloc, total_wasted); + +#ifdef DEBUG_ARENA + fprintf(stderr, "%s: destroying active arenas\n", __func__); +#endif + while ((mran = fy_mremap_arena_list_pop(&mrt->arenas)) != NULL) + fy_mremap_arena_destroy(mra, mrt, mran); + +#ifdef DEBUG_ARENA + fprintf(stderr, "%s: destroying full arenas\n", __func__); +#endif + while ((mran = fy_mremap_arena_list_pop(&mrt->full_arenas)) != NULL) + fy_mremap_arena_destroy(mra, mrt, mran); +} + +static void fy_mremap_tag_trim(struct fy_mremap_allocator *mra, struct fy_mremap_tag *mrt) +{ + struct fy_mremap_arena *mran; + struct fy_mremap_arena_list *mranl, *arena_lists[2]; + size_t wasted_before, wasted_after; + unsigned int j; + + if (!mra || !mrt) + return; + + if (!fy_mremap_arena_type_is_trimmable(mra->arena_type)) + return; + + arena_lists[0] = &mrt->arenas; + arena_lists[1] = &mrt->full_arenas; + + wasted_before = 0; + wasted_after = 0; + for (j = 0; j < ARRAY_SIZE(arena_lists); j++) { + mranl = arena_lists[j]; + for (mran = fy_mremap_arena_list_head(mranl); mran; mran = fy_mremap_arena_next(mranl, mran)) { + wasted_before += (mran->size - mran->next); + (void)fy_mremap_arena_trim(mra, mrt, mran); + wasted_after += (mran->size - mran->next); + } + } + /* keep the variables without warning */ + (void)wasted_before; + (void)wasted_after; + // fprintf(stderr, "wasted_before=%zu 
wasted_after=%zu\n", wasted_before, wasted_after); +} + +static void fy_mremap_tag_reset(struct fy_mremap_allocator *mra, struct fy_mremap_tag *mrt) +{ + struct fy_mremap_arena *mran; + + if (!mra || !mrt) + return; + + /* just destroy the arenas */ + while ((mran = fy_mremap_arena_list_pop(&mrt->arenas)) != NULL) + fy_mremap_arena_destroy(mra, mrt, mran); + + while ((mran = fy_mremap_arena_list_pop(&mrt->full_arenas)) != NULL) + fy_mremap_arena_destroy(mra, mrt, mran); +} + +static void fy_mremap_tag_setup(struct fy_mremap_allocator *mra, struct fy_mremap_tag *mrt) +{ + assert(mra); + assert(mrt); + + memset(mrt, 0, sizeof(*mrt)); + fy_mremap_arena_list_init(&mrt->arenas); + fy_mremap_arena_list_init(&mrt->full_arenas); + mrt->next_arena_sz = mra->pagesz; +} + +static void *fy_mremap_tag_alloc(struct fy_mremap_allocator *mra, struct fy_mremap_tag *mrt, size_t size, size_t align) +{ + struct fy_mremap_arena *mran; + size_t left, size_page_align; + void *ptr; + int rc; + + /* calculate how many pages new allocation is */ + size_page_align = fy_size_t_align(size + FY_MREMAP_ARENA_OVERHEAD, mra->pagesz); + + if (size_page_align > mra->big_alloc_threshold) { + + mran = fy_mremap_arena_create(mra, mrt, size); + if (!mran) + goto err_out; + + // fprintf(stderr, "allocated new big mran->size=%zu size=%zu\n", mran->size, size); + + fy_mremap_arena_list_add_tail(&mrt->arenas, mran); + goto do_alloc; + } + + /* 'small' allocation, try to find an arena that fits first */ + for (mran = fy_mremap_arena_list_head(&mrt->arenas); mran; + mran = fy_mremap_arena_next(&mrt->arenas, mran)) { + left = mran->size - fy_size_t_align(mran->next, align); + if (left >= size) { + /* make this the new head */ + if (mran != fy_mremap_arena_list_head(&mrt->arenas)) { + fy_mremap_arena_list_del(&mrt->arenas, mran); + fy_mremap_arena_list_add(&mrt->arenas, mran); + } + goto do_alloc; + } + } + + /* not found space in any arena, try to grow */ + if 
(fy_mremap_arena_type_is_growable(mra->arena_type)) { + for (mran = fy_mremap_arena_list_head(&mrt->arenas); mran; + mran = fy_mremap_arena_next(&mrt->arenas, mran)) { + + rc = fy_mremap_arena_grow(mra, mrt, mran, size, align); + if (rc) + continue; + + // fprintf(stderr, "grow successful mran->size=%zu size=%zu\n", mran->size, size); + + left = mran->size - fy_size_t_align(mran->next, align); + assert(left >= size); + + /* make this the new head */ + if (mran != fy_mremap_arena_list_head(&mrt->arenas)) { + fy_mremap_arena_list_del(&mrt->arenas, mran); + fy_mremap_arena_list_add(&mrt->arenas, mran); + } + goto do_alloc; + } + + } + + /* everything failed, we have to allocate a new arena */ + + /* increase by the ratio until we're over */ + while (mrt->next_arena_sz < size) + mrt->next_arena_sz = (size_t)(mrt->next_arena_sz * mra->grow_ratio); + + /* all failed, just new */ + mran = fy_mremap_arena_create(mra, mrt, mrt->next_arena_sz); + if (!mran) + goto err_out; + + mrt->next_arena_sz = (size_t)(mrt->next_arena_sz * mra->grow_ratio); + + // fprintf(stderr, "allocated new %p mran->size=%zu size=%zu\n", mran, mran->size, size); + + fy_mremap_arena_list_add(&mrt->arenas, mran); + +do_alloc: + mran->next = fy_size_t_align(mran->next, align); + ptr = (void *)mran + mran->next; + mran->next += size; + left = mran->size - mran->next; + assert((ssize_t)left >= 0); + + /* if it's empty, or almost empty, move it to the full arenas list */ + if (left < mra->empty_threshold) { + + /* if the arena is growable, try to grow it */ + if (fy_mremap_arena_type_is_growable(mra->arena_type)) { + rc = fy_mremap_arena_grow(mra, mrt, mran, size, align); + if (!rc) + left = mran->size - mran->next; + } + + /* still under the threshold, move it to full */ + if (left < mra->empty_threshold) { + // fprintf(stderr, "move %p mran->size=%zu left=%zu to free\n", mran, mran->size, left); + fy_mremap_arena_list_del(&mrt->arenas, mran); + fy_mremap_arena_list_add_tail(&mrt->full_arenas, mran); + 
+#ifdef DEBUG_ARENA + fprintf(stderr, "%s: #%zu moved arena %p size=%zu (%zuMB) to full\n", __func__, mrt - mra->tags, mran, mran->size, mran->size >> 20); +#endif + } + + } + + // fprintf(stderr, "this %zu next %zu size %zu\n", (size_t)(s - (void *)mran), mran->next, size); + // + return ptr; + +err_out: + return NULL; +} + +static const struct fy_mremap_setup_data default_setup_data = { + .big_alloc_threshold = SIZE_MAX, + .empty_threshold = 64, + .minimum_arena_size = 1U << 20, + .grow_ratio = 2.0, + .balloon_ratio = 32.0, + .arena_type = FYMRAT_MMAP, +}; + +static int fy_mremap_setup(struct fy_allocator *a, const void *data) +{ + const struct fy_mremap_setup_data *d; + struct fy_mremap_allocator *mra; + + if (!a) + return -1; + + d = data ? data : &default_setup_data; + + mra = container_of(a, struct fy_mremap_allocator, a); + memset(mra, 0, sizeof(*mra)); + mra->a.name = "mremap"; + mra->a.ops = &fy_mremap_allocator_ops; + mra->pagesz = sysconf(_SC_PAGESIZE); + /* pagesz is size of 2 find the first set bit */ + mra->pageshift = fy_id_ffs((fy_id_bits)mra->pagesz); + +#if 0 + /* config */ + mra->big_alloc_threshold = SIZE_MAX; /* no big alloc (by default) */ + mra->empty_threshold = 64; /* below that free assume it's empty */ + mra->minimum_arena_size = 1U << 20; /* minimum arena size is 1MB */ + mra->grow_ratio = 2.0; + mra->balloon_ratio = 32.0; /* ballon ratio is 32 (so 1MB -> 32MB vm size) */ + mra->arena_type = FYMRAT_MMAP; +#else + mra->big_alloc_threshold = d->big_alloc_threshold; + mra->empty_threshold = d->empty_threshold; + mra->minimum_arena_size = d->minimum_arena_size; + mra->grow_ratio = d->grow_ratio; + mra->balloon_ratio = d->balloon_ratio; + mra->arena_type = d->arena_type; +#endif + + fy_id_reset(mra->ids, ARRAY_SIZE(mra->ids)); + return 0; +} + +static void fy_mremap_cleanup(struct fy_allocator *a) +{ + struct fy_mremap_allocator *mra; + struct fy_mremap_tag *mrt; + unsigned int i; + + if (!a) + return; + + mra = container_of(a, struct 
fy_mremap_allocator, a); + + for (i = 0, mrt = mra->tags; i < ARRAY_SIZE(mra->tags); i++, mrt++) + fy_mremap_tag_cleanup(mra, mrt); +} + +struct fy_allocator *fy_mremap_create(const void *setupdata) +{ + struct fy_mremap_allocator *mra = NULL; + int rc; + + mra = malloc(sizeof(*mra)); + if (!mra) + goto err_out; + + rc = fy_mremap_setup(&mra->a, setupdata); + if (rc) + goto err_out; + + return &mra->a; + +err_out: + if (mra) + free(mra); + + return NULL; +} + +void fy_mremap_destroy(struct fy_allocator *a) +{ + struct fy_mremap_allocator *mra; + + if (!a) + return; + + mra = container_of(a, struct fy_mremap_allocator, a); + fy_mremap_cleanup(a); + free(mra); +} + +void fy_mremap_dump(struct fy_allocator *a) +{ + struct fy_mremap_allocator *mra; + struct fy_mremap_tag *mrt; + struct fy_mremap_arena *mran; + struct fy_mremap_arena_list *mranl, *arena_lists[2]; + unsigned int i, j; + size_t count, active_count, full_count, total, system_total; + + if (!a) + return; + + mra = container_of(a, struct fy_mremap_allocator, a); + + fprintf(stderr, "mremap: "); + for (i = 0, mrt = mra->tags; i < ARRAY_SIZE(mra->tags); i++, mrt++) + fprintf(stderr, "%c", fy_id_is_free(mra->ids, ARRAY_SIZE(mra->ids), i) ? '.' 
: 'x'); + fprintf(stderr, "\n"); + + for (i = 0, mrt = mra->tags; i < ARRAY_SIZE(mra->tags); i++, mrt++) { + if (fy_id_is_free(mra->ids, ARRAY_SIZE(mra->ids), i)) + continue; + + count = full_count = active_count = total = system_total = 0; + arena_lists[0] = &mrt->arenas; + arena_lists[1] = &mrt->full_arenas; + for (j = 0; j < ARRAY_SIZE(arena_lists); j++) { + mranl = arena_lists[j]; + for (mran = fy_mremap_arena_list_head(mranl); mran; mran = fy_mremap_arena_next(mranl, mran)) { + total += mran->next; + system_total += mran->size; + count++; + if (j == 0) + active_count++; + else + full_count++; + } + } + + fprintf(stderr, " %d: count %zu (a=%zu/f=%zu) total %zu system %zu overhead %zu (%2.2f%%)\n", i, + count, active_count, full_count, + total, system_total, system_total - total, + 100.0 * (double)(system_total - total) / (double)system_total); + } +} + +static void *fy_mremap_alloc(struct fy_allocator *a, fy_alloc_tag tag, size_t size, size_t align) +{ + struct fy_mremap_allocator *mra; + struct fy_mremap_tag *mrt; + void *ptr; + + if (!a || tag < 0) + return NULL; + + mra = container_of(a, struct fy_mremap_allocator, a); + + mrt = fy_mremap_tag_from_tag(mra, tag); + if (!mrt) + goto err_out; + + ptr = fy_mremap_tag_alloc(mra, mrt, size, align); + if (!ptr) + goto err_out; + + mrt->stats.allocations++; + mrt->stats.allocated += size; + + return ptr; + +err_out: + return NULL; +} + +static void fy_mremap_free(struct fy_allocator *a, fy_alloc_tag tag, void *data) +{ + /* no frees */ +} + +static int fy_mremap_update_stats(struct fy_allocator *a, fy_alloc_tag tag, struct fy_allocator_stats *stats) +{ + struct fy_mremap_allocator *mra; + struct fy_mremap_tag *mrt; + unsigned int i; + + if (!a || !stats) + return -1; + + mra = container_of(a, struct fy_mremap_allocator, a); + + mrt = fy_mremap_tag_from_tag(mra, tag); + if (!mrt) + goto err_out; + + /* and update with this ones */ + for (i = 0; i < ARRAY_SIZE(mrt->stats.counters); i++) { + stats->counters[i] += 
mrt->stats.counters[i]; + mrt->stats.counters[i] = 0; + } + + return 0; + +err_out: + return -1; +} + +static const void *fy_mremap_storev(struct fy_allocator *a, fy_alloc_tag tag, const struct fy_iovecw *iov, unsigned int iovcnt, size_t align) +{ + struct fy_mremap_allocator *mra; + struct fy_mremap_tag *mrt; + void *p, *start; + unsigned int i; + size_t total_size, size; + + if (!a || !iov) + return NULL; + + mra = container_of(a, struct fy_mremap_allocator, a); + + mrt = fy_mremap_tag_from_tag(mra, tag); + if (!mrt) + goto err_out; + + total_size = 0; + for (i = 0; i < iovcnt; i++) + total_size += iov[i].size; + + start = fy_mremap_tag_alloc(mra, mrt, total_size, align); + if (!start) + goto err_out; + + for (i = 0, p = start; i < iovcnt; i++, p += size) { + size = iov[i].size; + memcpy(p, iov[i].data, size); + } + + mrt->stats.stores++; + mrt->stats.stored += total_size; + + return start; + +err_out: + return NULL; +} + +static const void *fy_mremap_store(struct fy_allocator *a, fy_alloc_tag tag, const void *data, size_t size, size_t align) +{ + struct fy_iovecw iov[1]; + + if (!a) + return NULL; + + /* just call the storev */ + iov[0].data = data; + iov[0].size = size; + return fy_mremap_storev(a, tag, iov, 1, align); +} + +static void fy_mremap_release(struct fy_allocator *a, fy_alloc_tag tag, const void *data, size_t size) +{ + /* no releases */ +} + +static void fy_mremap_release_tag(struct fy_allocator *a, fy_alloc_tag tag) +{ + struct fy_mremap_allocator *mra; + struct fy_mremap_tag *mrt; + + if (!a) + return; + + mra = container_of(a, struct fy_mremap_allocator, a); + + mrt = fy_mremap_tag_from_tag(mra, tag); + if (!mrt) + return; + + fy_mremap_tag_cleanup(mra, mrt); +} + +static fy_alloc_tag fy_mremap_get_tag(struct fy_allocator *a, const void *tag_config) +{ + struct fy_mremap_allocator *mra; + struct fy_mremap_tag *mrt; + int id; + + if (!a) + return FY_ALLOC_TAG_ERROR; + + mra = container_of(a, struct fy_mremap_allocator, a); + + id = 
fy_id_alloc(mra->ids, ARRAY_SIZE(mra->ids)); + if (id < 0) + goto err_out; + + mrt = fy_mremap_tag_from_tag(mra, id); + assert(mrt); + + fy_mremap_tag_setup(mra, mrt); + + return (fy_alloc_tag)id; + +err_out: + return FY_ALLOC_TAG_ERROR; +} + +static void fy_mremap_trim_tag(struct fy_allocator *a, fy_alloc_tag tag) +{ + struct fy_mremap_allocator *mra; + struct fy_mremap_tag *mrt; + + if (!a) + return; + + mra = container_of(a, struct fy_mremap_allocator, a); + + mrt = fy_mremap_tag_from_tag(mra, tag); + if (!mrt) + return; + + fy_mremap_tag_trim(mra, mrt); +} + +static void fy_mremap_reset_tag(struct fy_allocator *a, fy_alloc_tag tag) +{ + struct fy_mremap_allocator *mra; + struct fy_mremap_tag *mrt; + + if (!a) + return; + + mra = container_of(a, struct fy_mremap_allocator, a); + + mrt = fy_mremap_tag_from_tag(mra, tag); + if (!mrt) + return; + + fy_mremap_tag_reset(mra, mrt); +} + +static ssize_t fy_mremap_get_areas(struct fy_allocator *a, fy_alloc_tag tag, struct fy_iovecw *iov, size_t maxiov) +{ + return -1; +} + +static const void *fy_mremap_get_single_area(struct fy_allocator *a, fy_alloc_tag tag, size_t *sizep, size_t *startp, size_t *allocp) +{ + struct fy_mremap_allocator *mra; + struct fy_mremap_tag *mrt; + struct fy_mremap_arena *mran; + + if (!a) + return NULL; + + mra = container_of(a, struct fy_mremap_allocator, a); + + mrt = fy_mremap_tag_from_tag(mra, tag); + if (!mrt) + return NULL; + + /* if there are any full arenas, or more than one active, it's not single area */ + if (!fy_mremap_arena_list_empty(&mrt->full_arenas) || + !fy_mremap_arena_list_is_singular(&mrt->arenas)) + return NULL; + + mran = fy_mremap_arena_list_head(&mrt->arenas); + assert(mran); + + *sizep = mran->next; + *startp = offsetof(struct fy_mremap_arena, mem); + *allocp = mran->size; + return mran; +} + +const struct fy_allocator_ops fy_mremap_allocator_ops = { + .setup = fy_mremap_setup, + .cleanup = fy_mremap_cleanup, + .create = fy_mremap_create, + .destroy = 
/* backing store used for the arenas of an mremap allocator */
enum fy_mremap_arena_type {
	FYMRAT_MALLOC,
	FYMRAT_MMAP,
};

/* only mmap backed arenas can be grown; every other value is not growable */
static inline bool fy_mremap_arena_type_is_growable(enum fy_mremap_arena_type type)
{
	switch (type) {
	case FYMRAT_MMAP:
		return true;
	case FYMRAT_MALLOC:
	default:
		return false;
	}
}

/* only mmap backed arenas can be trimmed; every other value is not trimmable */
static inline bool fy_mremap_arena_type_is_trimmable(enum fy_mremap_arena_type type)
{
	switch (type) {
	case FYMRAT_MMAP:
		return true;
	case FYMRAT_MALLOC:
	default:
		return false;
	}
}
+struct fy_mremap_tag { + struct fy_mremap_arena_list arenas; + struct fy_mremap_arena_list full_arenas; + size_t next_arena_sz; + struct fy_allocator_stats stats; +}; + +struct fy_mremap_allocator { + struct fy_allocator a; + size_t pagesz; + size_t pageshift; + size_t big_alloc_threshold; /* bigger than that and a new allocation */ + size_t empty_threshold; /* less than that and get moved to full */ + size_t minimum_arena_size; /* the minimum arena size */ + float grow_ratio; + float balloon_ratio; + enum fy_mremap_arena_type arena_type; + fy_id_bits ids[FY_ID_BITS_ARRAY_COUNT_BITS(FY_MREMAP_TAG_MAX)]; + struct fy_mremap_tag tags[FY_MREMAP_TAG_MAX]; +}; + +extern const struct fy_allocator_ops fy_mremap_allocator_ops; + +#endif diff --git a/src/util/fy-allocator.c b/src/util/fy-allocator.c new file mode 100644 index 00000000..ce8ed67a --- /dev/null +++ b/src/util/fy-allocator.c @@ -0,0 +1,311 @@ +/* + * fy-allocator.c - allocators + * + * Copyright (c) 2023 Pantelis Antoniou + * + * SPDX-License-Identifier: MIT + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include +#include +#include + +#include + +#include + +#ifndef FY_NON_LOCKING_REGISTRY +#include +#endif + +/* for container_of */ +#include "fy-list.h" +#include "fy-utils.h" + +#include "fy-allocator.h" +#include "fy-allocator-linear.h" +#include "fy-allocator-malloc.h" +#include "fy-allocator-mremap.h" +#include "fy-allocator-dedup.h" +#include "fy-allocator-auto.h" + +static struct fy_registered_allocator_entry_list allocator_registry_list; +static bool allocator_registry_initialized = false; +static bool allocator_registry_locked = false; + +#ifndef FY_NON_LOCKING_REGISTRY +static pthread_mutex_t allocator_registry_mutex = PTHREAD_MUTEX_INITIALIZER; +static inline void allocator_registry_lock(void) +{ + int rc FY_UNUSED; + + rc = pthread_mutex_lock(&allocator_registry_mutex); + assert(!rc); + + assert(!allocator_registry_locked); + allocator_registry_locked = 
true; +} + +static inline void allocator_registry_unlock(void) +{ + int rc FY_UNUSED; + + assert(allocator_registry_locked); + allocator_registry_locked = false; + + rc = pthread_mutex_unlock(&allocator_registry_mutex); + assert(!rc); +} +#else +static inline void allocator_registry_lock(void) +{ + /* nothing */ +} +static inline void allocator_registry_unlock(void) +{ + /* nothing */ +} +#endif + +static inline void allocator_registry_init(void) +{ + if (!allocator_registry_initialized) { + fy_registered_allocator_entry_list_init(&allocator_registry_list); + allocator_registry_initialized = true; + } +} + +static struct fy_registered_allocator_entry * +fy_registered_allocator_entry_create(const char *name, const struct fy_allocator_ops *ops) +{ + struct fy_registered_allocator_entry *ae; + + ae = malloc(sizeof(*ae)); + if (!ae) + return NULL; + memset(ae, 0, sizeof(*ae)); + ae->name = name; + ae->ops = ops; + + return ae; +} + +static void fy_registered_allocator_entry_destroy(struct fy_registered_allocator_entry *ae) +{ + if (!ae) + return; + free(ae); +} + +static const struct { + const char *name; + const struct fy_allocator_ops *ops; +} builtin_allocators[] = { + { + .name = "linear", + .ops = &fy_linear_allocator_ops, + }, { + .name = "malloc", + .ops = &fy_malloc_allocator_ops, + }, { + .name = "mremap", + .ops = &fy_mremap_allocator_ops, + }, { + .name = "dedup", + .ops = &fy_dedup_allocator_ops, + }, { + .name = "auto", + .ops = &fy_auto_allocator_ops, + } +}; + +int fy_allocator_register(const char *name, const struct fy_allocator_ops *ops) +{ + struct fy_registered_allocator_entry *ae; + unsigned int i; + int ret; + + if (!name || !ops || + !ops->setup || + !ops->cleanup || + !ops->create || + !ops->destroy || + !ops->dump || + !ops->alloc || + !ops->free || + !ops->update_stats || + !ops->store || + !ops->storev || + !ops->release || + !ops->get_tag || + !ops->release_tag || + !ops->trim_tag || + !ops->reset_tag || + !ops->get_areas || +
!ops->get_single_area) + return -1; + + allocator_registry_lock(); + allocator_registry_init(); + + ret = -1; + + /* must not clash with the builtins */ + for (i = 0; i < ARRAY_SIZE(builtin_allocators); i++) { + if (!strcmp(builtin_allocators[i].name, name)) + goto out_unlock; + } + + /* must not clash with the other entries */ + for (ae = fy_registered_allocator_entry_list_head(&allocator_registry_list); ae; + ae = fy_registered_allocator_entry_next(&allocator_registry_list, ae)) { + if (!strcmp(ae->name, name)) + goto out_unlock; + } + + /* OK, create the entry */ + ae = fy_registered_allocator_entry_create(name, ops); + if (!ae) + goto out_unlock; + + /* and add it to the list */ + fy_registered_allocator_entry_list_add(&allocator_registry_list, ae); + + /* all clear */ + ret = 0; + +out_unlock: + allocator_registry_unlock(); + + return ret; +} + +int fy_allocator_unregister(const char *name) +{ + struct fy_registered_allocator_entry *ae; + unsigned int i; + int ret; + + ret = -1; + + allocator_registry_lock(); + allocator_registry_init(); + + /* must not try to unregister a builtin */ + for (i = 0; i < ARRAY_SIZE(builtin_allocators); i++) { + if (!strcmp(builtin_allocators[i].name, name)) + goto out_unlock; + } + + /* find the entry now */ + for (ae = fy_registered_allocator_entry_list_head(&allocator_registry_list); ae; + ae = fy_registered_allocator_entry_next(&allocator_registry_list, ae)) { + if (!strcmp(ae->name, name)) + break; + } + if (!ae) + goto out_unlock; + + fy_registered_allocator_entry_list_del(&allocator_registry_list, ae); + + /* and destroy it */ + fy_registered_allocator_entry_destroy(ae); + + ret = 0; + +out_unlock: + allocator_registry_unlock(); + + return ret; +} + +struct fy_allocator *fy_allocator_create(const char *name, const void *setupdata) +{ + struct fy_registered_allocator_entry *ae; + const struct fy_allocator_ops *ops = NULL; + unsigned int i; + + if (!name) + name = builtin_allocators[0].name; + + allocator_registry_lock(); + 
allocator_registry_init(); + + /* try the builtins first */ + for (i = 0; i < ARRAY_SIZE(builtin_allocators); i++) { + if (!strcmp(builtin_allocators[i].name, name)) { + ops = builtin_allocators[i].ops; + break; + } + } + + /* if not found there, try the registry */ + if (!ops) { + for (ae = fy_registered_allocator_entry_list_head(&allocator_registry_list); ae; + ae = fy_registered_allocator_entry_next(&allocator_registry_list, ae)) { + if (!strcmp(ae->name, name)) { + ops = ae->ops; + break; + } + } + } + allocator_registry_unlock(); + if (!ops) + return NULL; + + return ops->create(setupdata); +} + +void fy_allocator_registry_cleanup_internal(bool show_leftovers) +{ + struct fy_registered_allocator_entry *ae; + + if (!allocator_registry_initialized) + return; + + allocator_registry_lock(); + while ((ae = fy_registered_allocator_entry_list_pop(&allocator_registry_list)) != NULL) { + if (show_leftovers) + fprintf(stderr, "%s: destroying %s\n", __func__, ae->name); + fy_registered_allocator_entry_destroy(ae); + } + allocator_registry_unlock(); +} + +void fy_allocator_registry_cleanup(void) +{ + fy_allocator_registry_cleanup_internal(false); +} + +#ifdef FY_HAS_CONSTRUCTOR +static FY_CONSTRUCTOR void fy_allocator_registry_constructor(void) +{ + allocator_registry_init(); +} +#endif + +#ifdef FY_HAS_DESTRUCTOR +static FY_DESTRUCTOR void fy_allocator_registry_destructor(void) +{ + bool show_leftovers = false; + +#ifdef FY_DESTRUCTOR_SHOW_LEFTOVERS + show_leftovers = true; +#endif + + /* make sure the registry is not locked because we will hang */ + if (allocator_registry_locked) { + if (show_leftovers) + fprintf(stderr, "%s: refusing to work on locked registry\n", __func__); + return; + } + + fy_allocator_registry_cleanup_internal(show_leftovers); +} +#endif diff --git a/src/util/fy-allocator.h b/src/util/fy-allocator.h new file mode 100644 index 00000000..8fffd7a2 --- /dev/null +++ b/src/util/fy-allocator.h @@ -0,0 +1,193 @@ +/* + * fy-allocator.h - allocators + * + * 
Copyright (c) 2023 Pantelis Antoniou + * + * SPDX-License-Identifier: MIT + */ +#ifndef FY_ALLOCATOR_H +#define FY_ALLOCATOR_H + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include + +#include "fy-typelist.h" + +struct fy_allocator; +struct fy_allocator_stats; + +typedef int fy_alloc_tag; + +#define FY_ALLOC_TAG_ERROR ((fy_alloc_tag)-1) +#define FY_ALLOC_TAG_NONE FY_ALLOC_TAG_ERROR + +struct fy_iovecw { + const void *data; + size_t size; +}; + +struct fy_allocator_stats; + +struct fy_allocator_ops { + int (*setup)(struct fy_allocator *a, const void *setupdata); + void (*cleanup)(struct fy_allocator *a); + struct fy_allocator *(*create)(const void *setupdata); + void (*destroy)(struct fy_allocator *a); + void (*dump)(struct fy_allocator *a); + void *(*alloc)(struct fy_allocator *a, fy_alloc_tag tag, size_t size, size_t align); + void (*free)(struct fy_allocator *a, fy_alloc_tag tag, void *data); + int (*update_stats)(struct fy_allocator *a, fy_alloc_tag tag, struct fy_allocator_stats *stats); + const void *(*store)(struct fy_allocator *a, fy_alloc_tag tag, const void *data, size_t size, size_t align); + const void *(*storev)(struct fy_allocator *a, fy_alloc_tag tag, const struct fy_iovecw *iov, unsigned int iovcnt, size_t align); + void (*release)(struct fy_allocator *a, fy_alloc_tag tag, const void *data, size_t size); + fy_alloc_tag (*get_tag)(struct fy_allocator *a, const void *tag_config); + void (*release_tag)(struct fy_allocator *a, fy_alloc_tag tag); + void (*trim_tag)(struct fy_allocator *a, fy_alloc_tag tag); + void (*reset_tag)(struct fy_allocator *a, fy_alloc_tag tag); + ssize_t (*get_areas)(struct fy_allocator *a, fy_alloc_tag tag, struct fy_iovecw *iov, size_t maxiov); + const void *(*get_single_area)(struct fy_allocator *a, fy_alloc_tag tag, size_t *sizep, size_t *startp, size_t *allocp); +}; + +struct fy_allocator_stats { + union { + struct { + uint64_t allocations; + uint64_t allocated; + uint64_t frees; + 
uint64_t freed; + uint64_t stores; + uint64_t stored; + uint64_t releases; + uint64_t released; + uint64_t dup_stores; + uint64_t dup_saved; + uint64_t system_claimed; + uint64_t system_free; + }; + uint64_t counters[12]; + }; +}; + +struct fy_allocator { + const char *name; + const struct fy_allocator_ops *ops; +}; + +struct fy_allocator *fy_allocator_create(const char *name, const void *setupdata); + +static inline void fy_allocator_destroy(struct fy_allocator *a) +{ + if (!a) + return; + a->ops->destroy(a); +} + +static inline void fy_allocator_dump(struct fy_allocator *a) +{ + if (!a) + return; + a->ops->dump(a); +} + +static inline int fy_allocator_update_stats(struct fy_allocator *a, fy_alloc_tag tag, struct fy_allocator_stats *stats) +{ + if (!a) + return -1; + return a->ops->update_stats(a, tag, stats); +} + +static inline void *fy_allocator_alloc(struct fy_allocator *a, fy_alloc_tag tag, size_t size, size_t align) +{ + if (!a) + return NULL; + return a->ops->alloc(a, tag, size, align); +} + +static inline void fy_allocator_free(struct fy_allocator *a, fy_alloc_tag tag, void *ptr) +{ + if (!a || !ptr) + return; + a->ops->free(a, tag, ptr); +} + +static inline const void *fy_allocator_store(struct fy_allocator *a, fy_alloc_tag tag, const void *data, size_t size, size_t align) +{ + if (!a) + return NULL; + return a->ops->store(a, tag, data, size, align); +} + +static inline const void *fy_allocator_storev(struct fy_allocator *a, fy_alloc_tag tag, const struct fy_iovecw *iov, unsigned int iovcnt, size_t align) +{ + if (!a) + return NULL; + return a->ops->storev(a, tag, iov, iovcnt, align); +} + +static inline void fy_allocator_release(struct fy_allocator *a, fy_alloc_tag tag, const void *ptr, size_t size) +{ + if (!a || !ptr) + return; + a->ops->release(a, tag, ptr, size); +} + +static inline fy_alloc_tag fy_allocator_get_tag(struct fy_allocator *a, const void *tag_config) +{ + if (!a) + return 0; + return a->ops->get_tag(a, tag_config); +} + +static inline 
void fy_allocator_release_tag(struct fy_allocator *a, fy_alloc_tag tag) +{ + if (!a) + return; + a->ops->release_tag(a, tag); +} + +static inline void fy_allocator_trim_tag(struct fy_allocator *a, fy_alloc_tag tag) +{ + if (!a) + return; + a->ops->trim_tag(a, tag); +} + +static inline void fy_allocator_reset_tag(struct fy_allocator *a, fy_alloc_tag tag) +{ + if (!a) + return; + a->ops->reset_tag(a, tag); +} + +static inline ssize_t fy_allocator_get_areas(struct fy_allocator *a, fy_alloc_tag tag, struct fy_iovecw *iov, size_t maxiov) +{ + if (!a) + return -1; + return a->ops->get_areas(a, tag, iov, maxiov); +} + +static inline const void *fy_allocator_get_single_area(struct fy_allocator *a, fy_alloc_tag tag, size_t *sizep, size_t *startp, size_t *allocp) +{ + if (!a) + return NULL; + return a->ops->get_single_area(a, tag, sizep, startp, allocp); +} + +FY_TYPE_FWD_DECL_LIST(registered_allocator_entry); +struct fy_registered_allocator_entry { + struct list_head node; + const char *name; + const struct fy_allocator_ops *ops; +}; +FY_TYPE_DECL_LIST(registered_allocator_entry); + +int fy_allocator_register(const char *name, const struct fy_allocator_ops *ops); +int fy_allocator_unregister(const char *name); + +#endif diff --git a/src/util/fy-generic-decoder.c b/src/util/fy-generic-decoder.c new file mode 100644 index 00000000..828e6a29 --- /dev/null +++ b/src/util/fy-generic-decoder.c @@ -0,0 +1,894 @@ +/* + * fy-generic-decoder.h - generic decoder (yaml -> generic) + * + * Copyright (c) 2023 Pantelis Antoniou + * + * SPDX-License-Identifier: MIT + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include + +#include + +#include "fy-docstate.h" +#include "fy-diag.h" +#include "fy-parse.h" + +#include "fy-generic.h" +#include "fy-generic-decoder.h" + +static struct fy_generic_decoder_obj *fy_generic_decoder_object_create(struct fy_generic_decoder *gd, + enum fy_generic_decoder_object_type type, fy_generic anchor, fy_generic tag); +static void 
fy_generic_decoder_object_destroy(struct fy_generic_decoder_obj *gdo); +static fy_generic fy_generic_decoder_object_finalize(struct fy_generic_decoder *gd, struct fy_generic_decoder_obj *gdo); +static fy_generic fy_generic_decoder_object_finalize_and_destroy(struct fy_generic_decoder *gd, + struct fy_generic_decoder_obj *gdo); + +static int fy_generic_decoder_object_add_item(struct fy_generic_decoder_obj *gdo, fy_generic item); +static fy_generic fy_generic_decoder_create_scalar(struct fy_generic_decoder *gd, struct fy_event *fye, + fy_generic va, fy_generic vt); + +static int fy_generic_decoder_anchor_register(struct fy_generic_decoder *fygd, fy_generic anchor, fy_generic content); +static fy_generic fy_generic_decoder_alias_resolve(struct fy_generic_decoder *fygd, fy_generic anchor); +static bool fy_generic_decoder_alias_is_collecting(struct fy_generic_decoder *fygd, fy_generic anchor); +static void fy_generic_decoder_anchor_collection_starts(struct fy_generic_decoder *fygd); +static void fy_generic_decoder_anchor_collection_ends(struct fy_generic_decoder *fygd, fy_generic v); + +struct fy_generic_decoder_obj * +fy_generic_decoder_object_create(struct fy_generic_decoder *gd, enum fy_generic_decoder_object_type type, + fy_generic anchor, fy_generic tag) +{ + struct fy_generic_decoder_obj *gdo; + + if (!gd || !fy_generic_decoder_object_type_is_valid(type)) + return NULL; + + gdo = malloc(sizeof(*gdo)); + if (!gdo) + return NULL; + + memset(gdo, 0, sizeof(*gdo)); + gdo->type = type; + gdo->anchor = anchor; + gdo->tag = tag; + + gdo->v = fy_invalid; + gdo->vds = fy_invalid; + + return gdo; +} + +static void +fy_generic_decoder_object_destroy(struct fy_generic_decoder_obj *gdo) +{ + if (!gdo) + return; + + if (gdo->items) + free(gdo->items); + if (gdo->fyds) + fy_document_state_unref(gdo->fyds); + free(gdo); +} + +static fy_generic +fy_generic_decoder_object_finalize(struct fy_generic_decoder *gd, struct fy_generic_decoder_obj *gdo) +{ + fy_generic v, vi; + bool 
needs_indirect; + const struct fy_version *vers; + struct fy_tag **tags; + size_t i, count; + fy_generic *vtags_items; + fy_generic vtags; + bool version_explicit; + bool tags_explicit; + + v = fy_invalid; + + assert(gdo); + + switch (gdo->type) { + + case FYGDOT_ROOT: + if (gdo->count > 1) + return fy_invalid; + + if (gdo->count == 0) + v = fy_null; + else + v = gdo->items[0]; + + break; + + case FYGDOT_SEQUENCE: + v = fy_generic_sequence_create(gd->gb, gdo->count, gdo->items); + break; + + case FYGDOT_MAPPING: + assert((gdo->count % 2) == 0); + v = fy_generic_mapping_create(gd->gb, gdo->count / 2, gdo->items); + break; + + default: + assert(0); + abort(); + break; + } + + needs_indirect = !gd->resolve && + ((gdo->anchor != fy_null && gdo->anchor != fy_invalid) || + (gdo->tag != fy_null && gdo->tag != fy_invalid)); + + if (needs_indirect) { + struct fy_generic_indirect gi = { + .value = v, + .anchor = gdo->anchor, + .tag = gdo->tag, + }; + + vi = fy_generic_indirect_create(gd->gb, &gi); + assert(vi != fy_invalid); + v = vi; + } + + if (gdo->items) + free(gdo->items); + + gdo->items = NULL; + gdo->alloc = 0; + gdo->count = 0; + + gdo->v = v; + + /* root object, create the document state too */ + if (gdo->type == FYGDOT_ROOT && gdo->fyds) { + vers = fy_document_state_version(gdo->fyds); + assert(vers); + + tags = fy_document_state_tag_directives(gdo->fyds); + assert(tags); + count = 0; + while (tags[count]) + count++; + + version_explicit = fy_document_state_version_explicit(gdo->fyds); + tags_explicit = fy_document_state_tags_explicit(gdo->fyds); + + vtags_items = alloca(sizeof(*vtags_items) * count); + for (i = 0; i < count; i++) + vtags_items[i] = fy_generic_mapping_create(gd->gb, 2, (fy_generic[]) { + fy_generic_string_create(gd->gb, "handle"), fy_generic_string_create(gd->gb, tags[i]->handle), + fy_generic_string_create(gd->gb, "prefix"), fy_generic_string_create(gd->gb, tags[i]->prefix)}); + + vtags = fy_generic_sequence_create(gd->gb, count, vtags_items); + + 
gdo->vds = fy_generic_mapping_create(gd->gb, 4, (fy_generic[]) { + fy_generic_string_create(gd->gb, "version"), fy_generic_mapping_create(gd->gb, 2, (fy_generic[]) { + fy_generic_string_create(gd->gb, "major"), fy_generic_int_create(gd->gb, vers->major), + fy_generic_string_create(gd->gb, "minor"), fy_generic_int_create(gd->gb, vers->minor)}), + fy_generic_string_create(gd->gb, "version-explicit"), fy_generic_bool_create(gd->gb, version_explicit), + fy_generic_string_create(gd->gb, "tags"), vtags, + fy_generic_string_create(gd->gb, "tags-explicit"), fy_generic_bool_create(gd->gb, tags_explicit)}); + + free(tags); + } + + return v; +} + +static fy_generic +fy_generic_decoder_object_finalize_and_destroy(struct fy_generic_decoder *gd, struct fy_generic_decoder_obj *gdo) +{ + fy_generic v; + + v = fy_generic_decoder_object_finalize(gd, gdo); + fy_generic_decoder_object_destroy(gdo); + + return v; +} + +static inline int +fy_generic_item_append(fy_generic **itemsp, size_t *countp, size_t *allocp, fy_generic v) +{ + size_t new_alloc; + fy_generic *new_items; + + if (*countp >= *allocp) { + new_alloc = *allocp * 2; + if (new_alloc < 32) + new_alloc = 32; + + new_items = realloc(*itemsp, new_alloc * sizeof(*new_items)); + if (!new_items) + return -1; + *itemsp = new_items; + *allocp = new_alloc; + } + + (*itemsp)[(*countp)++] = v; + return 0; +} + +static inline bool +fy_generic_decoder_object_mapping_on_key(struct fy_generic_decoder_obj *gdo) +{ + return gdo && gdo->type == FYGDOT_MAPPING && (gdo->count & 1) == 0; +} + +static inline bool +fy_generic_decoder_object_mapping_on_value(struct fy_generic_decoder_obj *gdo) +{ + return gdo && gdo->type == FYGDOT_MAPPING && (gdo->count & 1) == 1; +} + +static inline void +fy_generic_decoder_object_mapping_expect_merge_key_value(struct fy_generic_decoder_obj *gdo) +{ + if (!gdo || gdo->type != FYGDOT_MAPPING) + return; + gdo->next_is_merge_args = true; +} + +static inline bool 
+fy_generic_decoder_object_mapping_on_merge_key_value(struct fy_generic_decoder_obj *gdo) +{ + return gdo && gdo->type == FYGDOT_MAPPING && gdo->next_is_merge_args; +} + +static int +fy_generic_decoder_object_handle_merge_key_value(struct fy_generic_decoder_obj *gdo, fy_generic item) +{ + const fy_generic *pairs, *items; + fy_generic *tmp_pairs = NULL; + fy_generic vk, vv; + size_t i, j, k, l, count, map_count, total_count = 0; + int rc; + + if (!fy_generic_decoder_object_mapping_on_merge_key_value(gdo)) + return 1; /* not a merge key value */ + + assert(gdo->next_is_merge_args); + + gdo->next_is_merge_args = false; + + if (fy_generic_get_type(item) == FYGT_MAPPING) { + pairs = fy_generic_mapping_get_pairs(item, &count); + count *= 2; + for (i = 0; i < count; i++) { + rc = fy_generic_item_append(&gdo->items, &gdo->count, &gdo->alloc, pairs[i]); + if (rc) + return -1; + } + + return 0; + } + + /* it must be a sequence then */ + if (fy_generic_get_type(item) != FYGT_SEQUENCE) + return -1; + + total_count = 0; + items = fy_generic_sequence_get_items(item, &map_count); + for (j = 0; j < map_count; j++) { + /* must be mapping, and check for it */ + if (fy_generic_get_type(items[j]) != FYGT_MAPPING) + return -1; + total_count += fy_generic_mapping_get_pair_count(items[j]); + } + + /* nothing? 
alright then */ + if (total_count == 0) + return 0; + + /* allocate worst case */ + if (total_count <= 32) + tmp_pairs = alloca(sizeof(*tmp_pairs) * total_count * 2); + else { + tmp_pairs = malloc(sizeof(*tmp_pairs) * total_count * 2); + if (!tmp_pairs) + return -1; + } + + k = 0; + for (j = 0; j < map_count; j++) { + pairs = fy_generic_mapping_get_pairs(items[j], &count); + for (i = 0; i < count; i++) { + vk = pairs[i * 2]; + vv = pairs[i * 2 + 1]; + + /* check if key already exists */ + for (l = 0; l < k; l++) { + if (fy_generic_compare(vk, tmp_pairs[l * 2]) == 0) + break; + } + /* already exists in tmp map, skip */ + if (l < k) + continue; + + assert(k < total_count); + tmp_pairs[k * 2] = vk; + tmp_pairs[k * 2 + 1] = vv; + k++; + } + } + + /* ok, insert whatever is in tmp_pairs to the current map */ + for (l = 0; l < k * 2; l++) { + rc = fy_generic_item_append(&gdo->items, &gdo->count, &gdo->alloc, tmp_pairs[l]); + if (rc) + goto err_out; + } + + if (total_count > 32) + free(tmp_pairs); + + return 0; + +err_out: + if (tmp_pairs && total_count > 32) + free(tmp_pairs); + return -1; +} + +static int +fy_generic_decoder_object_add_item(struct fy_generic_decoder_obj *gdo, fy_generic item) +{ + assert(gdo); + return fy_generic_item_append(&gdo->items, &gdo->count, &gdo->alloc, item); +} + +static fy_generic +fy_generic_decoder_create_scalar(struct fy_generic_decoder *gd, struct fy_event *fye, fy_generic va, fy_generic vt) +{ + struct fy_token *fyt; + enum fy_scalar_style style; + bool needs_indirect; + const char *text; + size_t len; + bool is_string; + fy_generic v, vi; + + assert(fye); + assert(fye->type == FYET_SCALAR); + + fyt = fy_event_get_token(fye); + assert(fyt); + + text = fy_token_get_text(fyt, &len); + assert(text); + + style = fy_token_scalar_style(fyt); + if (style != FYSS_PLAIN) + is_string = true; + else + is_string = true; /* all strings for now */ + + v = fy_invalid; + if (is_string) + v = fy_generic_string_size_create(gd->gb, text, len); + + 
needs_indirect = !gd->resolve && + ((va != fy_null && va != fy_invalid) || + (vt != fy_null && vt != fy_invalid)); + + if (needs_indirect) { + struct fy_generic_indirect gi = { + .value = v, + .anchor = va, + .tag = vt, + }; + + vi = fy_generic_indirect_create(gd->gb, &gi); + assert(vi != fy_invalid); + v = vi; + } + + if (v == fy_invalid) + return fy_invalid; + + return v; +} + +static bool +fy_generic_decoder_is_merge_key(struct fy_generic_decoder *gd, struct fy_generic_decoder_obj *gdop, struct fy_event *fye) +{ + return gd && gdop && fye && + fye->type == FYET_SCALAR && + gd->gdo_root && gd->resolve && gd->gdo_root->supports_merge_key && + fy_generic_decoder_object_mapping_on_key(gdop) && + fy_atom_is_merge_key(fy_token_atom(fye->scalar.value)); +} + +static bool +fy_generic_decoder_is_valid_merge_key_arg(struct fy_generic_decoder *gd, struct fy_generic_decoder_obj *gdop, fy_generic v) +{ + enum fy_generic_type type; + const fy_generic *items; + size_t i, count; + + if (!gd || !gd->resolve) + return false; + + type = fy_generic_get_type(v); + + /* mapping? 
OK */ + if (type == FYGT_MAPPING) + return true; + + /* must be a sequence now */ + if (type != FYGT_SEQUENCE) + return false; + + /* the sequence must be nothing but mappings */ + items = fy_generic_sequence_get_items(v, &count); + for (i = 0; i < count; i++) { + if (fy_generic_get_type(items[i]) != FYGT_MAPPING) + return false; + } + + /* all well */ + return true; +} + +static enum fy_composer_return +fy_generic_compose_process_event(struct fy_parser *fyp, struct fy_event *fye, struct fy_path *path, void *userdata) +{ + struct fy_generic_decoder *gd = userdata; + struct fy_generic_decoder_obj *gdo, *gdop = NULL; + struct fy_token *fyt_anchor, *fyt_tag; + const struct fy_version *vers; + const char *anchor, *tag; + size_t anchor_size, tag_size; + enum fy_composer_return ret; + fy_generic v, va, vt; + int rc __FY_DEBUG_UNUSED__; + + assert(gd); + if (gd->verbose) { + fprintf(stderr, "%s: %c%c%c%c%c %3d - %-32s\n", + fy_event_type_get_text(fye->type), + fy_path_in_root(path) ? 'R' : '-', + fy_path_in_sequence(path) ? 'S' : '-', + fy_path_in_mapping(path) ? 'M' : '-', + fy_path_in_mapping_key(path) ? 'K' : + fy_path_in_mapping_value(path) ? 'V' : '-', + fy_path_in_collection_root(path) ? 
'/' : '-', + fy_path_depth(path), + fy_path_get_text_alloca(path)); + } + + + fyt_anchor = fy_event_get_anchor_token(fye); + if (fyt_anchor) { + anchor = fy_token_get_text(fyt_anchor, &anchor_size); + fyp_error_check(fyp, anchor, err_out, "fy_token_get_text() failed"); + + va = fy_generic_string_size_create(gd->gb, anchor, anchor_size); + fyp_error_check(fyp, va != fy_invalid, err_out, "fy_generic_string_size_create() failed"); + + } else { + anchor = NULL; + anchor_size = 0; + va = fy_null; + } + + fyt_tag = fy_event_get_tag_token(fye); + if (fyt_tag) { + tag = fy_tag_token_short(fyt_tag, &tag_size); + fyp_error_check(fyp, tag, err_out, "fy_tag_token_short() failed"); + + vt = fy_generic_string_size_create(gd->gb, tag, tag_size); + fyp_error_check(fyp, vt != fy_invalid, err_out, "fy_generic_string_size_create() failed"); + } else { + tag = NULL; + tag_size = 0; + vt = fy_null; + } + + ret = FYCR_OK_CONTINUE; + switch (fye->type) { + + case FYET_STREAM_START: + case FYET_STREAM_END: + ret = FYCR_OK_CONTINUE; + break; + + case FYET_ALIAS: + anchor = fy_token_get_text(fy_event_get_token(fye), &anchor_size); + fyp_error_check(fyp, anchor, err_out, "fy_token_get_text() failed"); + + if (gd->resolve) { + v = fy_generic_decoder_alias_resolve(gd, + fy_generic_string_size_alloca(anchor, anchor_size)); + if (v == fy_invalid) { + fy_parser_report_error(fyp, fy_event_get_token(fye), + !fy_generic_decoder_alias_is_collecting(gd, va) ? /* NOTE(review): va comes from the anchor token, the lookup above keys on the event token -- confirm they name the same anchor */
+ "Unable to resolve alias" : + "Recursive reference to alias"); + goto err_out; + } + } else { + v = fy_generic_alias_create(gd->gb, + fy_generic_string_size_create(gd->gb, anchor, anchor_size)); + fyp_error_check(fyp, v != fy_invalid, err_out, "fy_generic_alias_create() failed"); + } + + anchor = NULL; + goto add_item; + + case FYET_SCALAR: + + gdop = fy_path_get_parent_user_data(path); + + if (fy_generic_decoder_is_merge_key(gd, gdop, fye)) { + fy_generic_decoder_object_mapping_expect_merge_key_value(gdop); + ret = FYCR_OK_CONTINUE; + break; + } + + v = fy_generic_decoder_create_scalar(gd, fye, va, vt); + assert(v != fy_invalid); + + goto add_item; + + case FYET_DOCUMENT_START: + + gdo = fy_generic_decoder_object_create(gd, FYGDOT_ROOT, fy_invalid, fy_invalid); + fyp_error_check(fyp, gdo, err_out, "fy_generic_decoder_object_create() failed"); + + gdo->fyds = fy_document_state_ref(fye->document_start.document_state); + assert(gdo->fyds); + + fy_path_set_root_user_data(path, gdo); + + vers = fy_document_state_version(gdo->fyds); + assert(vers); + + gdo->supports_merge_key = vers->major == 1 && vers->minor == 1; + + gd->gdo_root = gdo; + + ret = FYCR_OK_CONTINUE; + break; + + case FYET_SEQUENCE_START: + case FYET_MAPPING_START: + gdo = fy_generic_decoder_object_create(gd, + fye->type == FYET_SEQUENCE_START ? 
FYGDOT_SEQUENCE : FYGDOT_MAPPING, + va, vt); + fyp_error_check(fyp, gdo, err_out, "fy_generic_decoder_object_create() failed"); + + fy_path_set_last_user_data(path, gdo); + ret = FYCR_OK_CONTINUE; + + if (gd->resolve && anchor) { + rc = fy_generic_decoder_anchor_register(gd, va, fy_invalid); + fyp_error_check(fyp, !rc, err_out, "fy_generic_decoder_anchor_register() failed"); + } + if (gd->resolve) /* count every collection start: collection_ends() runs on every collection end */ + fy_generic_decoder_anchor_collection_starts(gd); + break; + + case FYET_DOCUMENT_END: + gdo = fy_path_get_root_user_data(path); + fy_path_set_root_user_data(path, NULL); + + v = fy_generic_decoder_object_finalize(gd, gdo); + fyp_error_check(fyp, v != fy_invalid, err_out, "fy_generic_decoder_object_finalize() failed"); + + gd->vroot = v; + gd->vds = gdo->vds; + + fy_generic_decoder_object_destroy(gdo); + gd->document_ready = true; + + gd->gdo_root = NULL; + + /* we always stop at the end of the document + * to give control back to the decoder to + * pick up the document */ + ret = FYCR_OK_STOP; + break; + + case FYET_SEQUENCE_END: + case FYET_MAPPING_END: + + gdop = fy_path_get_parent_user_data(path); + + gdo = fy_path_get_last_user_data(path); + fy_path_set_last_user_data(path, NULL); + + v = fy_generic_decoder_object_finalize_and_destroy(gd, gdo); + fyp_error_check(fyp, v != fy_invalid, err_out, "fy_generic_decoder_object_finalize_and_destroy() failed"); + + goto add_item; + + case FYET_NONE: + /* this is cleanup phase after an error */ + if (!fy_path_in_root(path)) { + gdo = fy_path_get_last_user_data(path); + fy_path_set_last_user_data(path, NULL); + } else { + gdo = fy_path_get_root_user_data(path); + fy_path_set_root_user_data(path, NULL); + } + if (gdo) + fy_generic_decoder_object_destroy(gdo); + + break; + } + + return ret; + +add_item: + if (!gdop) + gdop = fy_path_get_parent_user_data(path); + + assert(gdop); + + if (gd->resolve && (fye->type == FYET_SEQUENCE_END || fye->type == FYET_MAPPING_END)) + fy_generic_decoder_anchor_collection_ends(gd, v); + + if (gd->resolve &&
anchor) { + rc = fy_generic_decoder_anchor_register(gd, va, v); + fyp_error_check(fyp, !rc, err_out, "fy_generic_decoder_anchor_register() failed"); + } + + if (fy_generic_decoder_object_mapping_on_merge_key_value(gdop)) { + + if (!fy_generic_decoder_is_valid_merge_key_arg(gd, gdop, v)) { + fy_parser_report_error(fyp, fy_event_get_token(fye), + "Invalid merge key argument: must be a mapping or a sequence of mappings"); + goto err_out; + } + + rc = fy_generic_decoder_object_handle_merge_key_value(gdop, v); + fyp_error_check(fyp, !rc, err_out, "fy_generic_decoder_object_handle_merge_key_value() failed"); + + } else { + + rc = fy_generic_decoder_object_add_item(gdop, v); + fyp_error_check(fyp, !rc, err_out, "fy_generic_decoder_object_add_item() failed"); + } + + return FYCR_OK_CONTINUE; + +err_out: + return FYCR_ERROR; +} + +static int fy_generic_decoder_anchor_register(struct fy_generic_decoder *fygd, fy_generic anchor, fy_generic content) +{ + struct fy_generic_anchor *ga = NULL; + + ga = malloc(sizeof(*ga)); + if (!ga) + goto err_out; + memset(ga, 0, sizeof(*ga)); + ga->anchor = anchor; + ga->content = content; + + /* no content yet? 
collecting */ + if (content == fy_invalid) + fy_generic_anchor_list_add(&fygd->collecting_anchors, ga); + else + fy_generic_anchor_list_add(&fygd->complete_anchors, ga); + + return 0; +err_out: + return -1; +} + +static fy_generic fy_generic_decoder_alias_resolve(struct fy_generic_decoder *fygd, fy_generic anchor) +{ + struct fy_generic_anchor *ga; + + for (ga = fy_generic_anchor_list_head(&fygd->complete_anchors); ga; + ga = fy_generic_anchor_next(&fygd->complete_anchors, ga)) { + if (!fy_generic_compare(ga->anchor, anchor)) + return ga->content; + } + return fy_invalid; +} + +static bool fy_generic_decoder_alias_is_collecting(struct fy_generic_decoder *fygd, fy_generic anchor) +{ + struct fy_generic_anchor *ga; + + for (ga = fy_generic_anchor_list_head(&fygd->collecting_anchors); ga; + ga = fy_generic_anchor_next(&fygd->collecting_anchors, ga)) { + if (!fy_generic_compare(ga->anchor, anchor)) + return true; /* registered, but its content is not finalized yet */ + } + return false; +} + +static void fy_generic_decoder_anchor_collection_starts(struct fy_generic_decoder *fygd) +{ + struct fy_generic_anchor *ga; + + /* just increase the nest for all collecting */ + for (ga = fy_generic_anchor_list_head(&fygd->collecting_anchors); ga; + ga = fy_generic_anchor_next(&fygd->collecting_anchors, ga)) + ga->nest++; +} + +static void fy_generic_decoder_anchor_collection_ends(struct fy_generic_decoder *fygd, fy_generic v) +{ + struct fy_generic_anchor *ga, *gan; + + for (ga = fy_generic_anchor_list_head(&fygd->collecting_anchors); ga; ga = gan) { + gan = fy_generic_anchor_next(&fygd->collecting_anchors, ga); + + assert(ga->nest > 0); + ga->nest--; + if (ga->nest > 0) + continue; + + assert(ga->content == fy_invalid); + + /* move from collecting to complete list */ + fy_generic_anchor_list_del(&fygd->collecting_anchors, ga); + ga->content = v; + fy_generic_anchor_list_add(&fygd->complete_anchors, ga); + } +} + +void fy_generic_decoder_destroy(struct fy_generic_decoder *fygd) +{ + struct fy_parser *fyp; + struct fy_generic_anchor *ga;
+ + if (!fygd) + return; + + fyp = fygd->fyp; + if (fyp) { + if (fygd->resolve) + fyp->cfg.flags |= FYPCF_RESOLVE_DOCUMENT; + else + fyp->cfg.flags &= ~FYPCF_RESOLVE_DOCUMENT; + } + + while ((ga = fy_generic_anchor_list_pop(&fygd->collecting_anchors)) != NULL) + free(ga); + + while ((ga = fy_generic_anchor_list_pop(&fygd->complete_anchors)) != NULL) + free(ga); + + free(fygd); +} + +struct fy_generic_decoder * +fy_generic_decoder_create(struct fy_parser *fyp, struct fy_generic_builder *gb, bool verbose) +{ + struct fy_generic_decoder *fygd = NULL; + + if (!fyp || !gb) + return NULL; + + fygd = malloc(sizeof(*fygd)); + if (!fygd) + goto err_out; + memset(fygd, 0, sizeof(*fygd)); + + fygd->fyp = fyp; + fygd->gb = gb; + fygd->verbose = verbose; + fygd->resolve = !!(fyp->cfg.flags & FYPCF_RESOLVE_DOCUMENT); + fygd->vroot = fy_invalid; + fygd->vds = fy_invalid; + + fy_generic_anchor_list_init(&fygd->complete_anchors); + fy_generic_anchor_list_init(&fygd->collecting_anchors); + + /* turn off the stream resolve */ + fyp->cfg.flags &= ~FYPCF_RESOLVE_DOCUMENT; + + return fygd; + +err_out: + fy_generic_decoder_destroy(fygd); + return NULL; +} + +fy_generic fy_generic_decoder_parse_document(struct fy_generic_decoder *fygd, fy_generic *vdsp) +{ + fy_generic vroot; + int rc; + + if (!fygd) + return fy_invalid; + + rc = fy_parse_compose(fygd->fyp, fy_generic_compose_process_event, fygd); + if (rc) + goto err_out; + + if (fy_parser_get_stream_error(fygd->fyp)) + goto err_out; + + vroot = fygd->vroot; + + if (vdsp) + *vdsp = fygd->vds; + + fygd->vroot = fy_invalid; + fygd->vds = fy_invalid; + + return vroot; + +err_out: + if (vdsp) + *vdsp = fy_invalid; + return fy_invalid; +} + +void fy_generic_decoder_reset(struct fy_generic_decoder *fygd) +{ + struct fy_generic_anchor *ga; + + if (!fygd) + return; + + /* reset the anchors */ + while ((ga = fy_generic_anchor_list_pop(&fygd->collecting_anchors)) != NULL) + free(ga); + + while ((ga = 
fy_generic_anchor_list_pop(&fygd->complete_anchors)) != NULL) + free(ga); + + if (fygd->gb) + fy_generic_builder_reset(fygd->gb); +} + +fy_generic fy_generic_decoder_parse_all_documents(struct fy_generic_decoder *fygd) +{ + fy_generic vroot, vds, ventry, vdir; + fy_generic *items = NULL, *items_new; + size_t count, alloc; + + count = 0; + alloc = 0; + items = NULL; + + while ((vroot = fy_generic_decoder_parse_document(fygd, &vds)) != fy_invalid) { + + ventry = fy_generic_mapping_create(fygd->gb, 2, (fy_generic[]){ + fy_generic_string_create(fygd->gb, "root"), vroot, + fy_generic_string_create(fygd->gb, "docs"), vds }); + + if (ventry == fy_invalid) + goto err_out; + + if (count >= alloc) { + if (!alloc) + alloc = 8; + alloc *= 2; + items_new = realloc(items, alloc * sizeof(*items)); + if (!items_new) + goto err_out; + items = items_new; + } + assert(count < alloc); + items[count++] = ventry; + } + + if (!count) + return fy_null; + + vdir = fy_generic_sequence_create(fygd->gb, count, items); + free(items); + + return vdir; + +err_out: + if (items) + free(items); + return fy_invalid; +} diff --git a/src/util/fy-generic-decoder.h b/src/util/fy-generic-decoder.h new file mode 100644 index 00000000..abbe7972 --- /dev/null +++ b/src/util/fy-generic-decoder.h @@ -0,0 +1,81 @@ +/* + * fy-generic-decoder.h - generic decoder (yaml -> generic) + * + * Copyright (c) 2023 Pantelis Antoniou + * + * SPDX-License-Identifier: MIT + */ +#ifndef FY_GENERIC_DECODER_H +#define FY_GENERIC_DECODER_H + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include "fy-docstate.h" +#include "fy-allocator.h" +#include "fy-generic.h" + +struct fy_generic_decoder_obj; + +FY_TYPE_FWD_DECL_LIST(generic_anchor); +struct fy_generic_anchor { + struct list_head node; + fy_generic anchor; + fy_generic content; + int nest; +}; +FY_TYPE_DECL_LIST(generic_anchor); + +struct fy_generic_decoder { + struct fy_parser *fyp; + struct fy_generic_builder *gb; + bool verbose; + bool document_ready; 
+ bool resolve; + bool single_document; + fy_generic vroot; + fy_generic vds; + struct fy_generic_anchor_list complete_anchors; + struct fy_generic_anchor_list collecting_anchors; + struct fy_generic_decoder_obj *gdo_root; +}; + +enum fy_generic_decoder_object_type { + FYGDOT_SEQUENCE, + FYGDOT_MAPPING, + FYGDOT_ROOT, +}; + +static inline bool +fy_generic_decoder_object_type_is_valid(enum fy_generic_decoder_object_type type) +{ + return type >= FYGDOT_SEQUENCE && type <= FYGDOT_ROOT; +} + +struct fy_generic_decoder_obj { + enum fy_generic_decoder_object_type type; + size_t alloc; + size_t count; + fy_generic *items; + fy_generic v; + fy_generic anchor; + fy_generic tag; + /* for the root */ + struct fy_document_state *fyds; + fy_generic vds; + bool supports_merge_key : 1; + /* for mapping, special merge key */ + bool next_is_merge_args : 1; +}; + +struct fy_generic_decoder * +fy_generic_decoder_create(struct fy_parser *fyp, struct fy_generic_builder *gb, bool verbose); +void fy_generic_decoder_destroy(struct fy_generic_decoder *fygd); +fy_generic fy_generic_decoder_parse_document(struct fy_generic_decoder *fygd, fy_generic *vdsp); +fy_generic fy_generic_decoder_parse_all_documents(struct fy_generic_decoder *fygd); +void fy_generic_decoder_reset(struct fy_generic_decoder *fygd); + +#endif diff --git a/src/util/fy-generic-encoder.c b/src/util/fy-generic-encoder.c new file mode 100644 index 00000000..af10f0af --- /dev/null +++ b/src/util/fy-generic-encoder.c @@ -0,0 +1,337 @@ +/* + * fy-generic-encoder.h - generic encoder (generic -> yaml) + * + * Copyright (c) 2023 Pantelis Antoniou + * + * SPDX-License-Identifier: MIT + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include + +#include + +#include "fy-diag.h" +#include "fy-emit.h" + +#include "fy-generic.h" +#include "fy-generic-encoder.h" + +struct fy_generic_encoder * +fy_generic_encoder_create(struct fy_emitter *emit, bool verbose) +{ + struct fy_generic_encoder *fyge; + + if 
(!emit) + return NULL; + + fyge = malloc(sizeof(*fyge)); + if (!fyge) + return NULL; + memset(fyge, 0, sizeof(*fyge)); + + fyge->emit = emit; + fyge->verbose = verbose; + + return fyge; +} + +void fy_generic_encoder_destroy(struct fy_generic_encoder *fyge) +{ + if (!fyge) + return; + + (void)fy_generic_encoder_sync(fyge); + + free(fyge); +} + +int fy_generic_encoder_emit(struct fy_generic_encoder *fyge, fy_generic root, fy_generic vds) +{ + return 0; +} + +int fy_encode_generic(struct fy_generic_encoder *fyge, fy_generic v); + +int fy_encode_generic_null(struct fy_generic_encoder *fyge, const char *anchor, const char *tag, fy_generic v) +{ + return fy_emit_scalar_printf(fyge->emit, FYSS_PLAIN, anchor, tag, "null"); +} + +int fy_encode_generic_bool(struct fy_generic_encoder *fyge, const char *anchor, const char *tag, fy_generic v) +{ + const char *text; + size_t sz; + + if (v == fy_true) { + text = "true"; + sz = 4; + } else { + text = "false"; + sz = 5; + } + return fy_emit_scalar_write(fyge->emit, FYSS_PLAIN, anchor, tag, text, sz); +} + +int fy_encode_generic_int(struct fy_generic_encoder *fyge, const char *anchor, const char *tag, fy_generic v) +{ + return fy_emit_scalar_printf(fyge->emit, FYSS_PLAIN, anchor, tag, "%lld", + fy_generic_get_int(v)); +} + +int fy_encode_generic_float(struct fy_generic_encoder *fyge, const char *anchor, const char *tag, fy_generic v) +{ + return fy_emit_scalar_printf(fyge->emit, FYSS_PLAIN, anchor, tag, "%g", + fy_generic_get_float(v)); +} + +int fy_encode_generic_string(struct fy_generic_encoder *fyge, const char *anchor, const char *tag, fy_generic v) +{ + const char *str; + size_t len; + + str = fy_generic_get_string_size_alloca(v, &len); + return fy_emit_scalar_write(fyge->emit, FYSS_ANY, anchor, tag, str, len); +} + +int fy_encode_generic_sequence(struct fy_generic_encoder *fyge, const char *anchor, const char *tag, fy_generic v) +{ + const fy_generic *items; + size_t i, count; + int rc; + + rc = fy_emit_eventf(fyge->emit, 
FYET_SEQUENCE_START, FYNS_ANY, anchor, tag); + if (rc) + goto err_out; + + items = fy_generic_sequence_get_items(v, &count); + for (i = 0; i < count; i++) { + rc = fy_encode_generic(fyge, items[i]); + if (rc) + goto err_out; + } + + rc = fy_emit_eventf(fyge->emit, FYET_SEQUENCE_END); + if (rc) + goto err_out; + + return 0; +err_out: + return -1; +} + +int fy_encode_generic_mapping(struct fy_generic_encoder *fyge, const char *anchor, const char *tag, fy_generic v) +{ + const fy_generic *pairs; + size_t i, count; + int rc; + + rc = fy_emit_eventf(fyge->emit, FYET_MAPPING_START, FYNS_ANY, anchor, tag); + if (rc) + goto err_out; + + pairs = fy_generic_mapping_get_pairs(v, &count); + count *= 2; + for (i = 0; i < count; i++) { + rc = fy_encode_generic(fyge, pairs[i]); + if (rc) + goto err_out; + } + rc = fy_emit_eventf(fyge->emit, FYET_MAPPING_END); + if (rc) + goto err_out; + + return 0; +err_out: + return -1; +} + +int fy_encode_generic_alias(struct fy_generic_encoder *fyge, fy_generic v) +{ + return fy_emit_eventf(fyge->emit, FYET_ALIAS, fy_generic_get_alias_alloca(v)); +} + +int fy_encode_generic(struct fy_generic_encoder *fyge, fy_generic v) +{ + struct fy_generic_indirect gi; + const char *anchor = NULL, *tag = NULL; + + if (fy_generic_is_indirect(v)) { + fy_generic_indirect_get(v, &gi); + if (fy_generic_get_type(gi.anchor) == FYGT_STRING) + anchor = fy_generic_get_string_alloca(gi.anchor); + if (fy_generic_get_type(gi.tag) == FYGT_STRING) + tag = fy_generic_get_string_alloca(gi.tag); + } + + switch (fy_generic_get_type(v)) { + case FYGT_NULL: + return fy_encode_generic_null(fyge, anchor, tag, v); + + case FYGT_BOOL: + return fy_encode_generic_bool(fyge, anchor, tag, v); + + case FYGT_INT: + return fy_encode_generic_int(fyge, anchor, tag, v); + + case FYGT_FLOAT: + return fy_encode_generic_float(fyge, anchor, tag, v); + + case FYGT_STRING: + return fy_encode_generic_string(fyge, anchor, tag, v); + + case FYGT_SEQUENCE: + return fy_encode_generic_sequence(fyge, 
/*
 * fy_generic_encoder_emit_document() - Emit one document through the encoder's emitter
 *
 * @fyge: the encoder
 * @vroot: root value of the document, must not be fy_invalid
 * @vds: document state; only consulted when it is a mapping
 *
 * When @vds is a mapping the following entries are read from it:
 *   "version"          - a mapping holding integer "major" and "minor"
 *   "tags"             - a sequence whose items are looked up as mappings
 *                        holding "handle" and "prefix"
 *   "version-explicit" - the version is only passed on when this is fy_true
 *   "tags-explicit"    - the tag table is only passed on when this is fy_true
 *
 * A stream start event is emitted first if this encoder has not emitted
 * one yet; the stream end is not emitted here (see fy_generic_encoder_sync()).
 *
 * Returns 0 on success; -1 on bad arguments, when the stream end has
 * already been emitted, or when any emit step fails.
 */
int fy_generic_encoder_emit_document(struct fy_generic_encoder *fyge, fy_generic vroot, fy_generic vds)
{
	fy_generic vtags, vversion, vhandle, vprefix;
	const fy_generic *items;
	struct fy_version *vers = NULL, vers_local;
	struct fy_tag **tags = NULL, *tag;
	bool version_explicit, tags_explicit;
	int rc;
	size_t i, count;

	if (!fyge || vroot == fy_invalid)
		return -1;

	/* must not emit stream end twice */
	if (fyge->emitted_stream_end)
		return -1;

	/* the document state must be a mapping */
	if (vds != fy_invalid && fy_generic_get_type(vds) == FYGT_MAPPING) {
		vversion = fy_generic_mapping_lookup(vds, fy_generic_string_alloca("version"));

		/* a version is only picked up when it is itself a mapping */
		if (fy_generic_get_type(vversion) == FYGT_MAPPING) {
			vers = &vers_local;
			memset(vers, 0, sizeof(*vers));
			vers->major = fy_generic_get_int(fy_generic_mapping_lookup(vversion, fy_generic_string_alloca("major")));
			vers->minor = fy_generic_get_int(fy_generic_mapping_lookup(vversion, fy_generic_string_alloca("minor")));
		}

		vtags = fy_generic_mapping_lookup(vds, fy_generic_string_alloca("tags"));
		if (fy_generic_get_type(vtags) == FYGT_SEQUENCE) {
			items = fy_generic_sequence_get_items(vtags, &count);

			vhandle = fy_generic_string_alloca("handle");
			vprefix = fy_generic_string_alloca("prefix");

			/*
			 * Build a NULL terminated array of tag pointers; both the
			 * array and every tag come from alloca(), so nothing has to
			 * be freed on any exit path.
			 * NOTE(review): one alloca() per tag inside the loop grows the
			 * stack with the number of tags - confirm the count is bounded.
			 */
			tags = alloca((count + 1) * sizeof(*tags));
			for (i = 0; i < count; i++) {
				tag = alloca(sizeof(*tag));
				tag->handle = fy_generic_get_string_alloca(fy_generic_mapping_lookup(items[i], vhandle));
				tag->prefix = fy_generic_get_string_alloca(fy_generic_mapping_lookup(items[i], vprefix));

				tags[i] = tag;
			}
			tags[i] = NULL;
		}

		version_explicit = fy_generic_mapping_lookup(vds, fy_generic_string_alloca("version-explicit")) == fy_true;
		tags_explicit = fy_generic_mapping_lookup(vds, fy_generic_string_alloca("tags-explicit")) == fy_true;

		/* drop whatever was collected unless it was marked explicit */
		if (!version_explicit)
			vers = NULL;
		if (!tags_explicit)
			tags = NULL;
	}

	/* the stream start is emitted lazily, once per encoder */
	if (!fyge->emitted_stream_start) {
		rc = fy_emit_eventf(fyge->emit, FYET_STREAM_START);
		if (rc)
			goto err_out;
		fyge->emitted_stream_start = true;
	}

	/* vers and/or tags are NULL here when absent or not explicit */
	rc = fy_emit_eventf(fyge->emit, FYET_DOCUMENT_START, 0, vers, tags);
	if (rc)
		goto err_out;

	rc = fy_encode_generic(fyge, vroot);
	if (rc)
		goto err_out;

	rc = fy_emit_eventf(fyge->emit, FYET_DOCUMENT_END, 0);
	if (rc)
		goto err_out;

	return 0;
err_out:
	return -1;
}
/*
 * State of a generic encoder (generic -> yaml).
 *
 * Created zero-filled by fy_generic_encoder_create(), which then sets
 * emit and verbose from its arguments.
 */
struct fy_generic_encoder {
	struct fy_emitter *emit;	/* emitter all events are sent to */
	bool verbose;			/* as passed to fy_generic_encoder_create() */
	bool emitted_stream_start;	/* set once the stream start event went out */
	bool emitted_stream_end;	/* set once the stream end event went out */
};
"fy-allocator.h" +#include "fy-generic.h" + +struct fy_generic_builder *fy_generic_builder_create(struct fy_allocator *a, fy_alloc_tag shared_tag) +{ + fy_alloc_tag alloc_tag = FY_ALLOC_TAG_NONE; + struct fy_generic_builder *gb = NULL; + + if (!a) + return NULL; + + alloc_tag = shared_tag; + if (alloc_tag == FY_ALLOC_TAG_NONE) { + alloc_tag = fy_allocator_get_tag(a, NULL); + if (alloc_tag == FY_ALLOC_TAG_ERROR) + goto err_out; + } + + gb = malloc(sizeof(*gb)); + if (!gb) + goto err_out; + memset(gb, 0, sizeof(*gb)); + + gb->allocator = a; + gb->shared_tag = shared_tag; + gb->alloc_tag = alloc_tag; + + return gb; + +err_out: + if (shared_tag != FY_ALLOC_TAG_NONE) + fy_allocator_release_tag(a, alloc_tag); + if (gb) + free(gb); + return NULL; +} + +void fy_generic_builder_destroy(struct fy_generic_builder *gb) +{ + if (!gb) + return; + + /* if we own the allocator, just destroy it, everything is gone */ + if (gb->allocator && gb->owns_allocator) + fy_allocator_destroy(gb->allocator); + else if (gb->shared_tag == FY_ALLOC_TAG_NONE) + fy_allocator_release_tag(gb->allocator, gb->alloc_tag); + + free(gb); +} + +void fy_generic_builder_reset(struct fy_generic_builder *gb) +{ + if (!gb) + return; + + if (gb->shared_tag == FY_ALLOC_TAG_NONE) + fy_allocator_reset_tag(gb->allocator, gb->alloc_tag); +} + +fy_generic fy_generic_float_create(struct fy_generic_builder *gb, double val) +{ + const double *valp; +#ifdef FY_HAS_64BIT_PTR + float f; + uint32_t fi; + + if (fy_double_fits_in_float(val)) { + f = (float)val; + memcpy(&fi, &f, sizeof(fi)); + return ((fy_generic)fi << FY_FLOAT_INPLACE_SHIFT) | FY_FLOAT_INPLACE_V; + } +#endif + valp = fy_generic_builder_store(gb, &val, sizeof(val), FY_SCALAR_ALIGNOF(double)); + if (!valp) + return fy_invalid; + assert(((uintptr_t)valp & FY_INPLACE_TYPE_MASK) == 0); + return (fy_generic)valp | FY_FLOAT_OUTPLACE_V; +} + +fy_generic fy_generic_string_size_create(struct fy_generic_builder *gb, const char *str, size_t len) +{ + uint8_t 
lenbuf[FYGT_SIZE_ENCODING_MAX]; + struct fy_iovecw iov[3]; + const void *s; + void *p; + + switch (len) { + case 0: + return ((fy_generic)0) | + (0 << FY_STRING_INPLACE_SIZE_SHIFT) | FY_STRING_INPLACE_V; + case 1: + return ((fy_generic)str[0] << 8) | + (1 << FY_STRING_INPLACE_SIZE_SHIFT) | FY_STRING_INPLACE_V; + case 2: + return ((fy_generic)str[0] << 8) | + ((fy_generic)str[1] << 16) | + (2 << FY_STRING_INPLACE_SIZE_SHIFT) | FY_STRING_INPLACE_V; + case 3: + return ((fy_generic)str[0] << 8) | + ((fy_generic)str[1] << 16) | + ((fy_generic)str[2] << 24) | + (3 << FY_STRING_INPLACE_SIZE_SHIFT) | FY_STRING_INPLACE_V; +#ifdef FY_HAS_64BIT_PTR + case 4: + return ((fy_generic)str[0] << 8) | + ((fy_generic)str[1] << 16) | + ((fy_generic)str[2] << 24) | + ((fy_generic)str[3] << 32) | + (4 << FY_STRING_INPLACE_SIZE_SHIFT) | FY_STRING_INPLACE_V; + case 5: + return ((fy_generic)str[0] << 8) | + ((fy_generic)str[1] << 16) | + ((fy_generic)str[2] << 24) | + ((fy_generic)str[3] << 32) | + ((fy_generic)str[4] << 40) | + (5 << FY_STRING_INPLACE_SIZE_SHIFT) | FY_STRING_INPLACE_V; + case 6: + return ((fy_generic)str[0] << 8) | + ((fy_generic)str[1] << 16) | + ((fy_generic)str[2] << 24) | + ((fy_generic)str[3] << 32) | + ((fy_generic)str[4] << 40) | + ((fy_generic)str[5] << 48) | + (6 << FY_STRING_INPLACE_SIZE_SHIFT) | FY_STRING_INPLACE_V; + case 7: + return ((fy_generic)str[0] << 8) | + ((fy_generic)str[1] << 16) | + ((fy_generic)str[2] << 24) | + ((fy_generic)str[3] << 32) | + ((fy_generic)str[4] << 40) | + ((fy_generic)str[5] << 48) | + ((fy_generic)str[6] << 56) | + (7 << FY_STRING_INPLACE_SIZE_SHIFT) | FY_STRING_INPLACE_V; +#endif + default: + break; + } + + p = fy_encode_size(lenbuf, sizeof(lenbuf), len); + assert(p); + + iov[0].data = lenbuf; + iov[0].size = (size_t)((uint8_t *)p - lenbuf) ; + iov[1].data = str; + iov[1].size = len; + iov[2].data = "\x00"; /* null terminate always */ + iov[2].size = 1; + + /* strings are aligned at 8 always */ + s = 
fy_generic_builder_storev(gb, iov, ARRAY_SIZE(iov), 8); + if (!s) + return fy_invalid; + + assert(((uintptr_t)s & FY_INPLACE_TYPE_MASK) == 0); + return (fy_generic)s | FY_STRING_OUTPLACE_V; +} + +fy_generic +fy_generic_string_vcreate(struct fy_generic_builder *gb, const char *fmt, va_list ap) +{ + va_list ap2; + char *str; + size_t size; + + va_copy(ap2, ap); + + size = vsnprintf(NULL, 0, fmt, ap); + if (size < 0) + return fy_invalid; + + str = alloca(size + 1); + size = vsnprintf(str, size + 1, fmt, ap2); + if (size < 0) + return fy_invalid; + + return fy_generic_string_size_create(gb, str, size); +} + +fy_generic +fy_generic_string_createf(struct fy_generic_builder *gb, const char *fmt, ...) +{ + va_list ap; + fy_generic v; + + va_start(ap, fmt); + v = fy_generic_string_vcreate(gb, fmt, ap); + va_end(ap); + + return v; +} + +fy_generic fy_generic_sequence_create(struct fy_generic_builder *gb, size_t count, const fy_generic *items) +{ + struct fy_generic_sequence s; + const struct fy_generic_sequence *p; + struct fy_iovecw iov[2]; + size_t i; + + if (count && !items) + return fy_invalid; + + for (i = 0; i < count; i++) { + if (items[i] == fy_invalid) + return fy_invalid; + } + + memset(&s, 0, sizeof(s)); + s.count = count; + + iov[0].data = &s; + iov[0].size = sizeof(s); + iov[1].data = items; + iov[1].size = count * sizeof(*items); + + p = fy_generic_builder_storev(gb, iov, ARRAY_SIZE(iov), FY_CONTAINER_ALIGNOF(struct fy_generic_sequence)); + if (!p) + return fy_invalid; + + return (fy_generic)p | FY_SEQ_V; +} + +fy_generic fy_generic_mapping_create(struct fy_generic_builder *gb, size_t count, const fy_generic *pairs) +{ + struct fy_generic_mapping m; + const struct fy_generic_mapping *p; + struct fy_iovecw iov[2]; + size_t i; + + if (count && !pairs) + return fy_invalid; + + for (i = 0; i < count * 2; i++) { + if (pairs[i] == fy_invalid) + return fy_invalid; + } + + memset(&m, 0, sizeof(m)); + m.count = count; + + iov[0].data = &m; + iov[0].size = sizeof(m); + 
iov[1].data = pairs; + iov[1].size = 2 * count * sizeof(*pairs); + + p = fy_generic_builder_storev(gb, iov, ARRAY_SIZE(iov), FY_CONTAINER_ALIGNOF(struct fy_generic_mapping)); + if (!p) + return fy_invalid; + + return (fy_generic)p | FY_MAP_V; +} + +fy_generic fy_generic_mapping_lookup(fy_generic map, fy_generic key) +{ + struct fy_generic_mapping *p; + const fy_generic *pair; + size_t i; + + p = fy_generic_resolve_collection_ptr(map); + assert(p); + pair = p->pairs; + for (i = 0; i < p->count; i++, pair += 2) { + if (fy_generic_compare(key, pair[0]) == 0) + return pair[1]; + } + return fy_invalid; +} + +fy_generic fy_generic_indirect_create(struct fy_generic_builder *gb, const struct fy_generic_indirect *gi) +{ + const void *p; + struct fy_iovecw iov[4]; + size_t cnt; + uint8_t flags; + + cnt = 0; + + flags = 0; + if (gi->value != fy_invalid) + flags |= FYGIF_VALUE; + if (gi->anchor != fy_null && gi->anchor != fy_invalid) + flags |= FYGIF_ANCHOR; + if (gi->tag != fy_null && gi->tag != fy_invalid) + flags |= FYGIF_TAG; + iov[cnt].data = &flags; + iov[cnt++].size = sizeof(flags); + if (flags & FYGIF_VALUE) { + iov[cnt].data = &gi->value; + iov[cnt++].size = sizeof(gi->value); + } + if (flags & FYGIF_ANCHOR) { + iov[cnt].data = &gi->anchor; + iov[cnt++].size = sizeof(gi->anchor); + } + if (flags & FYGIF_TAG) { + iov[cnt].data = &gi->tag; + iov[cnt++].size = sizeof(gi->tag); + } + + p = fy_generic_builder_storev(gb, iov, cnt, FY_SCALAR_ALIGNOF(uint8_t)); /* must be at least 8 */ + if (!p) + return fy_invalid; + + return (fy_generic)p | FY_INDIRECT_V; +} + +fy_generic fy_generic_alias_create(struct fy_generic_builder *gb, fy_generic anchor) +{ + struct fy_generic_indirect gi = { + .value = fy_invalid, + .anchor = anchor, + .tag = fy_invalid, + }; + + return fy_generic_indirect_create(gb, &gi); +} + +int fy_generic_sequence_compare(fy_generic seqa, fy_generic seqb) +{ + size_t i, counta, countb; + const fy_generic *itemsa, *itemsb; + int ret; + + if (seqa == seqb) + 
return 0; + + itemsa = fy_generic_sequence_get_items(seqa, &counta); + itemsb = fy_generic_sequence_get_items(seqb, &countb); + + if (counta != countb) + goto out; + + /* empty? just fine */ + if (counta == 0) + return 0; + + /* try to cheat by comparing contents */ + ret = memcmp(itemsa, itemsb, counta * sizeof(*itemsa)); + if (!ret) + return 0; /* great! binary match */ + + /* have to do it the hard way */ + for (i = 0; i < counta; i++) { + ret = fy_generic_compare(itemsa[i], itemsb[i]); + if (ret) + goto out; + } + + /* exhaustive check */ + return 0; +out: + return seqa > seqb ? 1 : -1; /* keep order, but it's just address based */ +} + +int fy_generic_mapping_compare(fy_generic mapa, fy_generic mapb) +{ + size_t i, counta, countb; + const fy_generic *pairsa, *pairsb; + fy_generic key, vala, valb; + int ret; + + if (mapa == mapb) + return 0; + + pairsa = fy_generic_mapping_get_pairs(mapa, &counta); + pairsb = fy_generic_mapping_get_pairs(mapb, &countb); + + if (counta != countb) + goto out; + + /* empty? just fine */ + if (counta == 0) + return 0; + + /* try to cheat by comparing contents */ + ret = memcmp(pairsa, pairsb, counta * 2 * sizeof(*pairsa)); + if (!ret) + return 0; /* great! binary match */ + + /* have to do it the hard way */ + for (i = 0; i < counta * 2; i++) { + key = pairsa[i * 2]; + vala = pairsa[i * 2 + 1]; + + /* find if the key exists in the other mapping */ + valb = fy_generic_mapping_lookup(mapa, key); + if (valb == fy_invalid) + goto out; + + /* compare values */ + ret = fy_generic_compare(vala, valb); + if (ret) + goto out; + } + + /* all the keys value pairs match */ + + return 0; +out: + return mapa > mapb ? 1 : -1; /* keep order, but it's just address based */ +} + +static inline int fy_generic_bool_compare(fy_generic a, fy_generic b) +{ + int ba, bb; + + ba = (int)fy_generic_get_bool(a); + bb = (int)fy_generic_get_bool(b); + return ba > bb ? 1 : + ba < bb ? 
-1 : 0; +} + +static inline int fy_generic_int_compare(fy_generic a, fy_generic b) +{ + long long ia, ib; + + ia = fy_generic_get_int(a); + ib = fy_generic_get_int(b); + return ia > ib ? 1 : + ia < ib ? -1 : 0; +} + +static inline int fy_generic_float_compare(fy_generic a, fy_generic b) +{ + double da, db; + + da = fy_generic_get_float(a); + db = fy_generic_get_float(b); + return da > db ? 1 : + da < db ? -1 : 0; +} + +static inline int fy_generic_string_compare(fy_generic a, fy_generic b) +{ + const char *sa, *sb; + size_t sza = 0, szb = 0; + int ret; + + sa = fy_generic_get_string_size_alloca(a, &sza); + sb = fy_generic_get_string_size_alloca(b, &szb); + + ret = memcmp(sa, sb, sza > szb ? szb : sza); + + if (!ret && sza != szb) + ret = 1; + return ret; +} + +static inline int fy_generic_alias_compare(fy_generic a, fy_generic b) +{ + const char *sa, *sb; + size_t sza = 0, szb = 0; + int ret; + + sa = fy_generic_get_alias_size_alloca(a, &sza); + sb = fy_generic_get_alias_size_alloca(b, &szb); + + ret = memcmp(sa, sb, sza > szb ? szb : sza); + + if (!ret && sza != szb) + ret = 1; + return ret; +} + +int fy_generic_compare_out_of_place(fy_generic a, fy_generic b) +{ + enum fy_generic_type at, bt; + + /* invalids are always non-matching */ + if (a == fy_invalid || b == fy_invalid) + return -1; + + /* equals? 
nice - should work for null, bool, in place int, float and strings */ + /* also for anything that's a pointer */ + if (a == b) + return 0; + + at = fy_generic_get_type(a); + bt = fy_generic_get_type(b); + + /* invalid types, or differing types do not match */ + if (at != bt) + return -1; + + switch (fy_generic_get_type(a)) { + case FYGT_NULL: + return 0; /* two nulls are always equal to each other */ + + case FYGT_BOOL: + return fy_generic_bool_compare(a, b); + + case FYGT_INT: + return fy_generic_int_compare(a, b); + + case FYGT_FLOAT: + return fy_generic_float_compare(a, b); + + case FYGT_STRING: + return fy_generic_string_compare(a, b); + + case FYGT_SEQUENCE: + return fy_generic_sequence_compare(a, b); + + case FYGT_MAPPING: + return fy_generic_mapping_compare(a, b); + + case FYGT_ALIAS: + return fy_generic_alias_compare(a, b); + + default: + /* we don't handle anything else */ + assert(0); + abort(); + } +} + +#define COPY_MALLOC_CUTOFF 256 + +fy_generic fy_generic_builder_copy_out_of_place(struct fy_generic_builder *gb, fy_generic v) +{ + const struct fy_generic_sequence *seqs; + fy_generic vi; + const struct fy_generic_mapping *maps; + struct fy_iovecw iov[2]; + enum fy_generic_type type; + size_t size, len, i, count; + const void *valp; + const uint8_t *p, *str; + const fy_generic *itemss; + fy_generic *items; + + if (v == fy_invalid) + return fy_invalid; + + /* indirects are handled here (note, aliases are indirect too) */ + if (fy_generic_is_indirect(v)) { + struct fy_generic_indirect gi; + + fy_generic_indirect_get(v, &gi); + + gi.value = fy_generic_builder_copy(gb, gi.value); + gi.anchor = fy_generic_builder_copy(gb, gi.anchor); + gi.tag = fy_generic_builder_copy(gb, gi.tag); + + return fy_generic_indirect_create(gb, &gi); + } + + type = fy_generic_get_type(v); + if (type == FYGT_NULL || type == FYGT_BOOL) + return v; + + /* if we got to here, it's going to be a copy */ + switch (type) { + case FYGT_INT: + if (v & FY_INT_INPLACE_V) + return v; + valp = 
fy_generic_builder_store(gb, fy_generic_resolve_ptr(v), + sizeof(long long), FY_SCALAR_ALIGNOF(long long)); + if (!valp) + return fy_invalid; + return (fy_generic)valp | FY_INT_OUTPLACE_V; + + case FYGT_FLOAT: + if (v & FY_FLOAT_INPLACE_V) + return v; + valp = fy_generic_builder_store(gb, fy_generic_resolve_ptr(v), + sizeof(double), FY_SCALAR_ALIGNOF(double)); + if (!valp) + return fy_invalid; + return (fy_generic)valp | FY_FLOAT_OUTPLACE_V; + + case FYGT_STRING: + if (v & FY_STRING_INPLACE_V) + return v; + p = fy_generic_resolve_ptr(v); + str = fy_decode_size(p, FYGT_SIZE_ENCODING_MAX, &len); + if (!str) + return fy_invalid; + size = (size_t)(str - p) + len; + valp = fy_generic_builder_store(gb, p, size, 8); + if (!valp) + return fy_invalid; + return (fy_generic)valp | FY_STRING_OUTPLACE_V; + + case FYGT_SEQUENCE: + seqs = fy_generic_resolve_collection_ptr(v); + count = seqs->count; + itemss = seqs->items; + + size = sizeof(*items) * count; + if (size <= COPY_MALLOC_CUTOFF) + items = alloca(size); + else { + items = malloc(size); + if (!items) + return fy_invalid; + } + + for (i = 0; i < count; i++) { + vi = fy_generic_builder_copy(gb, itemss[i]); + if (vi == fy_invalid) + break; + items[i] = vi; + } + + if (i >= count) { + iov[0].data = seqs; + iov[0].size = sizeof(*seqs); + iov[1].data = items; + iov[1].size = size; + valp = fy_generic_builder_storev(gb, iov, ARRAY_SIZE(iov), + FY_CONTAINER_ALIGNOF(struct fy_generic_sequence)); + } else + valp = NULL; + + if (size > COPY_MALLOC_CUTOFF) + free(items); + + if (!valp) + return fy_invalid; + + return (fy_generic)valp | FY_SEQ_V; + + case FYGT_MAPPING: + maps = fy_generic_resolve_collection_ptr(v); + count = maps->count * 2; + itemss = maps->pairs; + + size = sizeof(*items) * count; + if (size <= COPY_MALLOC_CUTOFF) + items = alloca(size); + else { + items = malloc(size); + if (!items) + return fy_invalid; + } + + if (size > COPY_MALLOC_CUTOFF) + free(items); + + /* copy both keys and values */ + for (i = 0; i < 
count; i++) { + vi = fy_generic_builder_copy(gb, itemss[i]); + if (vi == fy_invalid) + break; + items[i] = vi; + } + + if (i >= count) { + iov[0].data = maps; + iov[0].size = sizeof(*maps); + iov[1].data = items; + iov[1].size = size, + valp = fy_generic_builder_storev(gb, iov, ARRAY_SIZE(iov), + FY_CONTAINER_ALIGNOF(struct fy_generic_mapping)); + } else + valp = NULL; + + if (size > COPY_MALLOC_CUTOFF) + free(items); + + if (!valp) + return fy_invalid; + + return (fy_generic)valp | FY_MAP_V; + + default: + break; + } + + assert(0); + abort(); + return fy_invalid; +} + +fy_generic fy_generic_relocate(void *start, void *end, fy_generic v, ptrdiff_t d) +{ + void *p; + struct fy_generic_indirect *gi; + struct fy_generic_sequence *seq; + struct fy_generic_mapping *map; + fy_generic *items, *pairs; + size_t i, count; + + /* the delta can't have those bits */ + assert((d & FY_INPLACE_TYPE_MASK) == 0); + + /* no relocation needed */ + if (d == 0) + return v; + + /* if it's indirect, resolve the internals */ + if (fy_generic_is_indirect(v)) { + + /* check if already relocated */ + p = fy_generic_resolve_ptr(v); + if (p >= start && p < end) + return v; + + v = fy_generic_relocate_ptr(v, d) | FY_INDIRECT_V; + gi = fy_generic_resolve_ptr(v); + gi->value = fy_generic_relocate(start, end, gi->value, d); + gi->anchor = fy_generic_relocate(start, end, gi->anchor, d); + gi->tag = fy_generic_relocate(start, end, gi->tag, d); + return v; + } + + /* if it's not indirect, it might be one of the in place formats */ + switch (fy_generic_get_type(v)) { + case FYGT_NULL: + case FYGT_BOOL: + return v; + + case FYGT_INT: + if ((v & FY_INPLACE_TYPE_MASK) == FY_INT_INPLACE_V) + return v; + + p = fy_generic_resolve_ptr(v); + if (p >= start && p < end) + return v; + + v = fy_generic_relocate_ptr(v, d) | FY_INT_OUTPLACE_V; + break; + + case FYGT_FLOAT: + if ((v & FY_INPLACE_TYPE_MASK) == FY_FLOAT_INPLACE_V) + return v; + + p = fy_generic_resolve_ptr(v); + if (p >= start && p < end) + return v; + 
/*
 * The kinds of value a fy_generic can represent.
 */
enum fy_generic_type {
	FYGT_INVALID,	/* presumably the type of fy_invalid - TODO confirm */
	FYGT_NULL,
	FYGT_BOOL,
	FYGT_INT,	/* integers are read back as long long */
	FYGT_FLOAT,	/* floats are read back as double */
	FYGT_STRING,
	FYGT_SEQUENCE,
	FYGT_MAPPING,
	FYGT_INDIRECT,	/* a value carried together with an anchor and/or a tag */
	FYGT_ALIAS,	/* aliases are stored as indirects too */
};
+#define FYGT_INT_INPLACE_BITS_64 61 +#define FYGT_STRING_INPLACE_SIZE_64 7 +#define FYGT_SIZE_ENCODING_MAX_64 FYVL_SIZE_ENCODING_MAX_64 + +#define FYGT_INT_INPLACE_BITS_32 29 +#define FYGT_STRING_INPLACE_SIZE_32 3 +#define FYGT_SIZE_ENCODING_MAX_32 FYVL_SIZE_ENCODING_MAX_32 + +#ifdef FY_HAS_64BIT_PTR +#define FYGT_INT_INPLACE_BITS FYGT_INT_INPLACE_BITS_64 +#define FYGT_STRING_INPLACE_SIZE FYGT_STRING_INPLACE_SIZE_64 +#else +#define FYGT_INT_INPLACE_BITS FYGT_INT_INPLACE_BITS_32 +#define FYGT_STRING_INPLACE_SIZE FYGT_STRING_INPLACE_SIZE_32 +#endif + +#define FYGT_SIZE_ENCODING_MAX FYVL_SIZE_ENCODING_MAX + + +// 64 bit memory layout for generic types +// +// |63 8|7654|3|210| +// -------------+----------------+----|-+---+ +// null 0 |0000000000000000|0000|0|000| +// sequence 0 |pppppppppppppppp|pppp|0|000| pointer to a 16 byte aligned sequence +// mapping 0 |pppppppppppppppp|pppp|1|000| pointer to a 16 byte aligned mapping +// bool 0 |0000000000000000|0000|1|000| false +// 0 |1111111111111111|1111|1|000| true +// int 1 |xxxxxxxxxxxxxxxx|xxxx|x|001| int bits <= 61 +// 2 |pppppppppppppppp|pppp|p|010| 8 byte aligned pointer to an long long +// float 3 |ffffffffffffffff|0000|0|011| 32 bit float without loss of precision +// 4 |pppppppppppppppp|pppp|p|100| pointer to 8 byte aligned double +// string 5 |ssssssssssssssss|0lll|0|101| string length <= 7 lll 3 bit length +// x y two available bits for styling info +// 6 |pppppppppppppppp|pppp|p|110| 8 byte aligned pointer to a string +// indirect 7 |pppppppppppppppp|pppp|p|111| 8 byte aligned pointer to an indirect +// invalid |1111111111111111|1111|1|111| All bits set +// +// 32 bit memory layout for generic types +// +// |32 8|7654|3|210| +// -------------+----------------+----|-+---+ +// null 0 |0000000000000000|0000|0|000| +// sequence 0 |pppppppppppppppp|pppp|0|000| pointer to a 16 byte aligned sequence +// mapping 0 |pppppppppppppppp|pppp|1|000| pointer to a 16 byte aligned mapping +// bool 0 
|0000000000000000|0000|1|000| false +// 0 |1111111111111111|1111|1|000| true +// int 1 |xxxxxxxxxxxxxxxx|xxxx|x|001| int bits <= 29 +// 2 |pppppppppppppppp|pppp|1|010| 8 byte aligned pointer to an long long +// float 3 |pppppppppppppppp|pppp|p|011| pointer to 8 byte aligned float +// 4 |pppppppppppppppp|pppp|p|100| pointer to 8 byte aligned double +// string 5 |ssssssssssssssss|00ll|0|101| string length <= 3 ll 2 bit length +// xy z three available bits for styling info +// 6 |pppppppppppppppp|pppp|p|110| 8 byte aligned pointer to a string +// indirect 7 |pppppppppppppppp|pppp|p|111| 8 byte aligned pointer to an indirect +// invalid |1111111111111111|1111|1|111| All bits set +// + +/* we use the bottom 3 bits to get the primitive types */ +#define FY_INPLACE_TYPE_SHIFT 3 +#define FY_INPLACE_TYPE_MASK (((fy_generic)1 << FY_INPLACE_TYPE_SHIFT) - 1) + +#define FY_NULL_V 0 +#define FY_SEQ_V 0 +#define FY_MAP_V 8 +#define FY_COLLECTION_MASK (((fy_generic)1 << (FY_INPLACE_TYPE_SHIFT + 1)) - 1) + +#define FY_BOOL_V 8 +#define FY_BOOL_INPLACE_SHIFT 4 + +#define FY_EXTERNAL_V 0 + +#define FY_INT_INPLACE_V 1 +#define FY_INT_OUTPLACE_V 2 +#define FY_INT_INPLACE_SHIFT 3 + +#define FY_FLOAT_INPLACE_V 3 +#define FY_FLOAT_OUTPLACE_V 4 +#define FY_FLOAT_INPLACE_SHIFT 32 + +#define FY_STRING_INPLACE_V 5 +#define FY_STRING_OUTPLACE_V 6 +#define FY_STRING_INPLACE_SIZE_SHIFT 4 + +#define FY_INDIRECT_V 7 + +#define fy_null ((fy_generic)0) /* simple does it */ +#define fy_false ((fy_generic)8) +#define fy_true (~(fy_generic)7) +#define fy_invalid ((fy_generic)-1) + +#define FYGT_STRING_INPLACE_BUF (FYGT_STRING_INPLACE_SIZE + 1) +#define FYGT_INT_INPLACE_MAX ((1LL << (FYGT_INT_INPLACE_BITS - 1)) - 1) +#define FYGT_INT_INPLACE_MIN (-(1LL << (FYGT_INT_INPLACE_BITS - 1))) + +#define FY_GENERIC_CONTAINER_ALIGNMENT __attribute__((aligned(16))) +#define FY_GENERIC_EXTERNAL_ALIGNMENT FY_GENERIC_CONTAINER_ALIGNMENT + +/* yes, plenty of side-effects, use it with care */ +#define 
FY_MAX_ALIGNOF(_v, _min) ((size_t)(alignof(_v) > (_min) ? alignof(_v) : (_min))) +#define FY_CONTAINER_ALIGNOF(_v) FY_MAX_ALIGNOF(_v, 16) +#define FY_SCALAR_ALIGNOF(_v) FY_MAX_ALIGNOF(_v, 8) + +/* + * The encoding of generic indirect + * + * 1 byte of flags + * value (if exists) + * anchor (if exists) + * tag (if exists) + * positional info (if it exists) + * > start: input_pos + * > start: line + * > start: column + * > end: delta input_pos + * > end: delta line + * > end: delta column + */ + +/* encoding an alias is anchor the string of the alias + * and value fy_invalid */ +struct fy_generic_indirect { + uintptr_t flags; /* styling and existence flags */ + fy_generic value; /* the actual value */ + fy_generic anchor; /* string anchor or null */ + fy_generic tag; /* string tag or null */ +}; + +#define FYGIF_VALUE (1U << 0) +#define FYGIF_ANCHOR (1U << 1) +#define FYGIF_TAG (1U << 2) +#define FYGIF_STYLE_SHIFT 4 +#define FYGIF_STYLE_MASK (7U << FYGIF_STYLE_SHIFT) +#define FYGIF_PLAIN (0U << FYGIF_STYLE_SHIFT) /* scalar styles */ +#define FYGIF_SINGLE_Q (1U << FYGIF_STYLE_SHIFT) +#define FYGIF_DOUBLE_Q (2U << FYGIF_STYLE_SHIFT) +#define FYGIF_LITERAL (3U << FYGIF_STYLE_SHIFT) +#define FYGIF_FOLDED (4U << FYGIF_STYLE_SHIFT) +#define FYGIF_BLOCK (5U << FYGIF_STYLE_SHIFT) /* collection styles */ +#define FYGIF_FLOW (6U << FYGIF_STYLE_SHIFT) + +static inline bool fy_generic_is_indirect(fy_generic v) +{ + return (v & FY_INPLACE_TYPE_MASK) == FY_INDIRECT_V && v != fy_invalid; +} + +static inline void *fy_generic_resolve_ptr(fy_generic ptr) +{ + /* clear the top 3 bits (all pointers are 8 byte aligned) */ + /* note collections have the bit 3 cleared too, so it's 16 byte aligned */ + ptr &= ~(uintptr_t)FY_INPLACE_TYPE_MASK; + return (void *)ptr; +} + +static inline void *fy_generic_resolve_collection_ptr(fy_generic ptr) +{ + /* clear the top 3 bits (all pointers are 8 byte aligned) */ + /* note collections have the bit 3 cleared too, so it's 16 byte aligned */ + ptr &= 
~(uintptr_t)FY_COLLECTION_MASK; + return (void *)ptr; +} + +static inline fy_generic fy_generic_relocate_ptr(fy_generic v, ptrdiff_t d) +{ + v = (fy_generic)((ptrdiff_t)((uintptr_t)v & ~(uintptr_t)FY_INPLACE_TYPE_MASK) + d); + assert((v & (uintptr_t)FY_INPLACE_TYPE_MASK) == 0); + return v; +} + +static inline fy_generic fy_generic_relocate_collection_ptr(fy_generic v, ptrdiff_t d) +{ + v = (fy_generic)((ptrdiff_t)((uintptr_t)v & ~(uintptr_t)FY_COLLECTION_MASK) + d); + assert((v & (uintptr_t)FY_COLLECTION_MASK) == 0); + return v; +} + +static inline enum fy_generic_type fy_generic_get_type(fy_generic v) +{ + static const uint8_t table[16] = { + [0] = FYGT_SEQUENCE, [8 | 0] = FYGT_MAPPING, + [1] = FYGT_INT, [8 | 1] = FYGT_INT, + [2] = FYGT_INT, [8 | 2] = FYGT_INT, + [3] = FYGT_FLOAT, [8 | 3] = FYGT_FLOAT, + [4] = FYGT_FLOAT, [8 | 4] = FYGT_FLOAT, + [5] = FYGT_STRING, [8 | 5] = FYGT_STRING, + [6] = FYGT_STRING, [8 | 6] = FYGT_STRING, + [7] = FYGT_INDIRECT, [8 | 7] = FYGT_INDIRECT, + }; + const uint8_t *p; + uint8_t flags; + enum fy_generic_type type; + + if (v == fy_invalid) + return FYGT_INVALID; + + if (v == fy_null) + return FYGT_NULL; + + if (v == fy_true || v == fy_false) + return FYGT_BOOL; + + type = table[v & 15]; + if (type != FYGT_INDIRECT) + return type; + + /* get the indirect */ + p = fy_generic_resolve_ptr(v); + + /* an invalid value marks an alias, the value of the alias is at the anchor */ + flags = *p++; + if (!(flags & FYGIF_VALUE)) + return FYGT_ALIAS; + + /* value immediately follows */ + memcpy(&v, p, sizeof(v)); + + if (v == fy_null) + return FYGT_NULL; + + if (v == fy_true || v == fy_false) + return FYGT_BOOL; + + type = table[v & 15]; + return type != FYGT_INDIRECT ? 
type : FYGT_INVALID; +} + +static inline bool fy_generic_is_in_place(fy_generic v) +{ + enum fy_generic_type t; + + t = fy_generic_get_type(v); + /* for int, float, string inplace forms, the in place tag is always odd (bit 0 set) */ + return t <= FYGT_BOOL || (t < FYGT_SEQUENCE && (v & 1)); +} + +static inline void fy_generic_indirect_get(fy_generic v, struct fy_generic_indirect *gi) +{ + const uint8_t *p; + uint8_t flags; + + assert(fy_generic_is_indirect(v)); + p = fy_generic_resolve_ptr(v); + + gi->flags = 0; + gi->value = fy_invalid; + gi->anchor = fy_invalid; + gi->tag = fy_invalid; + + /* get flags */ + flags = *p++; + if (flags & FYGIF_VALUE) { + memcpy(&gi->value, p, sizeof(gi->value)); + p += sizeof(gi->value); + } + if (flags & FYGIF_ANCHOR) { + memcpy(&gi->anchor, p, sizeof(gi->anchor)); + p += sizeof(gi->anchor); + } + if (flags & FYGIF_TAG) { + memcpy(&gi->tag, p, sizeof(gi->tag)); + p += sizeof(gi->tag); + } +} + +static inline fy_generic fy_generic_indirect_get_value(fy_generic v) +{ + const uint8_t *p; + uint8_t flags; + fy_generic vv; + + assert(fy_generic_is_indirect(v)); + p = fy_generic_resolve_ptr(v); + flags = *p++; + if (!(flags & FYGIF_VALUE)) + return fy_invalid; + memcpy(&vv, p, sizeof(vv)); + return vv; +} + +static inline fy_generic fy_generic_indirect_get_anchor(fy_generic v) +{ + struct fy_generic_indirect gi; + + fy_generic_indirect_get(v, &gi); + return gi.anchor; +} + +static inline fy_generic fy_generic_indirect_get_tag(fy_generic v) +{ + struct fy_generic_indirect gi; + + fy_generic_indirect_get(v, &gi); + return gi.tag; +} + +static inline fy_generic fy_generic_get_anchor(fy_generic v) +{ + fy_generic va; + + if (!fy_generic_is_indirect(v)) + return fy_null; + + va = fy_generic_indirect_get_anchor(v); + assert(va == fy_null || va == fy_invalid || fy_generic_get_type(va) == FYGT_STRING); + return va; +} + +static inline fy_generic fy_generic_get_tag(fy_generic v) +{ + fy_generic vt; + + if (!fy_generic_is_indirect(v)) + return 
fy_null; + + vt = fy_generic_indirect_get_tag(v); + assert(vt == fy_null || vt == fy_invalid || fy_generic_get_type(vt) == FYGT_STRING); + return vt; +} + +struct fy_generic_sequence { + size_t count; + fy_generic items[]; +}; + +struct fy_generic_mapping { + size_t count; + fy_generic pairs[]; +}; + +struct fy_generic_builder { + struct fy_allocator *allocator; + bool owns_allocator; + fy_alloc_tag shared_tag; + fy_alloc_tag alloc_tag; +}; + +struct fy_generic_builder *fy_generic_builder_create(struct fy_allocator *a, fy_alloc_tag shared_tag); +void fy_generic_builder_destroy(struct fy_generic_builder *gb); +void fy_generic_builder_reset(struct fy_generic_builder *gb); + +static inline void *fy_generic_builder_alloc(struct fy_generic_builder *gb, size_t size, size_t align) +{ + assert(gb); + return fy_allocator_alloc(gb->allocator, gb->alloc_tag, size, align); +} + +static inline void fy_generic_builder_free(struct fy_generic_builder *gb, void *ptr) +{ + assert(gb); + fy_allocator_free(gb->allocator, gb->alloc_tag, ptr); +} + +static inline void fy_generic_builder_trim(struct fy_generic_builder *gb) +{ + assert(gb); + fy_allocator_trim_tag(gb->allocator, gb->alloc_tag); +} + +static inline const void *fy_generic_builder_store(struct fy_generic_builder *gb, const void *data, size_t size, size_t align) +{ + assert(gb); + return fy_allocator_store(gb->allocator, gb->alloc_tag, data, size, align); +} + +static inline const void *fy_generic_builder_storev(struct fy_generic_builder *gb, const struct fy_iovecw *iov, unsigned int iovcnt, size_t align) +{ + assert(gb); + return fy_allocator_storev(gb->allocator, gb->alloc_tag, iov, iovcnt, align); +} + +static inline ssize_t fy_generic_builder_get_areas(struct fy_generic_builder *gb, struct fy_iovecw *iov, size_t maxiov) +{ + assert(gb); + return fy_allocator_get_areas(gb->allocator, gb->alloc_tag, iov, maxiov); +} + +static inline const void *fy_generic_builder_get_single_area(struct fy_generic_builder *gb, size_t *sizep, 
size_t *startp, size_t *allocp) +{ + assert(gb); + return fy_allocator_get_single_area(gb->allocator, gb->alloc_tag, sizep, startp, allocp); +} + +static inline void fy_generic_builder_release(struct fy_generic_builder *gb, const void *ptr, size_t size) +{ + assert(gb); + fy_allocator_release(gb->allocator, gb->alloc_tag, ptr, size); +} + +static inline bool fy_generic_get_bool(fy_generic v) +{ + if (fy_generic_is_indirect(v)) + v = fy_generic_indirect_get_value(v); + assert(fy_generic_get_type(v) == FYGT_BOOL); + return (v >> FY_BOOL_INPLACE_SHIFT) != 0; +} + +static inline long long fy_generic_get_int(fy_generic v) +{ + long long *p; + + if (fy_generic_is_indirect(v)) + v = fy_generic_indirect_get_value(v); + + assert(fy_generic_get_type(v) == FYGT_INT); + /* inplace? */ + if ((v & FY_INPLACE_TYPE_MASK) == FY_INT_INPLACE_V) + return (intptr_t)v >> FY_INPLACE_TYPE_SHIFT; + p = fy_generic_resolve_ptr(v); + return *p; +} + +static inline double fy_generic_get_float(fy_generic v) +{ +#ifndef FY_HAS_64BIT_PTR + float *pf; +#endif + double *pd; + + if (fy_generic_is_indirect(v)) + v = fy_generic_indirect_get_value(v); + + assert(fy_generic_get_type(v) == FYGT_FLOAT); + + /* in place for 64 bit, pointer for 32 bit */ + if ((v & FY_INPLACE_TYPE_MASK) == FY_FLOAT_INPLACE_V) { +#ifndef FY_HAS_64BIT_PTR + pf = fy_generic_resolve_ptr(v); + return (double)*pf; +#else +#if __BYTE_ORDER == __LITTLE_ENDIAN + return (double)*((float *)&v + 1); +#else + return (double)*((float *)&v); +#endif +#endif + } + pd = fy_generic_resolve_ptr(v); + return *pd; +} + +static inline const char *fy_generic_get_string_size(fy_generic v, char *inplace, size_t *lenp) +{ + size_t len; + + if (fy_generic_is_indirect(v)) + v = fy_generic_indirect_get_value(v); + + assert(fy_generic_get_type(v) == FYGT_STRING); + + /* in place */ + if ((v & FY_INPLACE_TYPE_MASK) == FY_STRING_INPLACE_V) { + + len = (v >> FY_STRING_INPLACE_SIZE_SHIFT) & FYGT_STRING_INPLACE_SIZE; + switch (len) { + case 0: + inplace[0] = 
'\0'; + break; + case 1: + inplace[0] = (char)(uint8_t)(v >> 8); + inplace[1] = '\0'; + break; + case 2: + inplace[0] = (char)(uint8_t)(v >> 8); + inplace[1] = (char)(uint8_t)(v >> 16); + inplace[2] = '\0'; + break; + case 3: + inplace[0] = (char)(uint8_t)(v >> 8); + inplace[1] = (char)(uint8_t)(v >> 16); + inplace[2] = (char)(uint8_t)(v >> 24); + inplace[3] = '\0'; + break; +#ifdef FY_HAS_64BIT_PTR + case 4: + inplace[0] = (char)(uint8_t)(v >> 8); + inplace[1] = (char)(uint8_t)(v >> 16); + inplace[2] = (char)(uint8_t)(v >> 24); + inplace[3] = (char)(uint8_t)(v >> 32); + inplace[4] = '\0'; + break; + case 5: + inplace[0] = (char)(uint8_t)(v >> 8); + inplace[1] = (char)(uint8_t)(v >> 16); + inplace[2] = (char)(uint8_t)(v >> 24); + inplace[3] = (char)(uint8_t)(v >> 32); + inplace[4] = (char)(uint8_t)(v >> 40); + inplace[5] = '\0'; + break; + case 6: + inplace[0] = (char)(uint8_t)(v >> 8); + inplace[1] = (char)(uint8_t)(v >> 16); + inplace[2] = (char)(uint8_t)(v >> 24); + inplace[3] = (char)(uint8_t)(v >> 32); + inplace[4] = (char)(uint8_t)(v >> 40); + inplace[5] = (char)(uint8_t)(v >> 48); + inplace[6] = '\0'; + break; + case 7: + inplace[0] = (char)(uint8_t)(v >> 8); + inplace[1] = (char)(uint8_t)(v >> 16); + inplace[2] = (char)(uint8_t)(v >> 24); + inplace[3] = (char)(uint8_t)(v >> 32); + inplace[4] = (char)(uint8_t)(v >> 40); + inplace[5] = (char)(uint8_t)(v >> 48); + inplace[6] = (char)(uint8_t)(v >> 56); + inplace[7] = '\0'; + break; +#endif + default: /* will never happen but the compiler is stupid */ + assert(0); + len = 0; + break; + + } + *lenp = len; + return inplace; + } + return (const char *)fy_decode_size(fy_generic_resolve_ptr(v), FYGT_SIZE_ENCODING_MAX, lenp); +} + +static inline const char *fy_generic_get_string(fy_generic v, char *inplace) +{ + size_t size; + + return fy_generic_get_string_size(v, inplace, &size); +} + +#define fy_generic_get_string_size_alloca(_v, _lenp) \ + ({ \ + fy_generic __v = (_v); \ + char *__inplace = NULL; \ + \ + if 
/*
 * fy_double_fits_in_float() - can @val be stored as a float without loss?
 *
 * NaN and the infinities are accepted as they are. Every other value,
 * zero and subnormals included, fits only if converting it to float and
 * back gives the very same double.
 *
 * Subnormal doubles used to be reported as fitting although they turn
 * into 0.0f when narrowed.
 */
static inline bool fy_double_fits_in_float(double val)
{
	float f;

	if (isnan(val) || isinf(val))
		return true;

	f = (float)val;
	return (double)f == val;
}
(_v); \ + double *__vp; \ + fy_generic _r; \ + float __f; \ + uint32_t __fi; \ + \ + if (fy_double_fits_in_float(__v)) { \ + __f = (float)__v; \ + memcpy(&__fi, &__f, sizeof(__fi)); \ + _r = ((fy_generic)__fi << FY_FLOAT_INPLACE_SHIFT) | FY_FLOAT_INPLACE_V; \ + } else { \ + __vp = alloca(sizeof(*__vp)); \ + assert(((uintptr_t)__vp & FY_INPLACE_TYPE_MASK) == 0); \ + *__vp = __v; \ + _r = (fy_generic)__vp | FY_FLOAT_OUTPLACE_V; \ + } \ + _r; \ + }) +#else +#define fy_generic_float_alloca(_v) \ + ({ \ + double __v = (_v); \ + double *__vp; \ + fy_generic _r; \ + \ + __vp = alloca(sizeof(*__vp)); \ + assert(((uintptr_t)__vp & FY_INPLACE_TYPE_MASK) == 0); \ + *__vp = __v; \ + _r = (fy_generic)__vp | FY_FLOAT_OUTPLACE_V; \ + _r; \ + }) +#endif + +fy_generic fy_generic_string_size_create(struct fy_generic_builder *gb, const char *str, size_t len); + +static inline fy_generic fy_generic_string_create(struct fy_generic_builder *gb, const char *str) +{ + return fy_generic_string_size_create(gb, str, strlen(str)); +} + +fy_generic fy_generic_string_vcreate(struct fy_generic_builder *gb, const char *fmt, va_list ap); +fy_generic fy_generic_string_createf(struct fy_generic_builder *gb, const char *fmt, ...) 
/*
 * fy_generic_string_size_alloca() - build a string generic on the stack
 *
 * @_v:   pointer to the characters (evaluated once)
 * @_len: number of characters (evaluated once)
 *
 * Strings of up to FYGT_STRING_INPLACE_SIZE bytes are packed in the generic
 * itself: the length goes at FY_STRING_INPLACE_SIZE_SHIFT and character i
 * in byte i + 1. Longer strings are copied to alloca() storage as an
 * encoded size, the characters and a terminating NUL.
 *
 * A single definition serves both pointer widths, since the in place limit
 * is the only thing that differs. Characters are widened through uint8_t
 * so that bytes >= 0x80 are not sign extended over the rest of the value.
 * The former 32 bit variant also switched on an undeclared 'len'.
 */
#define fy_generic_string_size_alloca(_v, _len) \
	({ \
		const char *__v = (_v); \
		size_t __len = (_len); \
		size_t __i; \
		uint8_t *__vp, *__s; \
		fy_generic _r; \
		\
		if (__len <= FYGT_STRING_INPLACE_SIZE) { \
			_r = ((fy_generic)__len << FY_STRING_INPLACE_SIZE_SHIFT) | \
				FY_STRING_INPLACE_V; \
			for (__i = 0; __i < __len; __i++) \
				_r |= (fy_generic)(uint8_t)__v[__i] << (8 * (__i + 1)); \
		} else { \
			__vp = alloca(FYGT_SIZE_ENCODING_MAX + __len + 1); \
			assert(((uintptr_t)__vp & FY_INPLACE_TYPE_MASK) == 0); \
			__s = fy_encode_size(__vp, FYGT_SIZE_ENCODING_MAX, __len); \
			memcpy(__s, __v, __len); \
			__s[__len] = '\0'; \
			_r = (fy_generic)__vp | FY_STRING_OUTPLACE_V; \
		} \
		_r; \
	})
/*
 * fy_generic_mapping_alloca() - build a mapping generic on the stack
 *
 * @_count: number of key/value pairs (evaluated once)
 * @_pairs: array of 2 * @_count generics, key first then value
 *
 * The storage comes from alloca(), over-allocated by 15 bytes so that the
 * mapping can be placed on a 16 byte boundary as FY_MAP_V requires.
 *
 * The count is stored from the local copy; it used to be taken from
 * @_count a second time, repeating any side effect of the argument.
 */
#define fy_generic_mapping_alloca(_count, _pairs) \
	({ \
		struct fy_generic_mapping *__vp; \
		size_t __count = (_count); \
		size_t __size = sizeof(*__vp) + 2 * __count * sizeof(fy_generic); \
		\
		__vp = fy_ptr_align(alloca(__size + 15), 16); \
		__vp->count = __count; \
		memcpy(__vp->pairs, (_pairs), 2 * __count * sizeof(fy_generic)); \
		(fy_generic)__vp | FY_MAP_V; \
	})
(fy_generic_is_indirect(map)) + map = fy_generic_indirect_get_value(map); + + assert(fy_generic_get_type(map) == FYGT_MAPPING); + + p = fy_generic_resolve_collection_ptr(map); + assert(p); + return p->count; +} + +static inline const char *fy_generic_get_alias_size(fy_generic v, char *inplace, size_t *lenp) +{ + fy_generic va; + + assert(fy_generic_is_indirect(v)); + + va = fy_generic_indirect_get_anchor(v); + return fy_generic_get_string_size(va, inplace, lenp); +} + +static inline const char *fy_generic_get_alias(fy_generic v, char *inplace) +{ + size_t size; + + return fy_generic_get_alias_size(v, inplace, &size); +} + +#define fy_generic_get_alias_size_alloca(_v, _lenp) \ + fy_generic_get_string_size_alloca( \ + fy_generic_indirect_get_anchor(_v), (_lenp)) + +#define fy_generic_get_alias_alloca(_v) \ + ({ \ + size_t __len; \ + fy_generic_get_alias_size_alloca((_v), &__len); \ + }) + +fy_generic fy_generic_mapping_lookup(fy_generic map, fy_generic key); + +fy_generic fy_generic_indirect_create(struct fy_generic_builder *gb, const struct fy_generic_indirect *gi); + +fy_generic fy_generic_alias_create(struct fy_generic_builder *gb, fy_generic anchor); + +int fy_generic_compare_out_of_place(fy_generic a, fy_generic b); + +static inline int fy_generic_compare(fy_generic a, fy_generic b) +{ + /* invalids are always non-matching */ + if (a == fy_invalid || b == fy_invalid) + return -1; + + /* equals? 
nice - should work for null, bool, in place int, float and strings */ + /* also for anything that's a pointer */ + if (a == b) + return 0; + + /* invalid types, or differing types do not match */ + if (fy_generic_get_type(a) != fy_generic_get_type(b)) + return -1; + + return fy_generic_compare_out_of_place(a, b); +} + +fy_generic fy_generic_builder_copy_out_of_place(struct fy_generic_builder *gb, fy_generic v); + +static inline fy_generic fy_generic_builder_copy(struct fy_generic_builder *gb, fy_generic v) +{ + if (v == fy_invalid) + return fy_invalid; + + if (!fy_generic_is_indirect(v)) { + + switch (fy_generic_get_type(v)) { + case FYGT_NULL: + case FYGT_BOOL: + return v; + + case FYGT_INT: + if (v & FY_INT_INPLACE_V) + return v; + break; + + case FYGT_FLOAT: + if (v & FY_FLOAT_INPLACE_V) + return v; + break; + + case FYGT_STRING: + if (v & FY_STRING_INPLACE_V) + return v; + break; + + default: + break; + } + } + + return fy_generic_builder_copy_out_of_place(gb, v); +} + +fy_generic fy_generic_relocate(void *start, void *end, fy_generic v, ptrdiff_t d); + +enum fy_generic_schema { + FYGS_AUTO, + FYGS_FALLBACK, + FYGS_YAML1_1, + FYGS_YAML1_2, + FYGS_JSON, +}; + +#endif diff --git a/src/util/fy-utils.h b/src/util/fy-utils.h index 86448985..fd3c53a3 100644 --- a/src/util/fy-utils.h +++ b/src/util/fy-utils.h @@ -17,6 +17,12 @@ #include #include #include +#include + +/* to avoid dragging in libfyaml.h */ +#ifndef FY_BIT +#define FY_BIT(x) (1U << (x)) +#endif #if defined(__APPLE__) && (_POSIX_C_SOURCE < 200809L) FILE *open_memstream(char **ptr, size_t *sizeloc); @@ -40,7 +46,7 @@ int fy_tag_scan(const char *data, size_t len, struct fy_tag_scan_info *info); #define ARRAY_SIZE(x) ((sizeof(x)/sizeof((x)[0]))) #endif -#if !defined(NDEBUG) && (defined(__GNUC__) && __GNUC__ >= 4) +#if defined(NDEBUG) && (defined(__GNUC__) && __GNUC__ >= 4) #define FY_ALWAYS_INLINE __attribute__((always_inline)) #else #define FY_ALWAYS_INLINE /* nothing */ @@ -52,6 +58,36 @@ int 
/*
 * Round the address @p up to the next multiple of @align. The mask
 * arithmetic only gives a multiple when @align is a power of two.
 */
static inline void *fy_ptr_align(void *p, size_t align)
{
	const size_t low_bits = align - 1;
	uintptr_t addr = (uintptr_t)p;

	addr = (addr + low_bits) & ~low_bits;
	return (void *)addr;
}
/*
 * fy_encode_size32() - write the variable length encoding of a 32 bit size
 *
 * @p:     output buffer
 * @bufsz: number of bytes available at @p
 * @size:  the value to encode
 *
 * Every byte but the last carries 7 bits, most significant group first,
 * with the high bit set to announce that more follow. The last byte of a
 * short encoding has the high bit clear; the last byte of the longest (5
 * byte) encoding carries a full 8 bits.
 *
 * Returns a pointer just past the bytes written, or NULL when the encoding
 * does not fit in @bufsz bytes (nothing is written then).
 *
 * The space check compares byte counts, so no pointer beyond the end of
 * the buffer is ever formed.
 */
static inline uint8_t *
fy_encode_size32(uint8_t *p, uint32_t bufsz, uint32_t size)
{
	enum { max_bytes = 5 };	/* 4 * 7 + 8 bits cover all 32 */
	unsigned int nbytes, last_bits, shift, i;

	/* one more byte for every 7 bit group still holding set bits */
	nbytes = 1;
	while (nbytes < max_bytes && (size >> (7 * nbytes)) != 0)
		nbytes++;

	if (bufsz < nbytes)
		return NULL;

	last_bits = nbytes == max_bytes ? 8 : 7;

	for (i = 0; i + 1 < nbytes; i++) {
		shift = last_bits + 7 * (nbytes - 2 - i);
		p[i] = (uint8_t)(((size >> shift) & 0x7f) | 0x80);
	}

	p[i] = last_bits == 8 ? (uint8_t)size : (uint8_t)(size & 0x7f);

	return p + nbytes;
}
uint32_t *sizep) +{ + const uint8_t *p, *end, *end_scan, *end_max_scan; + uint32_t size; + + end = start + bufsz; + + end_max_scan = start + FYVL_SIZE_ENCODING_MAX_32; + + if (end_max_scan < end) + end_scan = end_max_scan; + else + end_scan = end; + + end_max_scan--; + + p = start; + size = 0; + while (p < end_scan) { + if (p < end_max_scan) { + size <<= 7; + size |= (*p & 0x7f); + if (!(*p & 0x80)) + goto done; + } else { + /* last one is always the full 8 bit */ + size <<= 8; + size |= *p; + goto done; + } + p++; + } + + *sizep = (uint32_t)-1; + return NULL; +done: + if (++p >= end) + p = end; + *sizep = size; + return p; +} + +static inline unsigned int +fy_encode_size64_bytes(uint64_t size) +{ + if (size < ((uint64_t)1 << 7)) + return 1; + if (size < ((uint64_t)1 << 14)) + return 2; + if (size < ((uint64_t)1 << 21)) + return 3; + if (size < ((uint64_t)1 << 28)) + return 4; + if (size < ((uint64_t)1 << 35)) + return 5; + if (size < ((uint64_t)1 << 42)) + return 6; + if (size < ((uint64_t)1 << 49)) + return 7; + if (size < ((uint64_t)1 << 56)) + return 8; + return 9; +} + +static inline uint8_t * +fy_encode_size64(uint8_t *p, size_t bufsz, uint64_t size) +{ + uint8_t *end = p + bufsz; + + if (size < ((uint64_t)1 << 7)) { + if (p + 1 > end) + return NULL; + p[0] = (uint8_t)size; + return p + 1; + } + if (size < ((uint64_t)1 << 14)) { + if (p + 2 > end) + return NULL; + p[0] = (uint8_t)(size >> 7) | 0x80; + p[1] = (uint8_t)size & 0x7f; + return p + 2; + } + if (size < ((uint64_t)1 << 21)) { + if (p + 3 > end) + return NULL; + p[0] = (uint8_t)(size >> 14) | 0x80; + p[1] = (uint8_t)(size >> 7) | 0x80; + p[2] = (uint8_t)size & 0x7f; + return p + 3; + } + if (size < ((uint64_t)1 << 28)) { + if (p + 4 > end) + return NULL; + p[0] = (uint8_t)(size >> 21) | 0x80; + p[1] = (uint8_t)(size >> 14) | 0x80; + p[2] = (uint8_t)(size >> 7) | 0x80; + p[3] = (uint8_t)size & 0x7f; + return p + 4; + } + if (size < ((uint64_t)1 << 35)) { + if (p + 5 > end) + return NULL; + p[0] = 
(uint8_t)(size >> 28) | 0x80; + p[1] = (uint8_t)(size >> 21) | 0x80; + p[2] = (uint8_t)(size >> 14) | 0x80; + p[3] = (uint8_t)(size >> 7) | 0x80; + p[4] = (uint8_t)size & 0x7f; + return p + 5; + } + if (size < ((uint64_t)1 << 42)) { + if (p + 6 > end) + return NULL; + p[0] = (uint8_t)(size >> 35) | 0x80; + p[1] = (uint8_t)(size >> 28) | 0x80; + p[2] = (uint8_t)(size >> 21) | 0x80; + p[3] = (uint8_t)(size >> 14) | 0x80; + p[4] = (uint8_t)(size >> 7) | 0x80; + p[5] = (uint8_t)size & 0x7f; + return p + 6; + } + if (size < ((uint64_t)1 << 49)) { + if (p + 7 > end) + return NULL; + p[0] = (uint8_t)(size >> 42) | 0x80; + p[1] = (uint8_t)(size >> 35) | 0x80; + p[2] = (uint8_t)(size >> 28) | 0x80; + p[3] = (uint8_t)(size >> 21) | 0x80; + p[4] = (uint8_t)(size >> 14) | 0x80; + p[5] = (uint8_t)(size >> 7) | 0x80; + p[6] = (uint8_t)size & 0x7f; + return p + 7; + } + if (size < ((uint64_t)1 << 56)) { + if (p + 8 > end) + return NULL; + p[0] = (uint8_t)(size >> 49) | 0x80; + p[1] = (uint8_t)(size >> 42) | 0x80; + p[2] = (uint8_t)(size >> 35) | 0x80; + p[3] = (uint8_t)(size >> 28) | 0x80; + p[4] = (uint8_t)(size >> 21) | 0x80; + p[5] = (uint8_t)(size >> 14) | 0x80; + p[6] = (uint8_t)(size >> 7) | 0x80; + p[7] = (uint8_t)size & 0x7f; + return p + 8; + } + if (p + 9 > end) + return NULL; + p[0] = (uint8_t)(size >> 57) | 0x80; + p[1] = (uint8_t)(size >> 50) | 0x80; + p[2] = (uint8_t)(size >> 43) | 0x80; + p[3] = (uint8_t)(size >> 36) | 0x80; + p[4] = (uint8_t)(size >> 29) | 0x80; + p[5] = (uint8_t)(size >> 22) | 0x80; + p[6] = (uint8_t)(size >> 15) | 0x80; + p[7] = (uint8_t)(size >> 8) | 0x80; + p[8] = (uint8_t)size; + return p + 9; +} + +static inline const uint8_t * +fy_decode_size64(const uint8_t *start, size_t bufsz, uint64_t *sizep) +{ + const uint8_t *p, *end, *end_scan, *end_max_scan; + size_t size; + + end = start + bufsz; + + end_max_scan = start + FYVL_SIZE_ENCODING_MAX_64; + + if (end_max_scan < end) + end_scan = end_max_scan; + else + end_scan = end; + + end_max_scan--; 
+ + p = start; + size = 0; + while (p < end_scan) { + if (p < end_max_scan) { + size <<= 7; + size |= (*p & 0x7f); + if (!(*p & 0x80)) + goto done; + } else { + /* last one is always 8 bit */ + size <<= 8; + size |= *p; + goto done; + } + p++; + } + + *sizep = (size_t)-1; + return NULL; +done: + if (++p >= end) + p = end; + *sizep = size; + return p; +} + +/* is pointless to pretend size_t is anything other than 64 or 32 bits */ +#if SIZE_MAX == UINT64_MAX + +static inline unsigned int +fy_encode_size_bytes(size_t size) +{ + return fy_encode_size64_bytes(size); +} + +static inline uint8_t * +fy_encode_size(uint8_t *p, size_t bufsz, size_t size) +{ + return fy_encode_size64(p, bufsz, size); +} + +static inline const uint8_t * +fy_decode_size(const uint8_t *start, size_t bufsz, size_t *sizep) +{ + uint64_t sz; + const uint8_t *ret; + + ret = fy_decode_size64(start, bufsz, &sz); + if (!ret) { + *sizep = 0; + return NULL; + } + *sizep = sz; + return ret; +} + +#define FYVL_SIZE_ENCODING_MAX FYVL_SIZE_ENCODING_MAX_64 + +#else + +static inline unsigned int +fy_encode_size_bytes(size_t size) +{ + return fy_encode_size32_bytes(size); +} + +static inline uint8_t * +fy_encode_size(uint8_t *p, size_t bufsz, size_t size) +{ + return fy_encode_size32(p, bufsz, size); +} + +static inline const uint8_t * +fy_decode_size(const uint8_t *start, size_t bufsz, size_t *sizep) +{ + uint32_t sz; + const uint8_t *ret; + + ret = fy_decode_size32(start, bufsz, &sz); + if (!ret) + return NULL; + *sizep = sz; + return ret; +} + +#define FYVL_SIZE_ENCODING_MAX FYVL_SIZE_ENCODING_MAX_32 + +#endif + +#endif diff --git a/test/Makefile.am b/test/Makefile.am index f70ddc1d..87e79342 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -30,7 +30,8 @@ if HAVE_COMPATIBLE_CHECK check_PROGRAMS = libfyaml-test libfyaml_test_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_srcdir)/src/valgrind/ \ -I$(top_srcdir)/src/lib/ \ - -I$(top_srcdir)/src/util + -I$(top_srcdir)/src/util \ + -I$(top_srcdir)/src/reflection 
libfyaml_test_LDADD = $(AM_LDADD) $(CHECK_LIBS) $(top_builddir)/src/libfyaml.la libfyaml_test_CFLAGS = $(AM_CFLAGS) $(CHECK_CFLAGS) libfyaml_test_LDFLAGS = $(AM_LDFLAGS) $(CHECK_LDFLAGS)