From 39d2ddc93556fa8bf0b479bab516f8a46b2280a8 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Fri, 20 May 2022 14:13:59 +0530 Subject: [PATCH 1/8] fix doxygen warnings in cudf/io/avro.hpp --- cpp/include/cudf/io/avro.hpp | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/cpp/include/cudf/io/avro.hpp b/cpp/include/cudf/io/avro.hpp index 0e00d14291d..21468bea131 100644 --- a/cpp/include/cudf/io/avro.hpp +++ b/cpp/include/cudf/io/avro.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,9 +35,6 @@ namespace io { * @file */ -/** - * @brief Builder to build options for `read_avro()`. - */ class avro_reader_options_builder; /** @@ -73,21 +70,29 @@ class avro_reader_options { /** * @brief Returns source info. + * + * @return Source info */ [[nodiscard]] source_info const& get_source() const { return _source; } /** * @brief Returns names of the columns to be read. + * + * @return Names of the columns to be read */ [[nodiscard]] std::vector get_columns() const { return _columns; } /** * @brief Returns number of rows to skip from the start. + * + * @return Number of rows to skip from the start */ [[nodiscard]] size_type get_skip_rows() const { return _skip_rows; } /** * @brief Returns number of rows to read. + * + * @return Number of rows to read */ [[nodiscard]] size_type get_num_rows() const { return _num_rows; } @@ -121,6 +126,9 @@ class avro_reader_options { static avro_reader_options_builder builder(source_info const& src); }; +/** + * @brief Builder to build options for `read_avro()`. + */ class avro_reader_options_builder { avro_reader_options options; @@ -184,6 +192,8 @@ class avro_reader_options_builder { * @brief move avro_reader_options member once it's built. 
* * This has been added since Cython does not support overloading of conversion operators. + * + * @return Built `avro_reader_options` object's r-value reference */ avro_reader_options&& build() { return std::move(options); } }; From 342a84caf9127e8c7732fbbdabe14a4b3467acdd Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Fri, 20 May 2022 14:14:33 +0530 Subject: [PATCH 2/8] fix doxygen warnings in cudf/io/csv.hpp --- cpp/include/cudf/io/csv.hpp | 115 +++++++++++++++++++++++++++++++++++- 1 file changed, 112 insertions(+), 3 deletions(-) diff --git a/cpp/include/cudf/io/csv.hpp b/cpp/include/cudf/io/csv.hpp index 44ede9b0d63..9216ac3147c 100644 --- a/cpp/include/cudf/io/csv.hpp +++ b/cpp/include/cudf/io/csv.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -158,26 +158,36 @@ class csv_reader_options { /** * @brief Returns source info. + * + * @return Source info */ [[nodiscard]] source_info const& get_source() const { return _source; } /** * @brief Returns compression format of the source. + * + * @return Compression format of the source */ [[nodiscard]] compression_type get_compression() const { return _compression; } /** * @brief Returns number of bytes to skip from source start. + * + * @return Number of bytes to skip from source start */ [[nodiscard]] std::size_t get_byte_range_offset() const { return _byte_range_offset; } /** * @brief Returns number of bytes to read. + * + * @return Number of bytes to read */ [[nodiscard]] std::size_t get_byte_range_size() const { return _byte_range_size; } /** * @brief Returns number of bytes to read with padding. 
+ * + * @return Number of bytes to read with padding */ [[nodiscard]] std::size_t get_byte_range_size_with_padding() const { @@ -190,6 +200,8 @@ class csv_reader_options { /** * @brief Returns number of bytes to pad when reading. + * + * @return Number of bytes to pad when reading */ [[nodiscard]] std::size_t get_byte_range_padding() const { @@ -212,21 +224,29 @@ class csv_reader_options { /** * @brief Returns names of the columns. + * + * @return Names of the columns */ [[nodiscard]] std::vector const& get_names() const { return _names; } /** * @brief Returns prefix to be used for column ID. + * + * @return Prefix to be used for column ID */ [[nodiscard]] std::string get_prefix() const { return _prefix; } /** * @brief Whether to rename duplicate column names. + * + * @return Boolean indicating whether to rename duplicate column names */ [[nodiscard]] bool is_enabled_mangle_dupe_cols() const { return _mangle_dupe_cols; } /** * @brief Returns names of the columns to be read. + * + * @return Names of the columns to be read */ [[nodiscard]] std::vector const& get_use_cols_names() const { @@ -235,91 +255,127 @@ class csv_reader_options { /** * @brief Returns indexes of columns to read. + * + * @return Indexes of columns to read */ [[nodiscard]] std::vector const& get_use_cols_indexes() const { return _use_cols_indexes; } /** * @brief Returns number of rows to read. + * + * @return Number of rows to read */ [[nodiscard]] size_type get_nrows() const { return _nrows; } /** * @brief Returns number of rows to skip from start. + * + * @return Number of rows to skip from start */ [[nodiscard]] size_type get_skiprows() const { return _skiprows; } /** * @brief Returns number of rows to skip from end. + * + * @return Number of rows to skip from end */ [[nodiscard]] size_type get_skipfooter() const { return _skipfooter; } /** * @brief Returns header row index. 
+ * + * @return Header row index */ [[nodiscard]] size_type get_header() const { return _header; } /** * @brief Returns line terminator. + * + * @return Line terminator */ [[nodiscard]] char get_lineterminator() const { return _lineterminator; } /** * @brief Returns field delimiter. + * + * @return Field delimiter */ [[nodiscard]] char get_delimiter() const { return _delimiter; } /** * @brief Returns numeric data thousands separator. + * + * @return Numeric data thousands separator */ [[nodiscard]] char get_thousands() const { return _thousands; } /** * @brief Returns decimal point character. + * + * @return Decimal point character */ [[nodiscard]] char get_decimal() const { return _decimal; } /** * @brief Returns comment line start character. + * + * @return Comment line start character */ [[nodiscard]] char get_comment() const { return _comment; } /** * @brief Whether to treat `\r\n` as line terminator. + * + * @return Boolean indicating whether to treat `\r\n` as line terminator */ [[nodiscard]] bool is_enabled_windowslinetermination() const { return _windowslinetermination; } /** * @brief Whether to treat whitespace as field delimiter. + * + * @return Boolean indicating whether to treat whitespace as field delimiter */ [[nodiscard]] bool is_enabled_delim_whitespace() const { return _delim_whitespace; } /** * @brief Whether to skip whitespace after the delimiter. + * + * @return Boolean indicating whether to skip whitespace after the delimiter */ [[nodiscard]] bool is_enabled_skipinitialspace() const { return _skipinitialspace; } /** * @brief Whether to ignore empty lines or parse line values as invalid. + * + * @return Boolean indicating whether to ignore empty lines or parse line values as invalid */ [[nodiscard]] bool is_enabled_skip_blank_lines() const { return _skip_blank_lines; } /** * @brief Returns quoting style. + * + * @return Quoting style */ [[nodiscard]] quote_style get_quoting() const { return _quoting; } /** * @brief Returns quoting character. 
+ * + * @return Quoting character */ [[nodiscard]] char get_quotechar() const { return _quotechar; } /** * @brief Whether a quote inside a value is double-quoted. + * + * @return Boolean indicating whether a quote inside a value is double-quoted */ [[nodiscard]] bool is_enabled_doublequote() const { return _doublequote; } /** * @brief Returns names of columns to read as datetime. + * + * @return Names of columns to read as datetime */ [[nodiscard]] std::vector const& get_parse_dates_names() const { @@ -328,6 +384,8 @@ class csv_reader_options { /** * @brief Returns indexes of columns to read as datetime. + * + * @return Indexes of columns to read as datetime */ [[nodiscard]] std::vector const& get_parse_dates_indexes() const { @@ -336,6 +394,8 @@ class csv_reader_options { /** * @brief Returns names of columns to read as hexadecimal. + * + * @return Names of columns to read as hexadecimal */ [[nodiscard]] std::vector const& get_parse_hex_names() const { @@ -344,11 +404,15 @@ class csv_reader_options { /** * @brief Returns indexes of columns to read as hexadecimal. + * + * @return Indexes of columns to read as hexadecimal */ [[nodiscard]] std::vector const& get_parse_hex_indexes() const { return _parse_hex_indexes; } /** * @brief Returns per-column types. + * + * @return Per-column types */ std::variant, std::map> const& get_dtypes() const { @@ -357,36 +421,50 @@ class csv_reader_options { /** * @brief Returns additional values to recognize as boolean true values. + * + * @return Additional values to recognize as boolean true values */ std::vector const& get_true_values() const { return _true_values; } /** * @brief Returns additional values to recognize as boolean false values. + * + * @return Additional values to recognize as boolean false values */ std::vector const& get_false_values() const { return _false_values; } /** * @brief Returns additional values to recognize as null values. 
+ * + * @return Additional values to recognize as null values */ std::vector const& get_na_values() const { return _na_values; } /** * @brief Whether to keep the built-in default NA values. + * + * @return Boolean indicating whether to keep the built-in default NA values */ bool is_enabled_keep_default_na() const { return _keep_default_na; } /** * @brief Whether to disable null filter. + * + * @return `true` if null filter is enabled, `false` if it is disabled */ bool is_enabled_na_filter() const { return _na_filter; } /** * @brief Whether to parse dates as DD/MM versus MM/DD. + * + * @return True if dates are parsed as DD/MM, false if MM/DD. */ bool is_enabled_dayfirst() const { return _dayfirst; } /** * @brief Returns timestamp_type to which all timestamp columns will be cast. + * + * @return timestamp_type to which all timestamp columns will be cast */ data_type get_timestamp_type() const { return _timestamp_type; } @@ -723,8 +801,12 @@ class csv_reader_options { void set_timestamp_type(data_type type) { _timestamp_type = type; } }; +/** + * @brief Builder to build options for `read_csv()`. + * + */ class csv_reader_options_builder { - csv_reader_options options; + csv_reader_options options; ///< Options to be built. public: /** @@ -1194,6 +1276,8 @@ class csv_reader_options_builder { * @brief move csv_reader_options member once it's built. * * This has been added since Cython does not support overloading of conversion operators. + * + * @return Built `csv_reader_options` object's r-value reference */ csv_reader_options&& build() { return std::move(options); } }; @@ -1288,51 +1372,71 @@ class csv_writer_options { /** * @brief Returns sink used for writer output. + * + * @return sink used for writer output */ [[nodiscard]] sink_info const& get_sink() const { return _sink; } /** * @brief Returns table that would be written to output. 
+ * + * @return Table that would be written to output */ [[nodiscard]] table_view const& get_table() const { return _table; } /** * @brief Returns optional associated metadata. + * + * @return Optional associated metadata */ [[nodiscard]] table_metadata const* get_metadata() const { return _metadata; } /** * @brief Returns string to used for null entries. + * + * @return string to use for null entries */ [[nodiscard]] std::string get_na_rep() const { return _na_rep; } /** * @brief Whether to write headers to csv. + * + * @return Boolean value indicating whether to write headers to csv. */ [[nodiscard]] bool is_enabled_include_header() const { return _include_header; } /** * @brief Returns maximum number of rows to process for each file write. + * + * @return Maximum number of rows to process for each file write. */ [[nodiscard]] size_type get_rows_per_chunk() const { return _rows_per_chunk; } /** * @brief Returns character used for separating lines. + * + * @return Character used for separating lines. */ [[nodiscard]] std::string get_line_terminator() const { return _line_terminator; } /** * @brief Returns character used for separating lines. + * + * @return Character used for separating column values. */ [[nodiscard]] char get_inter_column_delimiter() const { return _inter_column_delimiter; } /** * @brief Returns string used for values != 0 in INT8 types. + * + * @return string used for values != 0 in INT8 types */ [[nodiscard]] std::string get_true_value() const { return _true_value; } /** * @brief Returns string used for values == 0 in INT8 types. 
+ * + * @return string used for values == 0 in INT8 types */ [[nodiscard]] std::string get_false_value() const { return _false_value; } @@ -1394,8 +1498,11 @@ class csv_writer_options { void set_false_value(std::string val) { _false_value = val; } }; +/** + * @brief Builder to build options for `write_csv()` + */ class csv_writer_options_builder { - csv_writer_options options; + csv_writer_options options; ///< Options to be built. public: /** @@ -1521,6 +1628,8 @@ class csv_writer_options_builder { * @brief move `csv_writer_options` member once it's built. * * This has been added since Cython does not support overloading of conversion operators. + * + * @return Built `csv_writer_options` object's r-value reference */ csv_writer_options&& build() { return std::move(options); } }; From 192b31e032d21982e0786c140d76bf60e9d997c4 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Fri, 20 May 2022 14:14:50 +0530 Subject: [PATCH 3/8] fix doxygen warnings in cudf/io/json.hpp --- cpp/include/cudf/io/json.hpp | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/cpp/include/cudf/io/json.hpp b/cpp/include/cudf/io/json.hpp index e2d4de83b49..d9397015ea3 100644 --- a/cpp/include/cudf/io/json.hpp +++ b/cpp/include/cudf/io/json.hpp @@ -36,9 +36,6 @@ namespace io { * @file */ -/** - * @brief Builds settings to use for `read_json()`. - */ class json_reader_options_builder; /** @@ -110,11 +107,15 @@ class json_reader_options { /** * @brief Returns source info. + * + * @returns Source info */ [[nodiscard]] source_info const& get_source() const { return _source; } /** * @brief Returns data types of the columns. + * + * @returns Data types of the columns. */ std::variant, std::map> const& get_dtypes() const { @@ -123,21 +124,29 @@ class json_reader_options { /** * @brief Returns compression format of the source. 
+ * + * @return Compression format of the source */ compression_type get_compression() const { return _compression; } /** * @brief Returns number of bytes to skip from source start. + * + * @return Number of bytes to skip from source start */ size_t get_byte_range_offset() const { return _byte_range_offset; } /** * @brief Returns number of bytes to read. + * + * @return Number of bytes to read */ size_t get_byte_range_size() const { return _byte_range_size; } /** * @brief Returns number of bytes to read with padding. + * + * @return Number of bytes to read with padding */ size_t get_byte_range_size_with_padding() const { @@ -150,6 +159,8 @@ class json_reader_options { /** * @brief Returns number of bytes to pad when reading. + * + * @return Number of bytes to pad */ size_t get_byte_range_padding() const { @@ -170,11 +181,15 @@ class json_reader_options { /** * @brief Whether to read the file as a json object per line. + * + * @return Boolean indicating whether to read the file as a json object per line */ bool is_enabled_lines() const { return _lines; } /** * @brief Whether to parse dates as DD/MM versus MM/DD. + * + * @returns true if dates are parsed as DD/MM, false if MM/DD. */ bool is_enabled_dayfirst() const { return _dayfirst; } @@ -228,6 +243,9 @@ class json_reader_options { void enable_dayfirst(bool val) { _dayfirst = val; } }; +/** + * @brief Builds settings to use for `read_json()`. + */ class json_reader_options_builder { json_reader_options options; @@ -339,6 +357,8 @@ class json_reader_options_builder { * @brief move json_reader_options member once it's built. * * This has been added since Cython does not support overloading of conversion operators. 
+ * + * @return Built `json_reader_options` object r-value reference */ json_reader_options&& build() { return std::move(options); } }; From 5846ff1cbf5c8c050ecc1d4bd8c656207a367b25 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Fri, 20 May 2022 14:15:20 +0530 Subject: [PATCH 4/8] fix doxygen warnings in cudf/io/orc.hpp --- cpp/include/cudf/io/orc.hpp | 91 ++++++++++++++++++++++++++-- cpp/include/cudf/io/orc_metadata.hpp | 35 ++++++----- 2 files changed, 106 insertions(+), 20 deletions(-) diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp index 9e8fd1244d0..6ae6b45dc7e 100644 --- a/cpp/include/cudf/io/orc.hpp +++ b/cpp/include/cudf/io/orc.hpp @@ -34,9 +34,9 @@ namespace io { * @file */ -constexpr size_t default_stripe_size_bytes = 64 * 1024 * 1024; -constexpr size_type default_stripe_size_rows = 1000000; -constexpr size_type default_row_index_stride = 10000; +constexpr size_t default_stripe_size_bytes = 64 * 1024 * 1024; ///< 64MB default orc stripe size +constexpr size_type default_stripe_size_rows = 1000000; ///< 1M rows default orc stripe rows +constexpr size_type default_row_index_stride = 10000; ///< 10K rows default orc row index stride /** * @brief Builds settings to use for `read_orc()`. @@ -97,46 +97,64 @@ class orc_reader_options { /** * @brief Returns source info. + * + * @return Source info */ [[nodiscard]] source_info const& get_source() const { return _source; } /** * @brief Returns names of the columns to read. + * + * @return Names of the columns to read */ [[nodiscard]] std::vector const& get_columns() const { return _columns; } /** * @brief Returns vector of vectors, stripes to read for each input source + * + * @return Vector of vectors, stripes to read for each input source */ std::vector> const& get_stripes() const { return _stripes; } /** * @brief Returns number of rows to skip from the start. 
+ * + * @return Number of rows to skip from the start */ size_type get_skip_rows() const { return _skip_rows; } /** * @brief Returns number of row to read. + * + * @return Number of row to read */ size_type get_num_rows() const { return _num_rows; } /** * @brief Whether to use row index to speed-up reading. + * + * @return Boolean indicating whether to use row index to speed-up reading */ bool is_enabled_use_index() const { return _use_index; } /** * @brief Whether to use numpy-compatible dtypes. + * + * @return Boolean indicating whether to use numpy-compatible dtypes */ bool is_enabled_use_np_dtypes() const { return _use_np_dtypes; } /** * @brief Returns timestamp type to which timestamp column will be cast. + * + * @return Timestamp type to which timestamp column will be cast */ data_type get_timestamp_type() const { return _timestamp_type; } /** - * @brief Fully qualified names of columns that should be read as 128-bit Decimal. + * @brief Returns fully qualified names of columns that should be read as 128-bit Decimal. + * + * @return Fully qualified names of columns that should be read as 128-bit Decimal */ std::vector const& get_decimal128_columns() const { return _decimal128_columns; } @@ -215,6 +233,9 @@ class orc_reader_options { } }; +/** + * @brief Builds settings to use for `read_orc()`. + */ class orc_reader_options_builder { orc_reader_options options; @@ -338,6 +359,8 @@ class orc_reader_options_builder { * @brief move orc_reader_options member once it's built. * * This has been added since Cython does not support overloading of conversion operators. + * + * @return Built `orc_reader_options` object's r-value reference */ orc_reader_options&& build() { return std::move(options); } }; @@ -445,16 +468,22 @@ class orc_writer_options { /** * @brief Returns sink info. + * + * @return Sink info */ [[nodiscard]] sink_info const& get_sink() const { return _sink; } /** * @brief Returns compression type. 
+ * + * @return Compression type */ [[nodiscard]] compression_type get_compression() const { return _compression; } /** * @brief Whether writing column statistics is enabled/disabled. + * + * @return Boolean indicating whether writing column statistics is enabled/disabled */ [[nodiscard]] bool is_enabled_statistics() const { @@ -463,21 +492,29 @@ class orc_writer_options { /** * @brief Returns frequency of statistics collection. + * + * @return Frequency of statistics collection */ [[nodiscard]] statistics_freq get_statistics_freq() const { return _stats_freq; } /** * @brief Returns maximum stripe size, in bytes. + * + * @return Maximum stripe size, in bytes */ [[nodiscard]] auto get_stripe_size_bytes() const { return _stripe_size_bytes; } /** * @brief Returns maximum stripe size, in rows. + * + * @return Maximum stripe size, in rows */ [[nodiscard]] auto get_stripe_size_rows() const { return _stripe_size_rows; } /** * @brief Returns the row index stride. + * + * @return Row index stride */ auto get_row_index_stride() const { @@ -487,16 +524,22 @@ class orc_writer_options { /** * @brief Returns table to be written to output. + * + * @return Table to be written to output */ [[nodiscard]] table_view get_table() const { return _table; } /** * @brief Returns associated metadata. + * + * @return Associated metadata */ [[nodiscard]] table_input_metadata const* get_metadata() const { return _metadata; } /** * @brief Returns Key-Value footer metadata information. + * + * @return Key-Value footer metadata information */ [[nodiscard]] std::map const& get_key_value_metadata() const { @@ -526,6 +569,8 @@ class orc_writer_options { /** * @brief Sets the maximum stripe size, in bytes. + * + * @param size_bytes Maximum stripe size, in bytes to be set */ void set_stripe_size_bytes(size_t size_bytes) { @@ -538,6 +583,8 @@ class orc_writer_options { * * If the stripe size is smaller that the row group size, row group size will be reduced to math * the stripe size. 
+ * + * @param size_rows Maximum stripe size, in rows to be set */ void set_stripe_size_rows(size_type size_rows) { @@ -549,6 +596,8 @@ class orc_writer_options { * @brief Sets the row index stride. * * Rounded down to a multiple of 8. + * + * @param stride Row index stride to be set */ void set_row_index_stride(size_type stride) { @@ -581,6 +630,9 @@ class orc_writer_options { } }; +/** + * @brief Builds settings to use for `write_orc()`. + */ class orc_writer_options_builder { orc_writer_options options; @@ -712,6 +764,8 @@ class orc_writer_options_builder { * @brief move orc_writer_options member once it's built. * * This has been added since Cython does not support overloading of conversion operators. + * + * @return Built `orc_writer_options` object's r-value reference */ orc_writer_options&& build() { return std::move(options); } }; @@ -789,31 +843,43 @@ class chunked_orc_writer_options { /** * @brief Returns sink info. + * + * @return Sink info */ [[nodiscard]] sink_info const& get_sink() const { return _sink; } /** * @brief Returns compression type. + * + * @return Compression type */ [[nodiscard]] compression_type get_compression() const { return _compression; } /** * @brief Returns granularity of statistics collection. + * + * @return Granularity of statistics collection */ [[nodiscard]] statistics_freq get_statistics_freq() const { return _stats_freq; } /** * @brief Returns maximum stripe size, in bytes. + * + * @return Maximum stripe size, in bytes */ [[nodiscard]] auto get_stripe_size_bytes() const { return _stripe_size_bytes; } /** * @brief Returns maximum stripe size, in rows. + * + * @return Maximum stripe size, in rows */ [[nodiscard]] auto get_stripe_size_rows() const { return _stripe_size_rows; } /** * @brief Returns the row index stride. + * + * @return Row index stride */ auto get_row_index_stride() const { @@ -823,11 +889,15 @@ class chunked_orc_writer_options { /** * @brief Returns associated metadata. 
+ * + * @return Associated metadata */ [[nodiscard]] table_input_metadata const* get_metadata() const { return _metadata; } /** * @brief Returns Key-Value footer metadata information. + * + * @return Key-Value footer metadata information */ [[nodiscard]] std::map const& get_key_value_metadata() const { @@ -857,6 +927,8 @@ class chunked_orc_writer_options { /** * @brief Sets the maximum stripe size, in bytes. + * + * @param size_bytes Maximum stripe size, in bytes to be set */ void set_stripe_size_bytes(size_t size_bytes) { @@ -869,6 +941,8 @@ class chunked_orc_writer_options { * * If the stripe size is smaller that the row group size, row group size will be reduced to math * the stripe size. + * + * @param size_rows Maximum stripe size, in rows to be set */ void set_stripe_size_rows(size_type size_rows) { @@ -880,6 +954,8 @@ class chunked_orc_writer_options { * @brief Sets the row index stride. * * Rounded down to a multiple of 8. + * + * @param stride Row index stride to be set */ void set_row_index_stride(size_type stride) { @@ -905,6 +981,9 @@ class chunked_orc_writer_options { } }; +/** + * @brief Builds settings to use for `write_orc_chunked()`. + */ class chunked_orc_writer_options_builder { chunked_orc_writer_options options; @@ -1022,6 +1101,8 @@ class chunked_orc_writer_options_builder { * @brief move chunked_orc_writer_options member once it's built. * * This has been added since Cython does not support overloading of conversion operators. 
+ * + * @return Built `chunked_orc_writer_options` object's r-value reference */ chunked_orc_writer_options&& build() { return std::move(options); } }; @@ -1077,7 +1158,7 @@ class orc_chunked_writer { */ void close(); - // Unique pointer to impl writer class + /// Unique pointer to impl writer class std::unique_ptr writer; }; diff --git a/cpp/include/cudf/io/orc_metadata.hpp b/cpp/include/cudf/io/orc_metadata.hpp index 807fab2e85c..e5b89cc0f91 100644 --- a/cpp/include/cudf/io/orc_metadata.hpp +++ b/cpp/include/cudf/io/orc_metadata.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -40,9 +40,9 @@ namespace io { * contains one element per stripe, where each element contains column statistics for each column. */ struct raw_orc_statistics { - std::vector column_names; - std::vector file_stats; - std::vector> stripes_stats; + std::vector column_names; ///< Column names + std::vector file_stats; ///< File-level statistics for each column + std::vector> stripes_stats; ///< Stripe-level statistics for each column }; /** @@ -74,8 +74,8 @@ using no_statistics = std::monostate; */ template struct minmax_statistics { - std::optional minimum; - std::optional maximum; + std::optional minimum; ///< Minimum value + std::optional maximum; ///< Maximum value }; /** @@ -85,7 +85,7 @@ struct minmax_statistics { */ template struct sum_statistics { - std::optional sum; + std::optional sum; ///< Sum of values in column }; /** @@ -116,7 +116,7 @@ struct string_statistics : minmax_statistics, sum_statistics count; + std::vector count; ///< Count of `false` and `true` values }; /** @@ -144,8 +144,8 @@ using binary_statistics = sum_statistics; * the UNIX epoch. The `minimum_utc` and `maximum_utc` are the same values adjusted to UTC. 
*/ struct timestamp_statistics : minmax_statistics { - std::optional minimum_utc; - std::optional maximum_utc; + std::optional minimum_utc; ///< minimum in milliseconds + std::optional maximum_utc; ///< maximum in milliseconds }; namespace orc { @@ -162,7 +162,7 @@ struct column_statistics; * have additional statistics, accessible through `type_specific_stats` accessor. */ struct column_statistics { - std::optional number_of_values; + std::optional number_of_values; ///< number of statistics std::variant - type_specific_stats; + type_specific_stats; ///< type-specific statistics + /** + * @brief Construct a new column statistics object + * + * @param detail_statistics The statistics to initialize the object with + */ column_statistics(cudf::io::orc::column_statistics&& detail_statistics); }; @@ -185,9 +190,9 @@ struct column_statistics { * column. */ struct parsed_orc_statistics { - std::vector column_names; - std::vector file_stats; - std::vector> stripes_stats; + std::vector column_names; ///< column names + std::vector file_stats; ///< file-level statistics + std::vector> stripes_stats; ///< stripe-level statistics }; /** From 61c7eacb62b88ec0ce15a5a4f52360c1716a3c03 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Fri, 20 May 2022 14:16:07 +0530 Subject: [PATCH 5/8] fix doxygen warnings in cudf/io/parquet.hpp --- cpp/include/cudf/io/parquet.hpp | 96 ++++++++++++++++++++++++++++----- 1 file changed, 82 insertions(+), 14 deletions(-) diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index d44f15f99f7..149865a1ced 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -37,12 +37,9 @@ namespace io { * @file */ -constexpr size_t default_row_group_size_bytes = 128 * 1024 * 1024; // 128MB -constexpr size_type default_row_group_size_rows = 1000000; +constexpr size_t default_row_group_size_bytes = 128 * 1024 * 1024; ///< 128MB per row group +constexpr size_type default_row_group_size_rows = 1000000; 
///< 1 million rows per row group -/** - * @brief Builds parquet_reader_options to use for `read_parquet()`. - */ class parquet_reader_options_builder; /** @@ -95,12 +92,16 @@ class parquet_reader_options { /** * @brief Returns source info. + * + * @return Source info */ [[nodiscard]] source_info const& get_source() const { return _source; } /** * @brief Returns true/false depending on whether strings should be converted to categories or * not. + * + * @return True/false depending on whether strings should be converted to categories or not. */ [[nodiscard]] bool is_enabled_convert_strings_to_categories() const { @@ -109,31 +110,43 @@ class parquet_reader_options { /** * @brief Returns true/false depending whether to use pandas metadata or not while reading. + * + * @return True/false depending whether to use pandas metadata or not while reading. */ [[nodiscard]] bool is_enabled_use_pandas_metadata() const { return _use_pandas_metadata; } /** * @brief Returns number of rows to skip from the start. + * + * @return Number of rows to skip from the start. */ [[nodiscard]] size_type get_skip_rows() const { return _skip_rows; } /** * @brief Returns number of rows to read. + * + * @return Number of rows to read. */ [[nodiscard]] size_type get_num_rows() const { return _num_rows; } /** * @brief Returns names of column to be read. + * + * @return Names of column to be read. */ [[nodiscard]] std::vector const& get_columns() const { return _columns; } /** * @brief Returns list of individual row groups to be read. + * + * @return List of individual row groups to be read. */ std::vector> const& get_row_groups() const { return _row_groups; } /** * @brief Returns timestamp type used to cast timestamp columns. + * + * @return Timestamp type used to cast timestamp columns. 
*/ data_type get_timestamp_type() const { return _timestamp_type; } @@ -208,6 +221,9 @@ class parquet_reader_options { void set_timestamp_type(data_type type) { _timestamp_type = type; } }; +/** + * @brief Builds parquet_reader_options to use for `read_parquet()`. + */ class parquet_reader_options_builder { parquet_reader_options options; @@ -319,6 +335,8 @@ class parquet_reader_options_builder { * @brief move parquet_reader_options member once it's built. * * This has been added since Cython does not support overloading of conversion operators. + * + * @return Built `parquet_reader_options` object's r-value reference */ parquet_reader_options&& build() { return std::move(options); } }; @@ -350,9 +368,6 @@ table_with_metadata read_parquet( * @file */ -/** - * @brief Class to build `parquet_writer_options`. - */ class parquet_writer_options_builder; /** @@ -394,7 +409,7 @@ class parquet_writer_options { { } - friend class parquet_writer_options_builder; + friend parquet_writer_options_builder; public: /** @@ -423,36 +438,50 @@ class parquet_writer_options { /** * @brief Returns sink info. + * + * @return Sink info */ [[nodiscard]] sink_info const& get_sink() const { return _sink; } /** * @brief Returns compression format used. + * + * @return Compression format */ [[nodiscard]] compression_type get_compression() const { return _compression; } /** * @brief Returns level of statistics requested in output file. + * + * @return level of statistics requested in output file */ [[nodiscard]] statistics_freq get_stats_level() const { return _stats_level; } /** * @brief Returns table_view. + * + * @return Table view */ [[nodiscard]] table_view get_table() const { return _table; } /** * @brief Returns partitions. + * + * @return Partitions */ [[nodiscard]] std::vector const& get_partitions() const { return _partitions; } /** * @brief Returns associated metadata. 
+ * + * @return Associated metadata */ [[nodiscard]] table_input_metadata const* get_metadata() const { return _metadata; } /** * @brief Returns Key-Value footer metadata information. + * + * @return Key-Value footer metadata information */ std::vector> const& get_key_value_metadata() const { @@ -461,11 +490,15 @@ class parquet_writer_options { /** * @brief Returns `true` if timestamps will be written as INT96 + * + * @return True if timestamps will be written as INT96 */ bool is_enabled_int96_timestamps() const { return _write_timestamps_as_int96; } /** * @brief Returns Column chunks file paths to be set in the raw output metadata. + * + * @return Column chunks file paths to be set in the raw output metadata */ std::vector const& get_column_chunks_file_paths() const { @@ -474,11 +507,15 @@ class parquet_writer_options { /** * @brief Returns maximum row group size, in bytes. + * + * @return Maximum row group size, in bytes */ auto get_row_group_size_bytes() const { return _row_group_size_bytes; } /** * @brief Returns maximum row group size, in rows. + * + * @return Maximum row group size, in rows */ auto get_row_group_size_rows() const { return _row_group_size_rows; } @@ -551,6 +588,8 @@ class parquet_writer_options { /** * @brief Sets the maximum row group size, in bytes. + * + * @param size_bytes Maximum row group size, in bytes to set */ void set_row_group_size_bytes(size_t size_bytes) { @@ -562,6 +601,8 @@ class parquet_writer_options { /** * @brief Sets the maximum row group size, in rows. + * + * @param size_rows Maximum row group size, in rows to set */ void set_row_group_size_rows(size_type size_rows) { @@ -572,6 +613,9 @@ class parquet_writer_options { } }; +/** + * @brief Class to build `parquet_writer_options`. + */ class parquet_writer_options_builder { parquet_writer_options options; @@ -720,6 +764,8 @@ class parquet_writer_options_builder { * @brief move parquet_writer_options member once it's built. 
* * This has been added since Cython does not support overloading of conversion operators. + * + * @return Built `parquet_writer_options` object's r-value reference */ parquet_writer_options&& build() { return std::move(options); } }; @@ -757,9 +803,6 @@ std::unique_ptr> write_parquet( std::unique_ptr> merge_row_group_metadata( const std::vector>>& metadata_list); -/** - * @brief Builds options for chunked_parquet_writer_options. - */ class chunked_parquet_writer_options_builder; /** @@ -803,26 +846,36 @@ class chunked_parquet_writer_options { /** * @brief Returns sink info. + * + * @return Sink info */ [[nodiscard]] sink_info const& get_sink() const { return _sink; } /** * @brief Returns compression format used. + * + * @return Compression format */ [[nodiscard]] compression_type get_compression() const { return _compression; } /** * @brief Returns level of statistics requested in output file. + * + * @return Level of statistics requested in output file */ [[nodiscard]] statistics_freq get_stats_level() const { return _stats_level; } /** * @brief Returns metadata information. + * + * @return Metadata information */ [[nodiscard]] table_input_metadata const* get_metadata() const { return _metadata; } /** * @brief Returns Key-Value footer metadata information. + * + * @return Key-Value footer metadata information */ std::vector> const& get_key_value_metadata() const { @@ -831,16 +884,22 @@ class chunked_parquet_writer_options { /** * @brief Returns `true` if timestamps will be written as INT96 + * + * @return True if timestamps will be written as INT96 */ bool is_enabled_int96_timestamps() const { return _write_timestamps_as_int96; } /** * @brief Returns maximum row group size, in bytes. + * + * @return Maximum row group size, in bytes */ auto get_row_group_size_bytes() const { return _row_group_size_bytes; } /** * @brief Returns maximum row group size, in rows. + * + * @return Maximum row group size, in rows. 
*/ auto get_row_group_size_rows() const { return _row_group_size_rows; } @@ -887,6 +946,8 @@ class chunked_parquet_writer_options { /** * @brief Sets the maximum row group size, in bytes. + * + * @param size_bytes Maximum row group size, in bytes to set */ void set_row_group_size_bytes(size_t size_bytes) { @@ -898,6 +959,8 @@ class chunked_parquet_writer_options { /** * @brief Sets the maximum row group size, in rows. + * + * @param size_rows The maximum row group size, in rows to set */ void set_row_group_size_rows(size_type size_rows) { @@ -917,6 +980,9 @@ class chunked_parquet_writer_options { static chunked_parquet_writer_options_builder builder(sink_info const& sink); }; +/** + * @brief Builds options for chunked_parquet_writer_options. + */ class chunked_parquet_writer_options_builder { chunked_parquet_writer_options options; @@ -977,7 +1043,7 @@ class chunked_parquet_writer_options_builder { /** * @brief Sets compression type to chunked_parquet_writer_options. * - * compression The compression type to use. + * @param compression The compression type to use. * @return this for chaining. */ chunked_parquet_writer_options_builder& compression(compression_type compression) @@ -1034,6 +1100,8 @@ class chunked_parquet_writer_options_builder { * @brief move chunked_parquet_writer_options member once it's is built. * * This has been added since Cython does not support overloading of conversion operators. 
+ * + * @return Built `chunked_parquet_writer_options` object's r-value reference */ chunked_parquet_writer_options&& build() { return std::move(options); } }; @@ -1099,7 +1167,7 @@ class parquet_chunked_writer { std::unique_ptr> close( std::vector const& column_chunks_file_paths = {}); - // Unique pointer to impl writer class + /// Unique pointer to impl writer class std::unique_ptr writer; }; From a7dba131efe0429efdac6c1f40f8f64e215c89a2 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Sat, 21 May 2022 20:14:43 +0530 Subject: [PATCH 6/8] address review comments (PointKernel) --- cpp/include/cudf/io/csv.hpp | 18 +++++++++--------- cpp/include/cudf/io/json.hpp | 2 +- cpp/include/cudf/io/orc.hpp | 6 +++--- cpp/include/cudf/io/parquet.hpp | 8 ++++---- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/cpp/include/cudf/io/csv.hpp b/cpp/include/cudf/io/csv.hpp index 9216ac3147c..7d899b4fa3f 100644 --- a/cpp/include/cudf/io/csv.hpp +++ b/cpp/include/cudf/io/csv.hpp @@ -239,7 +239,7 @@ class csv_reader_options { /** * @brief Whether to rename duplicate column names. * - * @return Boolean indicating whether to rename duplicate column names + * @return `true` if duplicate column names are renamed. */ [[nodiscard]] bool is_enabled_mangle_dupe_cols() const { return _mangle_dupe_cols; } @@ -326,28 +326,28 @@ class csv_reader_options { /** * @brief Whether to treat `\r\n` as line terminator. * - * @return Boolean indicating whether to treat `\r\n` as line terminator + * @return `true` if `\r\n` is treated as line terminator */ [[nodiscard]] bool is_enabled_windowslinetermination() const { return _windowslinetermination; } /** * @brief Whether to treat whitespace as field delimiter. 
* - * @return Boolean indicating whether to treat whitespace as field delimiter + * @return `true` if whitespace is treated as field delimiter */ [[nodiscard]] bool is_enabled_delim_whitespace() const { return _delim_whitespace; } /** * @brief Whether to skip whitespace after the delimiter. * - * @return Boolean indicating whether to skip whitespace after the delimiter + * @return `true` if whitespace is skipped after the delimiter */ [[nodiscard]] bool is_enabled_skipinitialspace() const { return _skipinitialspace; } /** * @brief Whether to ignore empty lines or parse line values as invalid. * - * @return Boolean indicating whether to ignore empty lines or parse line values as invalid + * @return `true` if empty lines are ignored instead of being parsed as invalid line values */ [[nodiscard]] bool is_enabled_skip_blank_lines() const { return _skip_blank_lines; } @@ -368,7 +368,7 @@ class csv_reader_options { /** * @brief Whether a quote inside a value is double-quoted. * - * @return Boolean indicating whether a quote inside a value is double-quoted + * @return `true` if a quote inside a value is double-quoted */ [[nodiscard]] bool is_enabled_doublequote() const { return _doublequote; } @@ -443,14 +443,14 @@ class csv_reader_options { /** * @brief Whether to keep the built-in default NA values. * - * @return Boolean indicating whether to keep the built-in default NA values + * @return `true` if the built-in default NA values are kept */ bool is_enabled_keep_default_na() const { return _keep_default_na; } /** * @brief Whether to disable null filter. * - * @return Boolean indicating whether to disable null filter + * @return `true` if null filter is enabled */ bool is_enabled_na_filter() const { return _na_filter; } @@ -1401,7 +1401,7 @@ class csv_writer_options { /** * @brief Whether to write headers to csv. * - * @return Boolean value indicating whether to write headers to csv. + * @return `true` if writing headers to csv.
*/ [[nodiscard]] bool is_enabled_include_header() const { return _include_header; } diff --git a/cpp/include/cudf/io/json.hpp b/cpp/include/cudf/io/json.hpp index d9397015ea3..7adef6e2342 100644 --- a/cpp/include/cudf/io/json.hpp +++ b/cpp/include/cudf/io/json.hpp @@ -182,7 +182,7 @@ class json_reader_options { /** * @brief Whether to read the file as a json object per line. * - * @return Boolean indicating whether to read the file as a json object per line + * @return `true` if reading the file as a json object per line */ bool is_enabled_lines() const { return _lines; } diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp index 6ae6b45dc7e..0ca320eefed 100644 --- a/cpp/include/cudf/io/orc.hpp +++ b/cpp/include/cudf/io/orc.hpp @@ -133,14 +133,14 @@ class orc_reader_options { /** * @brief Whether to use row index to speed-up reading. * - * @return Boolean indicating whether to use row index to speed-up reading + * @return `true` if row index is used to speed-up reading */ bool is_enabled_use_index() const { return _use_index; } /** * @brief Whether to use numpy-compatible dtypes. * - * @return Boolean indicating whether to use numpy-compatible dtypes + * @return `true` if numpy-compatible dtypes are used */ bool is_enabled_use_np_dtypes() const { return _use_np_dtypes; } @@ -483,7 +483,7 @@ class orc_writer_options { /** * @brief Whether writing column statistics is enabled/disabled. * - * @return Boolean indicating whether writing column statistics is enabled/disabled + * @return `true` if writing column statistics is enabled */ [[nodiscard]] bool is_enabled_statistics() const { diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index 149865a1ced..ae3a90b2f08 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -101,7 +101,7 @@ class parquet_reader_options { * @brief Returns true/false depending on whether strings should be converted to categories or * not. 
* - * @return True/false depending on whether strings should be converted to categories or not. + * @return `true` if strings should be converted to categories */ [[nodiscard]] bool is_enabled_convert_strings_to_categories() const { @@ -111,7 +111,7 @@ class parquet_reader_options { /** * @brief Returns true/false depending whether to use pandas metadata or not while reading. * - * @return True/false depending whether to use pandas metadata or not while reading. + * @return `true` if pandas metadata is used while reading. */ [[nodiscard]] bool is_enabled_use_pandas_metadata() const { return _use_pandas_metadata; } @@ -491,7 +491,7 @@ class parquet_writer_options { /** * @brief Returns `true` if timestamps will be written as INT96 * - * @return True if timestamps will be written as INT96 + * @return `true` if timestamps will be written as INT96 */ bool is_enabled_int96_timestamps() const { return _write_timestamps_as_int96; } @@ -885,7 +885,7 @@ class chunked_parquet_writer_options { /** * @brief Returns `true` if timestamps will be written as INT96 * - * @return True if timestamps will be written as INT96 + * @return `true` if timestamps will be written as INT96 */ bool is_enabled_int96_timestamps() const { return _write_timestamps_as_int96; } From b3eaf2eb958e1ebbca46fbeb7dd29aed4b4aa2d3 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Wed, 25 May 2022 01:44:36 +0530 Subject: [PATCH 7/8] remove dot at end of param, return --- cpp/include/cudf/io/avro.hpp | 32 ++-- cpp/include/cudf/io/csv.hpp | 300 ++++++++++++++++---------------- cpp/include/cudf/io/json.hpp | 48 ++--- cpp/include/cudf/io/orc.hpp | 140 +++++++-------- cpp/include/cudf/io/parquet.hpp | 156 ++++++++--------- 5 files changed, 338 insertions(+), 338 deletions(-) diff --git a/cpp/include/cudf/io/avro.hpp b/cpp/include/cudf/io/avro.hpp index 21468bea131..17c168f38d4 100644 --- a/cpp/include/cudf/io/avro.hpp +++ b/cpp/include/cudf/io/avro.hpp @@ -54,7 +54,7 @@ class avro_reader_options { /** * 
@brief Constructor from source info. * - * @param src source information used to read avro file. + * @param src source information used to read avro file */ explicit avro_reader_options(source_info const& src) : _source(src) {} @@ -99,29 +99,29 @@ class avro_reader_options { /** * @brief Set names of the column to be read. * - * @param col_names Vector of column names. + * @param col_names Vector of column names */ void set_columns(std::vector col_names) { _columns = std::move(col_names); } /** * @brief Sets number of rows to skip. * - * @param val Number of rows to skip from start. + * @param val Number of rows to skip from start */ void set_skip_rows(size_type val) { _skip_rows = val; } /** * @brief Sets number of rows to read. * - * @param val Number of rows to read after skip. + * @param val Number of rows to read after skip */ void set_num_rows(size_type val) { _num_rows = val; } /** * @brief create avro_reader_options_builder which will build avro_reader_options. * - * @param src source information used to read avro file. - * @returns builder to build reader options. + * @param src source information used to read avro file + * @returns builder to build reader options */ static avro_reader_options_builder builder(source_info const& src); }; @@ -143,15 +143,15 @@ class avro_reader_options_builder { /** * @brief Constructor from source info. * - * @param src The source information used to read avro file. + * @param src The source information used to read avro file */ explicit avro_reader_options_builder(source_info const& src) : options(src) {} /** * @brief Set names of the column to be read. * - * @param col_names Vector of column names. - * @return this for chaining. + * @param col_names Vector of column names + * @return this for chaining */ avro_reader_options_builder& columns(std::vector col_names) { @@ -162,8 +162,8 @@ class avro_reader_options_builder { /** * @brief Sets number of rows to skip. * - * @param val Number of rows to skip from start. 
- * @return this for chaining. + * @param val Number of rows to skip from start + * @return this for chaining */ avro_reader_options_builder& skip_rows(size_type val) { @@ -174,8 +174,8 @@ class avro_reader_options_builder { /** * @brief Sets number of rows to read. * - * @param val Number of rows to read after skip. - * @return this for chaining. + * @param val Number of rows to read after skip + * @return this for chaining */ avro_reader_options_builder& num_rows(size_type val) { @@ -208,11 +208,11 @@ class avro_reader_options_builder { * auto result = cudf::io::read_avro(options); * @endcode * - * @param options Settings for controlling reading behavior. - * @param mr Device memory resource used to allocate device memory of the table in the returned. + * @param options Settings for controlling reading behavior + * @param mr Device memory resource used to allocate device memory of the table in the returned * table_with_metadata * - * @return The set of columns along with metadata. + * @return The set of columns along with metadata */ table_with_metadata read_avro( avro_reader_options const& options, diff --git a/cpp/include/cudf/io/csv.hpp b/cpp/include/cudf/io/csv.hpp index 7d899b4fa3f..f43952c7153 100644 --- a/cpp/include/cudf/io/csv.hpp +++ b/cpp/include/cudf/io/csv.hpp @@ -134,7 +134,7 @@ class csv_reader_options { /** * @brief Constructor from source info. * - * @param src source information used to read csv file. + * @param src source information used to read csv file */ explicit csv_reader_options(source_info const& src) : _source(src) {} @@ -151,8 +151,8 @@ class csv_reader_options { /** * @brief Creates a `csv_reader_options_builder` which will build `csv_reader_options`. * - * @param src Source information to read csv file. - * @return Builder to build reader options. 
+ * @param src Source information to read csv file + * @return Builder to build reader options */ static csv_reader_options_builder builder(source_info const& src); @@ -239,7 +239,7 @@ class csv_reader_options { /** * @brief Whether to rename duplicate column names. * - * @return `true` if duplicate column names are renamed. + * @return `true` if duplicate column names are renamed */ [[nodiscard]] bool is_enabled_mangle_dupe_cols() const { return _mangle_dupe_cols; } @@ -457,7 +457,7 @@ class csv_reader_options { /** * @brief Whether to parse dates as DD/MM versus MM/DD. * - * @return True if dates are parsed as DD/MM, false if MM/DD. + * @return True if dates are parsed as DD/MM, false if MM/DD */ bool is_enabled_dayfirst() const { return _dayfirst; } @@ -471,14 +471,14 @@ class csv_reader_options { /** * @brief Sets compression format of the source. * - * @param comp Compression type. + * @param comp Compression type */ void set_compression(compression_type comp) { _compression = comp; } /** * @brief Sets number of bytes to skip from source start. * - * @param offset Number of bytes of offset. + * @param offset Number of bytes of offset */ void set_byte_range_offset(std::size_t offset) { @@ -493,7 +493,7 @@ class csv_reader_options { /** * @brief Sets number of bytes to read. * - * @param size Number of bytes to read. + * @param size Number of bytes to read */ void set_byte_range_size(std::size_t size) { @@ -508,28 +508,28 @@ class csv_reader_options { /** * @brief Sets names of the column. * - * @param col_names Vector of column names. + * @param col_names Vector of column names */ void set_names(std::vector col_names) { _names = std::move(col_names); } /** * @brief Sets prefix to be used for column ID. * - * @param pfx String used as prefix in for each column name. + * @param pfx String used as prefix in for each column name */ void set_prefix(std::string pfx) { _prefix = pfx; } /** * @brief Sets whether to rename duplicate column names. 
* - * @param val Boolean value to enable/disable. + * @param val Boolean value to enable/disable */ void enable_mangle_dupe_cols(bool val) { _mangle_dupe_cols = val; } /** * @brief Sets names of the columns to be read. * - * @param col_names Vector of column names that are needed. + * @param col_names Vector of column names that are needed */ void set_use_cols_names(std::vector col_names) { @@ -539,7 +539,7 @@ class csv_reader_options { /** * @brief Sets indexes of columns to read. * - * @param col_indices Vector of column indices that are needed. + * @param col_indices Vector of column indices that are needed */ void set_use_cols_indexes(std::vector col_indices) { @@ -549,7 +549,7 @@ class csv_reader_options { /** * @brief Sets number of rows to read. * - * @param nrows Number of rows to read. + * @param nrows Number of rows to read */ void set_nrows(size_type nrows) { @@ -565,7 +565,7 @@ class csv_reader_options { /** * @brief Sets number of rows to skip from start. * - * @param skip Number of rows to skip. + * @param skip Number of rows to skip */ void set_skiprows(size_type skip) { @@ -579,7 +579,7 @@ class csv_reader_options { /** * @brief Sets number of rows to skip from end. * - * @param skip Number of rows to skip. + * @param skip Number of rows to skip */ void set_skipfooter(size_type skip) { @@ -595,98 +595,98 @@ class csv_reader_options { /** * @brief Sets header row index. * - * @param hdr Index where header row is located. + * @param hdr Index where header row is located */ void set_header(size_type hdr) { _header = hdr; } /** * @brief Sets line terminator * - * @param term A character to indicate line termination. + * @param term A character to indicate line termination */ void set_lineterminator(char term) { _lineterminator = term; } /** * @brief Sets field delimiter. * - * @param delim A character to indicate delimiter. 
+ * @param delim A character to indicate delimiter */ void set_delimiter(char delim) { _delimiter = delim; } /** * @brief Sets numeric data thousands separator. * - * @param val A character that separates thousands. + * @param val A character that separates thousands */ void set_thousands(char val) { _thousands = val; } /** * @brief Sets decimal point character. * - * @param val A character that indicates decimal values. + * @param val A character that indicates decimal values */ void set_decimal(char val) { _decimal = val; } /** * @brief Sets comment line start character. * - * @param val A character that indicates comment. + * @param val A character that indicates comment */ void set_comment(char val) { _comment = val; } /** * @brief Sets whether to treat `\r\n` as line terminator. * - * @param val Boolean value to enable/disable. + * @param val Boolean value to enable/disable */ void enable_windowslinetermination(bool val) { _windowslinetermination = val; } /** * @brief Sets whether to treat whitespace as field delimiter. * - * @param val Boolean value to enable/disable. + * @param val Boolean value to enable/disable */ void enable_delim_whitespace(bool val) { _delim_whitespace = val; } /** * @brief Sets whether to skip whitespace after the delimiter. * - * @param val Boolean value to enable/disable. + * @param val Boolean value to enable/disable */ void enable_skipinitialspace(bool val) { _skipinitialspace = val; } /** * @brief Sets whether to ignore empty lines or parse line values as invalid. * - * @param val Boolean value to enable/disable. + * @param val Boolean value to enable/disable */ void enable_skip_blank_lines(bool val) { _skip_blank_lines = val; } /** * @brief Sets quoting style. * - * @param style Quoting style used. + * @param style Quoting style used */ void set_quoting(quote_style style) { _quoting = style; } /** * @brief Sets quoting character. * - * @param ch A character to indicate quoting. 
+ * @param ch A character to indicate quoting */ void set_quotechar(char ch) { _quotechar = ch; } /** * @brief Sets a quote inside a value is double-quoted. * - * @param val Boolean value to enable/disable. + * @param val Boolean value to enable/disable */ void enable_doublequote(bool val) { _doublequote = val; } /** * @brief Sets names of columns to read as datetime. * - * @param col_names Vector of column names to infer as datetime. + * @param col_names Vector of column names to infer as datetime */ void set_parse_dates(std::vector col_names) { @@ -696,7 +696,7 @@ class csv_reader_options { /** * @brief Sets indexes of columns to read as datetime. * - * @param col_indices Vector of column indices to infer as datetime. + * @param col_indices Vector of column indices to infer as datetime */ void set_parse_dates(std::vector col_indices) { @@ -730,14 +730,14 @@ class csv_reader_options { /** * @brief Sets per-column types * - * @param types Vector specifying the columns' target data types. + * @param types Vector specifying the columns' target data types */ void set_dtypes(std::vector types) { _dtypes = std::move(types); } /** * @brief Sets additional values to recognize as boolean true values. * - * @param vals Vector of values to be considered to be `true`. + * @param vals Vector of values to be considered to be `true` */ void set_true_values(std::vector vals) { @@ -747,7 +747,7 @@ class csv_reader_options { /** * @brief Sets additional values to recognize as boolean false values. * - * @param vals Vector of values to be considered to be `false`. + * @param vals Vector of values to be considered to be `false` */ void set_false_values(std::vector vals) { @@ -757,7 +757,7 @@ class csv_reader_options { /** * @brief Sets additional values to recognize as null values. * - * @param vals Vector of values to be considered to be null. 
+ * @param vals Vector of values to be considered to be null */ void set_na_values(std::vector vals) { @@ -771,14 +771,14 @@ class csv_reader_options { /** * @brief Sets whether to keep the built-in default NA values. * - * @param val Boolean value to enable/disable. + * @param val Boolean value to enable/disable */ void enable_keep_default_na(bool val) { _keep_default_na = val; } /** * @brief Sets whether to disable null filter. * - * @param val Boolean value to enable/disable. + * @param val Boolean value to enable/disable */ void enable_na_filter(bool val) { @@ -789,14 +789,14 @@ class csv_reader_options { /** * @brief Sets whether to parse dates as DD/MM versus MM/DD. * - * @param val Boolean value to enable/disable. + * @param val Boolean value to enable/disable */ void enable_dayfirst(bool val) { _dayfirst = val; } /** * @brief Sets timestamp_type to which all timestamp columns will be cast. * - * @param type Dtype to which all timestamp column will be cast. + * @param type Dtype to which all timestamp column will be cast */ void set_timestamp_type(data_type type) { _timestamp_type = type; } }; @@ -819,15 +819,15 @@ class csv_reader_options_builder { /** * @brief Constructor from source info. * - * @param src The source information used to read csv file. + * @param src The source information used to read csv file */ csv_reader_options_builder(source_info const& src) : options(src) {} /** * @brief Sets compression format of the source. * - * @param comp Compression type. - * @return this for chaining. + * @param comp Compression type + * @return this for chaining */ csv_reader_options_builder& compression(compression_type comp) { @@ -838,8 +838,8 @@ class csv_reader_options_builder { /** * @brief Sets number of bytes to skip from source start. * - * @param offset Number of bytes of offset. - * @return this for chaining. 
+ * @param offset Number of bytes of offset + * @return this for chaining */ csv_reader_options_builder& byte_range_offset(std::size_t offset) { @@ -850,8 +850,8 @@ class csv_reader_options_builder { /** * @brief Sets number of bytes to read. * - * @param size Number of bytes to read. - * @return this for chaining. + * @param size Number of bytes to read + * @return this for chaining */ csv_reader_options_builder& byte_range_size(std::size_t size) { @@ -862,8 +862,8 @@ class csv_reader_options_builder { /** * @brief Sets names of the column. * - * @param col_names Vector of column names. - * @return this for chaining. + * @param col_names Vector of column names + * @return this for chaining */ csv_reader_options_builder& names(std::vector col_names) { @@ -874,8 +874,8 @@ class csv_reader_options_builder { /** * @brief Sets prefix to be used for column ID. * - * @param pfx String used as prefix in for each column name. - * @return this for chaining. + * @param pfx String used as prefix in for each column name + * @return this for chaining */ csv_reader_options_builder& prefix(std::string pfx) { @@ -886,8 +886,8 @@ class csv_reader_options_builder { /** * @brief Sets whether to rename duplicate column names. * - * @param val Boolean value to enable/disable. - * @return this for chaining. + * @param val Boolean value to enable/disable + * @return this for chaining */ csv_reader_options_builder& mangle_dupe_cols(bool val) { @@ -898,8 +898,8 @@ class csv_reader_options_builder { /** * @brief Sets names of the columns to be read. * - * @param col_names Vector of column names that are needed. - * @return this for chaining. + * @param col_names Vector of column names that are needed + * @return this for chaining */ csv_reader_options_builder& use_cols_names(std::vector col_names) { @@ -910,8 +910,8 @@ class csv_reader_options_builder { /** * @brief Sets indexes of columns to read. * - * @param col_indices Vector of column indices that are needed. 
- * @return this for chaining. + * @param col_indices Vector of column indices that are needed + * @return this for chaining */ csv_reader_options_builder& use_cols_indexes(std::vector col_indices) { @@ -922,8 +922,8 @@ class csv_reader_options_builder { /** * @brief Sets number of rows to read. * - * @param rows Number of rows to read. - * @return this for chaining. + * @param rows Number of rows to read + * @return this for chaining */ csv_reader_options_builder& nrows(size_type rows) { @@ -934,8 +934,8 @@ class csv_reader_options_builder { /** * @brief Sets number of rows to skip from start. * - * @param skip Number of rows to skip. - * @return this for chaining. + * @param skip Number of rows to skip + * @return this for chaining */ csv_reader_options_builder& skiprows(size_type skip) { @@ -946,8 +946,8 @@ class csv_reader_options_builder { /** * @brief Sets number of rows to skip from end. * - * @param skip Number of rows to skip. - * @return this for chaining. + * @param skip Number of rows to skip + * @return this for chaining */ csv_reader_options_builder& skipfooter(size_type skip) { @@ -958,8 +958,8 @@ class csv_reader_options_builder { /** * @brief Sets header row index. * - * @param hdr Index where header row is located. - * @return this for chaining. + * @param hdr Index where header row is located + * @return this for chaining */ csv_reader_options_builder& header(size_type hdr) { @@ -970,8 +970,8 @@ class csv_reader_options_builder { /** * @brief Sets line terminator. * - * @param term A character to indicate line termination. - * @return this for chaining. + * @param term A character to indicate line termination + * @return this for chaining */ csv_reader_options_builder& lineterminator(char term) { @@ -982,8 +982,8 @@ class csv_reader_options_builder { /** * @brief Sets field delimiter * - * @param delim A character to indicate delimiter. - * @return this for chaining. 
+ * @param delim A character to indicate delimiter + * @return this for chaining */ csv_reader_options_builder& delimiter(char delim) { @@ -994,8 +994,8 @@ class csv_reader_options_builder { /** * @brief Sets numeric data thousands separator. * - * @param val A character that separates thousands. - * @return this for chaining. + * @param val A character that separates thousands + * @return this for chaining */ csv_reader_options_builder& thousands(char val) { @@ -1006,8 +1006,8 @@ class csv_reader_options_builder { /** * @brief Sets decimal point character. * - * @param val A character that indicates decimal values. - * @return this for chaining. + * @param val A character that indicates decimal values + * @return this for chaining */ csv_reader_options_builder& decimal(char val) { @@ -1018,8 +1018,8 @@ class csv_reader_options_builder { /** * @brief Sets comment line start character. * - * @param val A character that indicates comment. - * @return this for chaining. + * @param val A character that indicates comment + * @return this for chaining */ csv_reader_options_builder& comment(char val) { @@ -1030,8 +1030,8 @@ class csv_reader_options_builder { /** * @brief Sets whether to treat `\r\n` as line terminator. * - * @param val Boolean value to enable/disable. - * @return this for chaining. + * @param val Boolean value to enable/disable + * @return this for chaining */ csv_reader_options_builder& windowslinetermination(bool val) { @@ -1042,8 +1042,8 @@ class csv_reader_options_builder { /** * @brief Sets whether to treat whitespace as field delimiter. * - * @param val Boolean value to enable/disable. - * @return this for chaining. + * @param val Boolean value to enable/disable + * @return this for chaining */ csv_reader_options_builder& delim_whitespace(bool val) { @@ -1054,8 +1054,8 @@ class csv_reader_options_builder { /** * @brief Sets whether to skip whitespace after the delimiter. * - * @param val Boolean value to enable/disable. 
- * @return this for chaining. + * @param val Boolean value to enable/disable + * @return this for chaining */ csv_reader_options_builder& skipinitialspace(bool val) { @@ -1066,8 +1066,8 @@ class csv_reader_options_builder { /** * @brief Sets whether to ignore empty lines or parse line values as invalid. * - * @param val Boolean value to enable/disable. - * @return this for chaining. + * @param val Boolean value to enable/disable + * @return this for chaining */ csv_reader_options_builder& skip_blank_lines(bool val) { @@ -1078,8 +1078,8 @@ class csv_reader_options_builder { /** * @brief Sets quoting style. * - * @param style Quoting style used. - * @return this for chaining. + * @param style Quoting style used + * @return this for chaining */ csv_reader_options_builder& quoting(quote_style style) { @@ -1090,8 +1090,8 @@ class csv_reader_options_builder { /** * @brief Sets quoting character. * - * @param ch A character to indicate quoting. - * @return this for chaining. + * @param ch A character to indicate quoting + * @return this for chaining */ csv_reader_options_builder& quotechar(char ch) { @@ -1102,8 +1102,8 @@ class csv_reader_options_builder { /** * @brief Sets a quote inside a value is double-quoted. * - * @param val Boolean value to enable/disable. - * @return this for chaining. + * @param val Boolean value to enable/disable + * @return this for chaining */ csv_reader_options_builder& doublequote(bool val) { @@ -1114,8 +1114,8 @@ class csv_reader_options_builder { /** * @brief Sets names of columns to read as datetime. * - * @param col_names Vector of column names to read as datetime. - * @return this for chaining. + * @param col_names Vector of column names to read as datetime + * @return this for chaining */ csv_reader_options_builder& parse_dates(std::vector col_names) { @@ -1127,7 +1127,7 @@ class csv_reader_options_builder { * @brief Sets indexes of columns to read as datetime. 
* * @param col_indices Vector of column indices to read as datetime - * @return this for chaining. + * @return this for chaining */ csv_reader_options_builder& parse_dates(std::vector col_indices) { @@ -1139,7 +1139,7 @@ class csv_reader_options_builder { * @brief Sets names of columns to parse as hexadecimal. * * @param col_names Vector of column names to parse as hexadecimal - * @return this for chaining. + * @return this for chaining */ csv_reader_options_builder& parse_hex(std::vector col_names) { @@ -1151,7 +1151,7 @@ class csv_reader_options_builder { * @brief Sets indexes of columns to parse as hexadecimal. * * @param col_indices Vector of column indices to parse as hexadecimal - * @return this for chaining. + * @return this for chaining */ csv_reader_options_builder& parse_hex(std::vector col_indices) { @@ -1163,7 +1163,7 @@ class csv_reader_options_builder { * @brief Sets per-column types. * * @param types Column name -> data type map specifying the columns' target data types - * @return this for chaining. + * @return this for chaining */ csv_reader_options_builder& dtypes(std::map types) { @@ -1174,8 +1174,8 @@ class csv_reader_options_builder { /** * @brief Sets per-column types. * - * @param types Vector of data types in which the column needs to be read. - * @return this for chaining. + * @param types Vector of data types in which the column needs to be read + * @return this for chaining */ csv_reader_options_builder& dtypes(std::vector types) { @@ -1186,8 +1186,8 @@ class csv_reader_options_builder { /** * @brief Sets additional values to recognize as boolean true values. * - * @param vals Vector of values to be considered to be `true`. - * @return this for chaining. + * @param vals Vector of values to be considered to be `true` + * @return this for chaining */ csv_reader_options_builder& true_values(std::vector vals) { @@ -1198,8 +1198,8 @@ class csv_reader_options_builder { /** * @brief Sets additional values to recognize as boolean false values. 
* - * @param vals Vector of values to be considered to be `false`. - * @return this for chaining. + * @param vals Vector of values to be considered to be `false` + * @return this for chaining */ csv_reader_options_builder& false_values(std::vector vals) { @@ -1210,8 +1210,8 @@ class csv_reader_options_builder { /** * @brief Sets additional values to recognize as null values. * - * @param vals Vector of values to be considered to be null. - * @return this for chaining. + * @param vals Vector of values to be considered to be null + * @return this for chaining */ csv_reader_options_builder& na_values(std::vector vals) { @@ -1222,8 +1222,8 @@ class csv_reader_options_builder { /** * @brief Sets whether to keep the built-in default NA values. * - * @param val Boolean value to enable/disable. - * @return this for chaining. + * @param val Boolean value to enable/disable + * @return this for chaining */ csv_reader_options_builder& keep_default_na(bool val) { @@ -1234,8 +1234,8 @@ class csv_reader_options_builder { /** * @brief Sets whether to disable null filter. * - * @param val Boolean value to enable/disable. - * @return this for chaining. + * @param val Boolean value to enable/disable + * @return this for chaining */ csv_reader_options_builder& na_filter(bool val) { @@ -1246,8 +1246,8 @@ class csv_reader_options_builder { /** * @brief Sets whether to parse dates as DD/MM versus MM/DD. * - * @param val Boolean value to enable/disable. - * @return this for chaining. + * @param val Boolean value to enable/disable + * @return this for chaining */ csv_reader_options_builder& dayfirst(bool val) { @@ -1258,8 +1258,8 @@ class csv_reader_options_builder { /** * @brief Sets timestamp_type to which all timestamp columns will be cast. * - * @param type Dtype to which all timestamp column will be cast. - * @return this for chaining. 
+ * @param type Dtype to which all timestamp column will be cast + * @return this for chaining */ csv_reader_options_builder& timestamp_type(data_type type) { @@ -1292,11 +1292,11 @@ class csv_reader_options_builder { * auto result = cudf::io::read_csv(options); * @endcode * - * @param options Settings for controlling reading behavior. - * @param mr Device memory resource used to allocate device memory of the table in the returned. + * @param options Settings for controlling reading behavior + * @param mr Device memory resource used to allocate device memory of the table in the returned * table_with_metadata * - * @return The set of columns along with metadata. + * @return The set of columns along with metadata */ table_with_metadata read_csv( csv_reader_options options, @@ -1342,8 +1342,8 @@ class csv_writer_options { /** * @brief Constructor from sink and table. * - * @param sink The sink used for writer output. - * @param table Table to be written to output. + * @param sink The sink used for writer output + * @param table Table to be written to output */ explicit csv_writer_options(sink_info const& sink, table_view const& table) : _sink(sink), _table(table), _rows_per_chunk(table.num_rows()) @@ -1363,10 +1363,10 @@ class csv_writer_options { /** * @brief Create builder to create `csv_writer_options`. * - * @param sink The sink used for writer output. - * @param table Table to be written to output. + * @param sink The sink used for writer output + * @param table Table to be written to output * - * @return Builder to build csv_writer_options. + * @return Builder to build csv_writer_options */ static csv_writer_options_builder builder(sink_info const& sink, table_view const& table); @@ -1401,28 +1401,28 @@ class csv_writer_options { /** * @brief Whether to write headers to csv. * - * @return `true` if writing headers to csv. 
+ * @return `true` if writing headers to csv */ [[nodiscard]] bool is_enabled_include_header() const { return _include_header; } /** * @brief Returns maximum number of rows to process for each file write. * - * @return Maximum number of rows to process for each file write. + * @return Maximum number of rows to process for each file write */ [[nodiscard]] size_type get_rows_per_chunk() const { return _rows_per_chunk; } /** * @brief Returns character used for separating lines. * - * @return Character used for separating lines. + * @return Character used for separating lines */ [[nodiscard]] std::string get_line_terminator() const { return _line_terminator; } /** * @brief Returns character used for separating lines. * - * @return Character used for separating lines. + * @return Character used for separating lines */ [[nodiscard]] char get_inter_column_delimiter() const { return _inter_column_delimiter; } @@ -1444,56 +1444,56 @@ class csv_writer_options { /** * @brief Sets optional associated metadata. * - @param metadata Associated metadata. + @param metadata Associated metadata */ void set_metadata(table_metadata* metadata) { _metadata = metadata; } /** * @brief Sets string to used for null entries. * - * @param val String to represent null value. + * @param val String to represent null value */ void set_na_rep(std::string val) { _na_rep = val; } /** * @brief Enables/Disables headers being written to csv. * - * @param val Boolean value to enable/disable. + * @param val Boolean value to enable/disable */ void enable_include_header(bool val) { _include_header = val; } /** * @brief Sets maximum number of rows to process for each file write. * - * @param val Number of rows per chunk. + * @param val Number of rows per chunk */ void set_rows_per_chunk(size_type val) { _rows_per_chunk = val; } /** * @brief Sets character used for separating lines. * - * @param term Character to represent line termination. 
+ * @param term Character to represent line termination */ void set_line_terminator(std::string term) { _line_terminator = term; } /** * @brief Sets character used for separating lines. * - * @param delim Character to indicate delimiting. + * @param delim Character to indicate delimiting */ void set_inter_column_delimiter(char delim) { _inter_column_delimiter = delim; } /** * @brief Sets string used for values != 0 in INT8 types. * - * @param val String to represent values != 0 in INT8 types. + * @param val String to represent values != 0 in INT8 types */ void set_true_value(std::string val) { _true_value = val; } /** * @brief Sets string used for values == 0 in INT8 types. * - * @param val String to represent values == 0 in INT8 types. + * @param val String to represent values == 0 in INT8 types */ void set_false_value(std::string val) { _false_value = val; } }; @@ -1515,8 +1515,8 @@ class csv_writer_options_builder { /** * @brief Constructor from sink and table. * - * @param sink The sink used for writer output. - * @param table Table to be written to output. + * @param sink The sink used for writer output + * @param table Table to be written to output */ explicit csv_writer_options_builder(sink_info const& sink, table_view const& table) : options{sink, table} @@ -1526,8 +1526,8 @@ class csv_writer_options_builder { /** * @brief Sets optional associated metadata. * - * @param metadata Associated metadata. - * @return this for chaining. + * @param metadata Associated metadata + * @return this for chaining */ csv_writer_options_builder& metadata(table_metadata* metadata) { @@ -1538,8 +1538,8 @@ class csv_writer_options_builder { /** * @brief Sets string to used for null entries. * - * @param val String to represent null value. - * @return this for chaining. 
+ * @param val String to represent null value + * @return this for chaining */ csv_writer_options_builder& na_rep(std::string val) { @@ -1550,8 +1550,8 @@ class csv_writer_options_builder { /** * @brief Enables/Disables headers being written to csv. * - * @param val Boolean value to enable/disable. - * @return this for chaining. + * @param val Boolean value to enable/disable + * @return this for chaining */ csv_writer_options_builder& include_header(bool val) { @@ -1562,8 +1562,8 @@ class csv_writer_options_builder { /** * @brief Sets maximum number of rows to process for each file write. * - * @param val Number of rows per chunk. - * @return this for chaining. + * @param val Number of rows per chunk + * @return this for chaining */ csv_writer_options_builder& rows_per_chunk(int val) { @@ -1574,8 +1574,8 @@ class csv_writer_options_builder { /** * @brief Sets character used for separating lines. * - * @param term Character to represent line termination. - * @return this for chaining. + * @param term Character to represent line termination + * @return this for chaining */ csv_writer_options_builder& line_terminator(std::string term) { @@ -1586,8 +1586,8 @@ class csv_writer_options_builder { /** * @brief Sets character used for separating lines. * - * @param delim Character to indicate delimiting. - * @return this for chaining. + * @param delim Character to indicate delimiting + * @return this for chaining */ csv_writer_options_builder& inter_column_delimiter(char delim) { @@ -1598,8 +1598,8 @@ class csv_writer_options_builder { /** * @brief Sets string used for values != 0 in INT8 types. * - * @param val String to represent values != 0 in INT8 types. - * @return this for chaining. + * @param val String to represent values != 0 in INT8 types + * @return this for chaining */ csv_writer_options_builder& true_value(std::string val) { @@ -1610,8 +1610,8 @@ class csv_writer_options_builder { /** * @brief Sets string used for values == 0 in INT8 types. 
* - * @param val String to represent values == 0 in INT8 types. - * @return this for chaining. + * @param val String to represent values == 0 in INT8 types + * @return this for chaining */ csv_writer_options_builder& false_value(std::string val) { @@ -1648,8 +1648,8 @@ class csv_writer_options_builder { * cudf::io::write_csv(options); * @endcode * - * @param options Settings for controlling writing behavior. - * @param mr Device memory resource to use for device memory allocation. + * @param options Settings for controlling writing behavior + * @param mr Device memory resource to use for device memory allocation */ void write_csv(csv_writer_options const& options, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/include/cudf/io/json.hpp b/cpp/include/cudf/io/json.hpp index 7adef6e2342..9ccb5ec4d58 100644 --- a/cpp/include/cudf/io/json.hpp +++ b/cpp/include/cudf/io/json.hpp @@ -83,7 +83,7 @@ class json_reader_options { /** * @brief Constructor from source info. * - * @param src source information used to read parquet file. + * @param src source information used to read parquet file */ explicit json_reader_options(const source_info& src) : _source(src) {} @@ -100,8 +100,8 @@ class json_reader_options { /** * @brief create json_reader_options_builder which will build json_reader_options. * - * @param src source information used to read json file. - * @returns builder to build the options. + * @param src source information used to read json file + * @returns builder to build the options */ static json_reader_options_builder builder(source_info const& src); @@ -115,7 +115,7 @@ class json_reader_options { /** * @brief Returns data types of the columns. * - * @returns Data types of the columns. + * @returns Data types of the columns */ std::variant, std::map> const& get_dtypes() const { @@ -189,7 +189,7 @@ class json_reader_options { /** * @brief Whether to parse dates as DD/MM versus MM/DD. 
* - * @returns true if dates are parsed as DD/MM, false if MM/DD. + * @returns true if dates are parsed as DD/MM, false if MM/DD */ bool is_enabled_dayfirst() const { return _dayfirst; } @@ -203,42 +203,42 @@ class json_reader_options { /** * @brief Set data types for columns to be read. * - * @param types Vector dtypes in string format. + * @param types Vector dtypes in string format */ void set_dtypes(std::map types) { _dtypes = std::move(types); } /** * @brief Set the compression type. * - * @param comp_type The compression type used. + * @param comp_type The compression type used */ void set_compression(compression_type comp_type) { _compression = comp_type; } /** * @brief Set number of bytes to skip from source start. * - * @param offset Number of bytes of offset. + * @param offset Number of bytes of offset */ void set_byte_range_offset(size_type offset) { _byte_range_offset = offset; } /** * @brief Set number of bytes to read. * - * @param size Number of bytes to read. + * @param size Number of bytes to read */ void set_byte_range_size(size_type size) { _byte_range_size = size; } /** * @brief Set whether to read the file as a json object per line. * - * @param val Boolean value to enable/disable the option to read each line as a json object. + * @param val Boolean value to enable/disable the option to read each line as a json object */ void enable_lines(bool val) { _lines = val; } /** * @brief Set whether to parse dates as DD/MM versus MM/DD. * - * @param val Boolean value to enable/disable day first parsing format. + * @param val Boolean value to enable/disable day first parsing format */ void enable_dayfirst(bool val) { _dayfirst = val; } }; @@ -260,7 +260,7 @@ class json_reader_options_builder { /** * @brief Constructor from source info. * - * @param src The source information used to read avro file. 
+ * @param src The source information used to read avro file */ explicit json_reader_options_builder(source_info const& src) : options(src) {} @@ -279,7 +279,7 @@ class json_reader_options_builder { /** * @brief Set data types for columns to be read. * - * @param types Column name -> dtype map. + * @param types Column name -> dtype map * @return this for chaining */ json_reader_options_builder& dtypes(std::map types) @@ -291,8 +291,8 @@ class json_reader_options_builder { /** * @brief Set the compression type. * - * @param comp_type The compression type used. - * @return this for chaining. + * @param comp_type The compression type used + * @return this for chaining */ json_reader_options_builder& compression(compression_type comp_type) { @@ -303,8 +303,8 @@ class json_reader_options_builder { /** * @brief Set number of bytes to skip from source start. * - * @param offset Number of bytes of offset. - * @return this for chaining. + * @param offset Number of bytes of offset + * @return this for chaining */ json_reader_options_builder& byte_range_offset(size_type offset) { @@ -315,7 +315,7 @@ class json_reader_options_builder { /** * @brief Set number of bytes to read. * - * @param size Number of bytes to read. + * @param size Number of bytes to read * @return this for chaining */ json_reader_options_builder& byte_range_size(size_type size) @@ -327,8 +327,8 @@ class json_reader_options_builder { /** * @brief Set whether to read the file as a json object per line. * - * @param val Boolean value to enable/disable the option to read each line as a json object. - * @return this for chaining. + * @param val Boolean value to enable/disable the option to read each line as a json object + * @return this for chaining */ json_reader_options_builder& lines(bool val) { @@ -339,8 +339,8 @@ class json_reader_options_builder { /** * @brief Set whether to parse dates as DD/MM versus MM/DD. * - * @param val Boolean value to enable/disable day first parsing format. 
- * @return this for chaining. + * @param val Boolean value to enable/disable day first parsing format + * @return this for chaining */ json_reader_options_builder& dayfirst(bool val) { @@ -373,11 +373,11 @@ class json_reader_options_builder { * auto result = cudf::io::read_json(options); * @endcode * - * @param options Settings for controlling reading behavior. + * @param options Settings for controlling reading behavior * @param mr Device memory resource used to allocate device memory of the table in the returned * table_with_metadata. * - * @return The set of columns along with metadata. + * @return The set of columns along with metadata */ table_with_metadata read_json( json_reader_options options, diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp index 0ca320eefed..e9b6818099e 100644 --- a/cpp/include/cudf/io/orc.hpp +++ b/cpp/include/cudf/io/orc.hpp @@ -75,7 +75,7 @@ class orc_reader_options { /** * @brief Constructor from source info. * - * @param src source information used to read orc file. + * @param src source information used to read orc file */ explicit orc_reader_options(source_info const& src) : _source(src) {} @@ -90,8 +90,8 @@ class orc_reader_options { /** * @brief Creates `orc_reader_options_builder` which will build `orc_reader_options`. * - * @param src Source information to read orc file. - * @return Builder to build reader options. + * @param src Source information to read orc file + * @return Builder to build reader options */ static orc_reader_options_builder builder(source_info const& src); @@ -163,7 +163,7 @@ class orc_reader_options { /** * @brief Sets names of the column to read. * - * @param col_names Vector of column names. + * @param col_names Vector of column names */ void set_columns(std::vector col_names) { _columns = std::move(col_names); } @@ -182,7 +182,7 @@ class orc_reader_options { /** * @brief Sets number of rows to skip from the start. * - * @param rows Number of rows. 
+ * @param rows Number of rows */ void set_skip_rows(size_type rows) { @@ -193,7 +193,7 @@ class orc_reader_options { /** * @brief Sets number of row to read. * - * @param nrows Number of rows. + * @param nrows Number of rows */ void set_num_rows(size_type nrows) { @@ -204,28 +204,28 @@ class orc_reader_options { /** * @brief Enable/Disable use of row index to speed-up reading. * - * @param use Boolean value to enable/disable row index use. + * @param use Boolean value to enable/disable row index use */ void enable_use_index(bool use) { _use_index = use; } /** * @brief Enable/Disable use of numpy-compatible dtypes * - * @param use Boolean value to enable/disable. + * @param use Boolean value to enable/disable */ void enable_use_np_dtypes(bool use) { _use_np_dtypes = use; } /** * @brief Sets timestamp type to which timestamp column will be cast. * - * @param type Type of timestamp. + * @param type Type of timestamp */ void set_timestamp_type(data_type type) { _timestamp_type = type; } /** * @brief Set columns that should be read as 128-bit Decimal * - * @param val Vector of fully qualified column names. + * @param val Vector of fully qualified column names */ void set_decimal128_columns(std::vector val) { @@ -250,15 +250,15 @@ class orc_reader_options_builder { /** * @brief Constructor from source info. * - * @param src The source information used to read orc file. + * @param src The source information used to read orc file */ explicit orc_reader_options_builder(source_info const& src) : options{src} {}; /** * @brief Sets names of the column to read. * - * @param col_names Vector of column names. - * @return this for chaining. 
+ * @param col_names Vector of column names + * @return this for chaining */ orc_reader_options_builder& columns(std::vector col_names) { @@ -270,7 +270,7 @@ class orc_reader_options_builder { * @brief Sets list of individual stripes to read per source * * @param stripes Vector of vectors, mapping stripes to read to input sources - * @return this for chaining. + * @return this for chaining */ orc_reader_options_builder& stripes(std::vector> stripes) { @@ -281,8 +281,8 @@ class orc_reader_options_builder { /** * @brief Sets number of rows to skip from the start. * - * @param rows Number of rows. - * @return this for chaining. + * @param rows Number of rows + * @return this for chaining */ orc_reader_options_builder& skip_rows(size_type rows) { @@ -293,8 +293,8 @@ class orc_reader_options_builder { /** * @brief Sets number of row to read. * - * @param nrows Number of rows. - * @return this for chaining. + * @param nrows Number of rows + * @return this for chaining */ orc_reader_options_builder& num_rows(size_type nrows) { @@ -305,8 +305,8 @@ class orc_reader_options_builder { /** * @brief Enable/Disable use of row index to speed-up reading. * - * @param use Boolean value to enable/disable row index use. - * @return this for chaining. + * @param use Boolean value to enable/disable row index use + * @return this for chaining */ orc_reader_options_builder& use_index(bool use) { @@ -317,8 +317,8 @@ class orc_reader_options_builder { /** * @brief Enable/Disable use of numpy-compatible dtypes. * - * @param use Boolean value to enable/disable. - * @return this for chaining. + * @param use Boolean value to enable/disable + * @return this for chaining */ orc_reader_options_builder& use_np_dtypes(bool use) { @@ -329,8 +329,8 @@ class orc_reader_options_builder { /** * @brief Sets timestamp type to which timestamp column will be cast. * - * @param type Type of timestamp. - * @return this for chaining. 
+ * @param type Type of timestamp + * @return this for chaining */ orc_reader_options_builder& timestamp_type(data_type type) { @@ -341,8 +341,8 @@ class orc_reader_options_builder { /** * @brief Columns that should be read as 128-bit Decimal * - * @param val Vector of column names. - * @return this for chaining. + * @param val Vector of column names + * @return this for chaining */ orc_reader_options_builder& decimal128_columns(std::vector val) { @@ -378,11 +378,11 @@ class orc_reader_options_builder { * Note: Support for reading files with struct columns is currently experimental, the output may not * be as reliable as reading for other datatypes. * - * @param options Settings for controlling reading behavior. + * @param options Settings for controlling reading behavior * @param mr Device memory resource used to allocate device memory of the table in the returned * table_with_metadata. * - * @return The set of columns. + * @return The set of columns */ table_with_metadata read_orc( orc_reader_options const& options, @@ -440,8 +440,8 @@ class orc_writer_options { /** * @brief Constructor from sink and table. * - * @param sink The sink used for writer output. - * @param table Table to be written to output. + * @param sink The sink used for writer output + * @param table Table to be written to output */ explicit orc_writer_options(sink_info const& sink, table_view const& table) : _sink(sink), _table(table) @@ -459,10 +459,10 @@ class orc_writer_options { /** * @brief Create builder to create `orc_writer_options`. * - * @param sink The sink used for writer output. - * @param table Table to be written to output. + * @param sink The sink used for writer output + * @param table Table to be written to output * - * @return Builder to build `orc_writer_options`. 
+ * @return Builder to build `orc_writer_options` */ static orc_writer_options_builder builder(sink_info const& sink, table_view const& table); @@ -551,7 +551,7 @@ class orc_writer_options { /** * @brief Sets compression type. * - * @param comp Compression type. + * @param comp Compression type */ void set_compression(compression_type comp) { _compression = comp; } @@ -563,7 +563,7 @@ class orc_writer_options { * - cudf::io::ORC_STATISTICS_STRIPE: Statistics are collected for each ORC stripe. * - cudf::io::ORC_STATISTICS_ROWGROUP: Statistics are collected for each ORC row group. * - * @param val Frequency of statistics collection. + * @param val Frequency of statistics collection */ void enable_statistics(statistics_freq val) { _stats_freq = val; } @@ -608,14 +608,14 @@ class orc_writer_options { /** * @brief Sets table to be written to output. * - * @param tbl Table for the output. + * @param tbl Table for the output */ void set_table(table_view tbl) { _table = tbl; } /** * @brief Sets associated metadata * - * @param meta Associated metadata. + * @param meta Associated metadata */ void set_metadata(table_input_metadata const* meta) { _metadata = meta; } @@ -647,8 +647,8 @@ class orc_writer_options_builder { /** * @brief Constructor from sink and table. * - * @param sink The sink used for writer output. - * @param table Table to be written to output. + * @param sink The sink used for writer output + * @param table Table to be written to output */ orc_writer_options_builder(sink_info const& sink, table_view const& table) : options{sink, table} { @@ -657,8 +657,8 @@ class orc_writer_options_builder { /** * @brief Sets compression type. * - * @param comp The compression type to use. - * @return this for chaining. 
+ * @param comp The compression type to use + * @return this for chaining */ orc_writer_options_builder& compression(compression_type comp) { @@ -674,8 +674,8 @@ class orc_writer_options_builder { * - cudf::io::ORC_STATISTICS_STRIPE: Statistics are collected for each ORC stripe. * - cudf::io::ORC_STATISTICS_ROWGROUP: Statistics are collected for each ORC row group. * - * @param val Level of statistics collection. - * @return this for chaining. + * @param val Level of statistics collection + * @return this for chaining */ orc_writer_options_builder& enable_statistics(statistics_freq val) { @@ -687,7 +687,7 @@ class orc_writer_options_builder { * @brief Sets the maximum stripe size, in bytes. * * @param val maximum stripe size - * @return this for chaining. + * @return this for chaining */ orc_writer_options_builder& stripe_size_bytes(size_t val) { @@ -699,7 +699,7 @@ class orc_writer_options_builder { * @brief Sets the maximum number of rows in output stripes. * * @param val maximum number or rows - * @return this for chaining. + * @return this for chaining */ orc_writer_options_builder& stripe_size_rows(size_type val) { @@ -711,7 +711,7 @@ class orc_writer_options_builder { * @brief Sets the row index stride. * * @param val new row index stride - * @return this for chaining. + * @return this for chaining */ orc_writer_options_builder& row_index_stride(size_type val) { @@ -722,8 +722,8 @@ class orc_writer_options_builder { /** * @brief Sets table to be written to output. * - * @param tbl Table for the output. - * @return this for chaining. + * @param tbl Table for the output + * @return this for chaining */ orc_writer_options_builder& table(table_view tbl) { @@ -734,8 +734,8 @@ class orc_writer_options_builder { /** * @brief Sets associated metadata. * - * @param meta Associated metadata. - * @return this for chaining. 
+ * @param meta Associated metadata + * @return this for chaining */ orc_writer_options_builder& metadata(table_input_metadata const* meta) { @@ -747,7 +747,7 @@ class orc_writer_options_builder { * @brief Sets Key-Value footer metadata. * * @param metadata Key-Value footer metadata - * @return this for chaining. + * @return this for chaining */ orc_writer_options_builder& key_value_metadata(std::map metadata) { @@ -783,8 +783,8 @@ class orc_writer_options_builder { * Note: Support for writing tables with struct columns is currently experimental, the output may * not be as reliable as writing for other datatypes. * - * @param options Settings for controlling reading behavior. - * @param mr Device memory resource to use for device memory allocation. + * @param options Settings for controlling reading behavior + * @param mr Device memory resource to use for device memory allocation */ void write_orc(orc_writer_options const& options, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -820,7 +820,7 @@ class chunked_orc_writer_options { /** * @brief Constructor from sink and table. * - * @param sink The sink used for writer output. + * @param sink The sink used for writer output */ chunked_orc_writer_options(sink_info const& sink) : _sink(sink) {} @@ -835,9 +835,9 @@ class chunked_orc_writer_options { /** * @brief Create builder to create `chunked_orc_writer_options`. * - * @param sink The sink used for writer output. + * @param sink The sink used for writer output * - * @return Builder to build chunked_orc_writer_options. + * @return Builder to build chunked_orc_writer_options */ static chunked_orc_writer_options_builder builder(sink_info const& sink); @@ -909,7 +909,7 @@ class chunked_orc_writer_options { /** * @brief Sets compression type. * - * @param comp The compression type to use. 
+ * @param comp The compression type to use */ void set_compression(compression_type comp) { _compression = comp; } @@ -921,7 +921,7 @@ class chunked_orc_writer_options { * - cudf::io::ORC_STATISTICS_STRIPE: Statistics are collected for each ORC stripe. * - cudf::io::ORC_STATISTICS_ROWGROUP: Statistics are collected for each ORC row group. * - * @param val Frequency of statistics collection. + * @param val Frequency of statistics collection */ void enable_statistics(statistics_freq val) { _stats_freq = val; } @@ -966,7 +966,7 @@ class chunked_orc_writer_options { /** * @brief Sets associated metadata. * - * @param meta Associated metadata. + * @param meta Associated metadata */ void metadata(table_input_metadata const* meta) { _metadata = meta; } @@ -998,15 +998,15 @@ class chunked_orc_writer_options_builder { /** * @brief Constructor from sink and table. * - * @param sink The sink used for writer output. + * @param sink The sink used for writer output */ explicit chunked_orc_writer_options_builder(sink_info const& sink) : options{sink} {} /** * @brief Sets compression type. * - * @param comp The compression type to use. - * @return this for chaining. + * @param comp The compression type to use + * @return this for chaining */ chunked_orc_writer_options_builder& compression(compression_type comp) { @@ -1022,8 +1022,8 @@ class chunked_orc_writer_options_builder { * - cudf::io::ORC_STATISTICS_STRIPE: Statistics are collected for each ORC stripe. * - cudf::io::ORC_STATISTICS_ROWGROUP: Statistics are collected for each ORC row group. * - * @param val Frequency of statistics collection. - * @return this for chaining. + * @param val Frequency of statistics collection + * @return this for chaining */ chunked_orc_writer_options_builder& enable_statistics(statistics_freq val) { @@ -1035,7 +1035,7 @@ class chunked_orc_writer_options_builder { * @brief Sets the maximum stripe size, in bytes. * * @param val maximum stripe size - * @return this for chaining. 
+ * @return this for chaining */ chunked_orc_writer_options_builder& stripe_size_bytes(size_t val) { @@ -1047,7 +1047,7 @@ class chunked_orc_writer_options_builder { * @brief Sets the maximum number of rows in output stripes. * * @param val maximum number or rows - * @return this for chaining. + * @return this for chaining */ chunked_orc_writer_options_builder& stripe_size_rows(size_type val) { @@ -1059,7 +1059,7 @@ class chunked_orc_writer_options_builder { * @brief Sets the row index stride. * * @param val new row index stride - * @return this for chaining. + * @return this for chaining */ chunked_orc_writer_options_builder& row_index_stride(size_type val) { @@ -1070,8 +1070,8 @@ class chunked_orc_writer_options_builder { /** * @brief Sets associated metadata. * - * @param meta Associated metadata. - * @return this for chaining. + * @param meta Associated metadata + * @return this for chaining */ chunked_orc_writer_options_builder& metadata(table_input_metadata const* meta) { @@ -1083,7 +1083,7 @@ class chunked_orc_writer_options_builder { * @brief Sets Key-Value footer metadata. * * @param metadata Key-Value footer metadata - * @return this for chaining. + * @return this for chaining */ chunked_orc_writer_options_builder& key_value_metadata( std::map metadata) diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index ae3a90b2f08..be1decfaedd 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -68,7 +68,7 @@ class parquet_reader_options { /** * @brief Constructor from source info. * - * @param src source information used to read parquet file. + * @param src source information used to read parquet file */ explicit parquet_reader_options(source_info const& src) : _source(src) {} @@ -85,8 +85,8 @@ class parquet_reader_options { /** * @brief Creates a parquet_reader_options_builder which will build parquet_reader_options. * - * @param src Source information to read parquet file. 
- * @return Builder to build reader options. + * @param src Source information to read parquet file + * @return Builder to build reader options */ static parquet_reader_options_builder builder(source_info const& src); @@ -111,56 +111,56 @@ class parquet_reader_options { /** * @brief Returns true/false depending whether to use pandas metadata or not while reading. * - * @return `true` if pandas metadata is used while reading. + * @return `true` if pandas metadata is used while reading */ [[nodiscard]] bool is_enabled_use_pandas_metadata() const { return _use_pandas_metadata; } /** * @brief Returns number of rows to skip from the start. * - * @return Number of rows to skip from the start. + * @return Number of rows to skip from the start */ [[nodiscard]] size_type get_skip_rows() const { return _skip_rows; } /** * @brief Returns number of rows to read. * - * @return Number of rows to read. + * @return Number of rows to read */ [[nodiscard]] size_type get_num_rows() const { return _num_rows; } /** * @brief Returns names of column to be read. * - * @return Names of column to be read. + * @return Names of column to be read */ [[nodiscard]] std::vector const& get_columns() const { return _columns; } /** * @brief Returns list of individual row groups to be read. * - * @return List of individual row groups to be read. + * @return List of individual row groups to be read */ std::vector> const& get_row_groups() const { return _row_groups; } /** * @brief Returns timestamp type used to cast timestamp columns. * - * @return Timestamp type used to cast timestamp columns. + * @return Timestamp type used to cast timestamp columns */ data_type get_timestamp_type() const { return _timestamp_type; } /** * @brief Sets names of the columns to be read. * - * @param col_names Vector of column names. + * @param col_names Vector of column names */ void set_columns(std::vector col_names) { _columns = std::move(col_names); } /** * @brief Sets vector of individual row groups to read. 
* - * @param row_groups Vector of row groups to read. + * @param row_groups Vector of row groups to read */ void set_row_groups(std::vector> row_groups) { @@ -174,21 +174,21 @@ class parquet_reader_options { /** * @brief Sets to enable/disable conversion of strings to categories. * - * @param val Boolean value to enable/disable conversion of string columns to categories. + * @param val Boolean value to enable/disable conversion of string columns to categories */ void enable_convert_strings_to_categories(bool val) { _convert_strings_to_categories = val; } /** * @brief Sets to enable/disable use of pandas metadata to read. * - * @param val Boolean value whether to use pandas metadata. + * @param val Boolean value whether to use pandas metadata */ void enable_use_pandas_metadata(bool val) { _use_pandas_metadata = val; } /** * @brief Sets number of rows to skip. * - * @param val Number of rows to skip from start. + * @param val Number of rows to skip from start */ void set_skip_rows(size_type val) { @@ -202,7 +202,7 @@ class parquet_reader_options { /** * @brief Sets number of rows to read. * - * @param val Number of rows to read after skip. + * @param val Number of rows to read after skip */ void set_num_rows(size_type val) { @@ -216,7 +216,7 @@ class parquet_reader_options { /** * @brief Sets timestamp_type used to cast timestamp columns. * - * @param type The timestamp data_type to which all timestamp columns need to be cast. + * @param type The timestamp data_type to which all timestamp columns need to be cast */ void set_timestamp_type(data_type type) { _timestamp_type = type; } }; @@ -238,15 +238,15 @@ class parquet_reader_options_builder { /** * @brief Constructor from source info. * - * @param src The source information used to read parquet file. + * @param src The source information used to read parquet file */ explicit parquet_reader_options_builder(source_info const& src) : options(src) {} /** * @brief Sets names of the columns to be read. 
* - * @param col_names Vector of column names. - * @return this for chaining. + * @param col_names Vector of column names + * @return this for chaining */ parquet_reader_options_builder& columns(std::vector col_names) { @@ -257,8 +257,8 @@ class parquet_reader_options_builder { /** * @brief Sets vector of individual row groups to read. * - * @param row_groups Vector of row groups to read. - * @return this for chaining. + * @param row_groups Vector of row groups to read + * @return this for chaining */ parquet_reader_options_builder& row_groups(std::vector> row_groups) { @@ -269,8 +269,8 @@ class parquet_reader_options_builder { /** * @brief Sets enable/disable conversion of strings to categories. * - * @param val Boolean value to enable/disable conversion of string columns to categories. - * @return this for chaining. + * @param val Boolean value to enable/disable conversion of string columns to categories + * @return this for chaining */ parquet_reader_options_builder& convert_strings_to_categories(bool val) { @@ -281,8 +281,8 @@ class parquet_reader_options_builder { /** * @brief Sets to enable/disable use of pandas metadata to read. * - * @param val Boolean value whether to use pandas metadata. - * @return this for chaining. + * @param val Boolean value whether to use pandas metadata + * @return this for chaining */ parquet_reader_options_builder& use_pandas_metadata(bool val) { @@ -293,8 +293,8 @@ class parquet_reader_options_builder { /** * @brief Sets number of rows to skip. * - * @param val Number of rows to skip from start. - * @return this for chaining. + * @param val Number of rows to skip from start + * @return this for chaining */ parquet_reader_options_builder& skip_rows(size_type val) { @@ -305,8 +305,8 @@ class parquet_reader_options_builder { /** * @brief Sets number of rows to read. * - * @param val Number of rows to read after skip. - * @return this for chaining. 
+ * @param val Number of rows to read after skip + * @return this for chaining */ parquet_reader_options_builder& num_rows(size_type val) { @@ -317,8 +317,8 @@ class parquet_reader_options_builder { /** * @brief timestamp_type used to cast timestamp columns. * - * @param type The timestamp data_type to which all timestamp columns need to be cast. - * @return this for chaining. + * @param type The timestamp data_type to which all timestamp columns need to be cast + * @return this for chaining */ parquet_reader_options_builder& timestamp_type(data_type type) { @@ -401,8 +401,8 @@ class parquet_writer_options { /** * @brief Constructor from sink and table. * - * @param sink The sink used for writer output. - * @param table Table to be written to output. + * @param sink The sink used for writer output + * @param table Table to be written to output */ explicit parquet_writer_options(sink_info const& sink, table_view const& table) : _sink(sink), _table(table) @@ -422,17 +422,17 @@ class parquet_writer_options { /** * @brief Create builder to create `parquet_writer_options`. * - * @param sink The sink used for writer output. - * @param table Table to be written to output. + * @param sink The sink used for writer output + * @param table Table to be written to output * - * @return Builder to build parquet_writer_options. + * @return Builder to build parquet_writer_options */ static parquet_writer_options_builder builder(sink_info const& sink, table_view const& table); /** * @brief Create builder to create `parquet_writer_options`. * - * @return parquet_writer_options_builder. + * @return parquet_writer_options_builder */ static parquet_writer_options_builder builder(); @@ -535,7 +535,7 @@ class parquet_writer_options { /** * @brief Sets metadata. * - * @param metadata Associated metadata. 
+ * @param metadata Associated metadata */ void set_metadata(table_input_metadata const* metadata) { _metadata = metadata; } @@ -554,14 +554,14 @@ class parquet_writer_options { /** * @brief Sets the level of statistics. * - * @param sf Level of statistics requested in the output file. + * @param sf Level of statistics requested in the output file */ void set_stats_level(statistics_freq sf) { _stats_level = sf; } /** * @brief Sets compression type. * - * @param compression The compression type to use. + * @param compression The compression type to use */ void set_compression(compression_type compression) { _compression = compression; } @@ -630,8 +630,8 @@ class parquet_writer_options_builder { /** * @brief Constructor from sink and table. * - * @param sink The sink used for writer output. - * @param table Table to be written to output. + * @param sink The sink used for writer output + * @param table Table to be written to output */ explicit parquet_writer_options_builder(sink_info const& sink, table_view const& table) : options(sink, table) @@ -643,7 +643,7 @@ class parquet_writer_options_builder { * * @param partitions Partitions of input table in {start_row, num_rows} pairs. If specified, must * be same size as number of sinks in sink_info - * @return this for chaining. + * @return this for chaining */ parquet_writer_options_builder& partitions(std::vector partitions) { @@ -656,8 +656,8 @@ class parquet_writer_options_builder { /** * @brief Sets metadata in parquet_writer_options. * - * @param metadata Associated metadata. - * @return this for chaining. + * @param metadata Associated metadata + * @return this for chaining */ parquet_writer_options_builder& metadata(table_input_metadata const* metadata) { @@ -669,7 +669,7 @@ class parquet_writer_options_builder { * @brief Sets Key-Value footer metadata in parquet_writer_options. * * @param metadata Key-Value footer metadata - * @return this for chaining. 
+ * @return this for chaining */ parquet_writer_options_builder& key_value_metadata( std::vector> metadata) @@ -683,8 +683,8 @@ class parquet_writer_options_builder { /** * @brief Sets the level of statistics in parquet_writer_options. * - * @param sf Level of statistics requested in the output file. - * @return this for chaining. + * @param sf Level of statistics requested in the output file + * @return this for chaining */ parquet_writer_options_builder& stats_level(statistics_freq sf) { @@ -695,8 +695,8 @@ class parquet_writer_options_builder { /** * @brief Sets compression type in parquet_writer_options. * - * @param compression The compression type to use. - * @return this for chaining. + * @param compression The compression type to use + * @return this for chaining */ parquet_writer_options_builder& compression(compression_type compression) { @@ -709,7 +709,7 @@ class parquet_writer_options_builder { * * @param file_paths Vector of Strings which indicates file path. Must be same size as number of * data sinks - * @return this for chaining. + * @return this for chaining */ parquet_writer_options_builder& column_chunks_file_paths(std::vector file_paths) { @@ -723,7 +723,7 @@ class parquet_writer_options_builder { * @brief Sets the maximum row group size, in bytes. * * @param val maximum row group size - * @return this for chaining. + * @return this for chaining */ parquet_writer_options_builder& row_group_size_bytes(size_t val) { @@ -735,7 +735,7 @@ class parquet_writer_options_builder { * @brief Sets the maximum number of rows in output row groups. * * @param val maximum number or rows - * @return this for chaining. + * @return this for chaining */ parquet_writer_options_builder& row_group_size_rows(size_type val) { @@ -746,8 +746,8 @@ class parquet_writer_options_builder { /** * @brief Sets whether int96 timestamps are written or not in parquet_writer_options. * - * @param enabled Boolean value to enable/disable int96 timestamps. 
- * @return this for chaining. + * @param enabled Boolean value to enable/disable int96 timestamps + * @return this for chaining */ parquet_writer_options_builder& int96_timestamps(bool enabled) { @@ -780,8 +780,8 @@ class parquet_writer_options_builder { * cudf::io::write_parquet(options); * @endcode * - * @param options Settings for controlling writing behavior. - * @param mr Device memory resource to use for device memory allocation. + * @param options Settings for controlling writing behavior + * @param mr Device memory resource to use for device memory allocation * * @return A blob that contains the file metadata (parquet FileMetadata thrift message) if * requested in parquet_writer_options (empty blob otherwise). @@ -797,8 +797,8 @@ std::unique_ptr> write_parquet( * * @ingroup io_writers * - * @param[in] metadata_list List of input file metadata. - * @return A parquet-compatible blob that contains the data for all row groups in the list. + * @param[in] metadata_list List of input file metadata + * @return A parquet-compatible blob that contains the data for all row groups in the list */ std::unique_ptr> merge_row_group_metadata( const std::vector>>& metadata_list); @@ -830,7 +830,7 @@ class chunked_parquet_writer_options { /** * @brief Constructor from sink. * - * @param sink Sink used for writer output. + * @param sink Sink used for writer output */ explicit chunked_parquet_writer_options(sink_info const& sink) : _sink(sink) {} @@ -899,14 +899,14 @@ class chunked_parquet_writer_options { /** * @brief Returns maximum row group size, in rows. * - * @return Maximum row group size, in rows. + * @return Maximum row group size, in rows */ auto get_row_group_size_rows() const { return _row_group_size_rows; } /** * @brief Sets metadata. * - * @param metadata Associated metadata. 
+ * @param metadata Associated metadata */ void set_metadata(table_input_metadata const* metadata) { _metadata = metadata; } @@ -925,14 +925,14 @@ class chunked_parquet_writer_options { /** * @brief Sets the level of statistics in parquet_writer_options. * - * @param sf Level of statistics requested in the output file. + * @param sf Level of statistics requested in the output file */ void set_stats_level(statistics_freq sf) { _stats_level = sf; } /** * @brief Sets compression type. * - * @param compression The compression type to use. + * @param compression The compression type to use */ void set_compression(compression_type compression) { _compression = compression; } @@ -973,9 +973,9 @@ class chunked_parquet_writer_options { /** * @brief creates builder to build chunked_parquet_writer_options. * - * @param sink sink to use for writer output. + * @param sink sink to use for writer output * - * @return Builder to build `chunked_parquet_writer_options`. + * @return Builder to build `chunked_parquet_writer_options` */ static chunked_parquet_writer_options_builder builder(sink_info const& sink); }; @@ -997,15 +997,15 @@ class chunked_parquet_writer_options_builder { /** * @brief Constructor from sink. * - * @param sink The sink used for writer output. + * @param sink The sink used for writer output */ chunked_parquet_writer_options_builder(sink_info const& sink) : options(sink){}; /** * @brief Sets metadata to chunked_parquet_writer_options. * - * @param metadata Associated metadata. - * @return this for chaining. + * @param metadata Associated metadata + * @return this for chaining */ chunked_parquet_writer_options_builder& metadata(table_input_metadata const* metadata) { @@ -1017,7 +1017,7 @@ class chunked_parquet_writer_options_builder { * @brief Sets Key-Value footer metadata in parquet_writer_options. * * @param metadata Key-Value footer metadata - * @return this for chaining. 
+ * @return this for chaining */ chunked_parquet_writer_options_builder& key_value_metadata( std::vector> metadata) @@ -1031,8 +1031,8 @@ class chunked_parquet_writer_options_builder { /** * @brief Sets Sets the level of statistics in chunked_parquet_writer_options. * - * @param sf Level of statistics requested in the output file. - * @return this for chaining. + * @param sf Level of statistics requested in the output file + * @return this for chaining */ chunked_parquet_writer_options_builder& stats_level(statistics_freq sf) { @@ -1043,8 +1043,8 @@ class chunked_parquet_writer_options_builder { /** * @brief Sets compression type to chunked_parquet_writer_options. * - * @param compression The compression type to use. - * @return this for chaining. + * @param compression The compression type to use + * @return this for chaining */ chunked_parquet_writer_options_builder& compression(compression_type compression) { @@ -1058,8 +1058,8 @@ class chunked_parquet_writer_options_builder { * not an internal type for cudf, it needs to be written for backwards * compatibility reasons. * - * @param enabled Boolean value to enable/disable int96 timestamps. - * @return this for chaining. + * @param enabled Boolean value to enable/disable int96 timestamps + * @return this for chaining */ chunked_parquet_writer_options_builder& int96_timestamps(bool enabled) { @@ -1071,7 +1071,7 @@ class chunked_parquet_writer_options_builder { * @brief Sets the maximum row group size, in bytes. * * @param val maximum row group size - * @return this for chaining. + * @return this for chaining */ chunked_parquet_writer_options_builder& row_group_size_bytes(size_t val) { @@ -1083,7 +1083,7 @@ class chunked_parquet_writer_options_builder { * @brief Sets the maximum number of rows in output row groups. * * @param val maximum number or rows - * @return this for chaining. 
+ * @return this for chaining */ chunked_parquet_writer_options_builder& row_group_size_rows(size_type val) { From 1962a24c6f30faff75ae20a0224aa7ab969db9b3 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Wed, 25 May 2022 13:19:38 +0530 Subject: [PATCH 8/8] fix merge issues --- conda/recipes/libcudf/meta.yaml | 1 - cpp/include/cudf/io/parquet.hpp | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index b6c531ffde7..61ac5be6f50 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -183,7 +183,6 @@ outputs: - test -f $PREFIX/include/cudf/lists/gather.hpp - test -f $PREFIX/include/cudf/lists/list_view.hpp - test -f $PREFIX/include/cudf/lists/lists_column_view.hpp - - test -f $PREFIX/include/cudf/lists/list_view.hpp - test -f $PREFIX/include/cudf/lists/sorting.hpp - test -f $PREFIX/include/cudf/lists/stream_compaction.hpp - test -f $PREFIX/include/cudf/merge.hpp diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index 4d5817806be..27821fe5526 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -38,9 +38,9 @@ namespace io { */ constexpr size_t default_row_group_size_bytes = 128 * 1024 * 1024; ///< 128MB per row group -constexpr size_type default_row_group_size_rows = 1000000; ///< 1 million rows per row group +constexpr size_type default_row_group_size_rows = 1000000; ///< 1 million rows per row group constexpr size_t default_max_page_size_bytes = 512 * 1024; ///< 512KB per page -constexpr size_type default_max_page_size_rows = 20000; ///< 20k rows per page +constexpr size_type default_max_page_size_rows = 20000; ///< 20k rows per page class parquet_reader_options_builder;