Skip to content

Commit

Permalink
feat: add CSV (text) file support (#646)
Browse files Browse the repository at this point in the history
First version of CSV options (inspired by various engines/libraries)
  • Loading branch information
EpsilonPrime authored Aug 10, 2024
1 parent e41eff2 commit 5d49e04
Showing 1 changed file with 28 additions and 1 deletion.
29 changes: 28 additions & 1 deletion proto/substrait/algebra.proto
Original file line number Diff line number Diff line change
Expand Up @@ -136,14 +136,41 @@ message ReadRel {
message ArrowReadOptions {}
message OrcReadOptions {}
message DwrfReadOptions {}
// Options for reading delimiter-separated text files (for example CSV).
message DelimiterSeparatedTextReadOptions {
// Delimiter-separated files may be compressed. The reader should
// autodetect compression and decompress as needed.

// The character(s) used to separate fields. Common values are comma,
// tab, and pipe. Multi-character delimiters are allowed.
string field_delimiter = 1;
// The maximum number of bytes to read from a single line. If a line
// exceeds this limit, the resulting behavior is undefined.
// NOTE(review): the meaning of an unset/zero value (e.g. "no limit") is
// not specified here - confirm with the specification.
uint64 max_line_size = 2;
// The character(s) used to quote strings. Common values are the single
// and double quotation marks.
string quote = 3;
// The number of lines to skip at the beginning of the file.
// NOTE(review): an unset value is presumably equivalent to 0 (skip
// nothing), since this field has no explicit presence - confirm.
uint64 header_lines_to_skip = 4;
// The character used to escape characters in strings. Backslash is
// a common value. Note that a double quotation mark can also be used as
// an escape character, but the external quotes should be removed first.
// NOTE(review): the field type is string; whether a multi-character
// escape sequence is permitted is not stated - confirm.
string escape = 5;
// If this value is encountered in the file (the empty string is a valid
// choice), the resulting value is null instead. Leave unset to disable.
// The field is declared `optional` so that an explicitly provided empty
// string can be told apart from "unset".
// If this value is provided, the effective schema of this file consists
// entirely of nullable strings. If not provided, the effective schema
// is instead made up of non-nullable strings.
optional string value_treated_as_null = 6;
}

// The format of the files along with options for reading those files.
// As a oneof, at most one of the members below is set at a time.
oneof file_format {
// Read options for Parquet files.
ParquetReadOptions parquet = 9;
// Read options for Arrow files.
ArrowReadOptions arrow = 10;
// Read options for ORC files.
OrcReadOptions orc = 11;
// An arbitrary message packed as google.protobuf.Any.
// NOTE(review): presumably used for file formats not listed in this
// oneof - confirm against the specification.
google.protobuf.Any extension = 12;
// Read options for DWRF files.
DwrfReadOptions dwrf = 13;
// Read options for delimiter-separated text files such as CSV; see
// DelimiterSeparatedTextReadOptions for the available settings.
DelimiterSeparatedTextReadOptions text = 14;
}
}
}
Expand Down

0 comments on commit 5d49e04

Please sign in to comment.