Derive Default for WriterProperties (apache#4268)
* Derive Default for WriterProperties

* Review feedback
tustvold authored May 24, 2023
1 parent 98919ff commit 56437cc
Showing 11 changed files with 45 additions and 43 deletions.
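The change is mechanical at call sites: wherever default properties were previously built explicitly and wrapped in an `Arc`, `Default::default()` now suffices. A minimal before/after sketch using the `SerializedFileWriter` API touched throughout this diff (the one-line schema and in-memory buffers are hypothetical, purely for illustration):

```rust
use std::sync::Arc;

use parquet::file::properties::WriterProperties;
use parquet::file::writer::SerializedFileWriter;
use parquet::schema::parser::parse_message_type;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Hypothetical single-column schema, just to have something to write.
    let schema = Arc::new(parse_message_type("message schema { REQUIRED INT32 id; }")?);

    // Before this commit: default properties had to be spelled out and Arc-wrapped.
    let props = Arc::new(WriterProperties::builder().build());
    let mut before = Vec::new();
    SerializedFileWriter::new(&mut before, schema.clone(), props)?.close()?;

    // After this commit: Default::default() yields the same Arc<WriterProperties>.
    let mut after = Vec::new();
    SerializedFileWriter::new(&mut after, schema, Default::default())?.close()?;
    Ok(())
}
```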
5 changes: 2 additions & 3 deletions parquet/src/arrow/arrow_reader/mod.rs
@@ -1746,11 +1746,10 @@ mod tests {

{
// Write using low-level parquet API (#1167)
- let writer_props = Arc::new(WriterProperties::builder().build());
let mut writer = SerializedFileWriter::new(
file.try_clone().unwrap(),
schema,
- writer_props,
+ Default::default(),
)
.unwrap();

@@ -2288,7 +2287,7 @@ mod tests {
}
";
let schema = Arc::new(parse_message_type(MESSAGE_TYPE).unwrap());
- let props = Arc::new(WriterProperties::builder().build());
+ let props = Default::default();

let mut buf = Vec::with_capacity(1024);
let mut writer = SerializedFileWriter::new(&mut buf, schema, props).unwrap();
2 changes: 1 addition & 1 deletion parquet/src/arrow/arrow_writer/mod.rs
@@ -105,7 +105,7 @@ impl<W: Write> ArrowWriter<W> {
) -> Result<Self> {
let schema = arrow_to_parquet_schema(&arrow_schema)?;
// add serialized arrow schema
- let mut props = props.unwrap_or_else(|| WriterProperties::builder().build());
+ let mut props = props.unwrap_or_default();
add_encoded_arrow_schema_to_metadata(&arrow_schema, &mut props);

let max_row_group_size = props.max_row_group_size();
5 changes: 1 addition & 4 deletions parquet/src/arrow/mod.rs
@@ -40,10 +40,7 @@
//!
//! let file = File::create("data.parquet").unwrap();
//!
- //! // Default writer properties
- //! let props = WriterProperties::builder().build();
- //!
- //! let mut writer = ArrowWriter::try_new(file, batch.schema(), Some(props)).unwrap();
+ //! let mut writer = ArrowWriter::try_new(file, batch.schema(), None).unwrap();
//!
//! writer.write(&batch).expect("Writing batch");
//!
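With `unwrap_or_default()` in place, the doc example above can simply pass `None` and still get the default writer configuration. A self-contained sketch of that usage, assuming the `arrow_array` crate and the `ArrowWriter` API shown here (column name, values, and output path are hypothetical):

```rust
use std::fs::File;
use std::sync::Arc;

use arrow_array::{ArrayRef, Int32Array, RecordBatch};
use parquet::arrow::ArrowWriter;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Hypothetical single-column batch.
    let ids = Int32Array::from(vec![1, 2, 3]);
    let batch = RecordBatch::try_from_iter(vec![("id", Arc::new(ids) as ArrayRef)])?;

    let file = File::create("data.parquet")?;
    // None falls back to WriterProperties::default() via unwrap_or_default().
    let mut writer = ArrowWriter::try_new(file, batch.schema(), None)?;
    writer.write(&batch)?;
    writer.close()?;
    Ok(())
}
```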
3 changes: 1 addition & 2 deletions parquet/src/column/mod.rs
@@ -63,9 +63,8 @@
//! }
//! ";
//! let schema = Arc::new(parse_message_type(message_type).unwrap());
- //! let props = Arc::new(WriterProperties::builder().build());
//! let file = fs::File::create(path).unwrap();
- //! let mut writer = SerializedFileWriter::new(file, schema, props).unwrap();
+ //! let mut writer = SerializedFileWriter::new(file, schema, Default::default()).unwrap();
//!
//! let mut row_group_writer = writer.next_row_group().unwrap();
//! while let Some(mut col_writer) = row_group_writer.next_column().unwrap() {
30 changes: 15 additions & 15 deletions parquet/src/column/writer/mod.rs
@@ -1131,7 +1131,7 @@ mod tests {
#[test]
fn test_column_writer_inconsistent_def_rep_length() {
let page_writer = get_test_page_writer();
- let props = Arc::new(WriterProperties::builder().build());
+ let props = Default::default();
let mut writer = get_test_column_writer::<Int32Type>(page_writer, 1, 1, props);
let res = writer.write_batch(&[1, 2, 3, 4], Some(&[1, 1, 1]), Some(&[0, 0]));
assert!(res.is_err());
@@ -1146,7 +1146,7 @@
#[test]
fn test_column_writer_invalid_def_levels() {
let page_writer = get_test_page_writer();
- let props = Arc::new(WriterProperties::builder().build());
+ let props = Default::default();
let mut writer = get_test_column_writer::<Int32Type>(page_writer, 1, 0, props);
let res = writer.write_batch(&[1, 2, 3, 4], None, None);
assert!(res.is_err());
@@ -1161,7 +1161,7 @@
#[test]
fn test_column_writer_invalid_rep_levels() {
let page_writer = get_test_page_writer();
- let props = Arc::new(WriterProperties::builder().build());
+ let props = Default::default();
let mut writer = get_test_column_writer::<Int32Type>(page_writer, 0, 1, props);
let res = writer.write_batch(&[1, 2, 3, 4], None, None);
assert!(res.is_err());
@@ -1176,7 +1176,7 @@
#[test]
fn test_column_writer_not_enough_values_to_write() {
let page_writer = get_test_page_writer();
- let props = Arc::new(WriterProperties::builder().build());
+ let props = Default::default();
let mut writer = get_test_column_writer::<Int32Type>(page_writer, 1, 0, props);
let res = writer.write_batch(&[1, 2], Some(&[1, 1, 1, 1]), None);
assert!(res.is_err());
@@ -1191,7 +1191,7 @@
#[test]
fn test_column_writer_write_only_one_dictionary_page() {
let page_writer = get_test_page_writer();
- let props = Arc::new(WriterProperties::builder().build());
+ let props = Default::default();
let mut writer = get_test_column_writer::<Int32Type>(page_writer, 0, 0, props);
writer.write_batch(&[1, 2, 3, 4], None, None).unwrap();
// First page should be correctly written.
@@ -1499,7 +1499,7 @@ mod tests {
#[test]
fn test_column_writer_check_metadata() {
let page_writer = get_test_page_writer();
- let props = Arc::new(WriterProperties::builder().build());
+ let props = Default::default();
let mut writer = get_test_column_writer::<Int32Type>(page_writer, 0, 0, props);
writer.write_batch(&[1, 2, 3, 4], None, None).unwrap();

@@ -1535,7 +1535,7 @@ mod tests {
#[test]
fn test_column_writer_check_byte_array_min_max() {
let page_writer = get_test_page_writer();
- let props = Arc::new(WriterProperties::builder().build());
+ let props = Default::default();
let mut writer =
get_test_decimals_column_writer::<ByteArrayType>(page_writer, 0, 0, props);
writer
@@ -1591,7 +1591,7 @@ mod tests {
#[test]
fn test_column_writer_uint32_converted_type_min_max() {
let page_writer = get_test_page_writer();
- let props = Arc::new(WriterProperties::builder().build());
+ let props = Default::default();
let mut writer = get_test_unsigned_int_given_as_converted_column_writer::<
Int32Type,
>(page_writer, 0, 0, props);
@@ -1664,7 +1664,7 @@ mod tests {
let mut buf = Vec::with_capacity(100);
let mut write = TrackedWrite::new(&mut buf);
let page_writer = Box::new(SerializedPageWriter::new(&mut write));
- let props = Arc::new(WriterProperties::builder().build());
+ let props = Default::default();
let mut writer = get_test_column_writer::<Int32Type>(page_writer, 0, 0, props);

writer.write_batch(&[1, 2, 3, 4], None, None).unwrap();
@@ -1772,25 +1772,25 @@ mod tests {

#[test]
fn test_column_writer_empty_column_roundtrip() {
- let props = WriterProperties::builder().build();
+ let props = Default::default();
column_roundtrip::<Int32Type>(props, &[], None, None);
}

#[test]
fn test_column_writer_non_nullable_values_roundtrip() {
- let props = WriterProperties::builder().build();
+ let props = Default::default();
column_roundtrip_random::<Int32Type>(props, 1024, i32::MIN, i32::MAX, 0, 0);
}

#[test]
fn test_column_writer_nullable_non_repeated_values_roundtrip() {
- let props = WriterProperties::builder().build();
+ let props = Default::default();
column_roundtrip_random::<Int32Type>(props, 1024, i32::MIN, i32::MAX, 10, 0);
}

#[test]
fn test_column_writer_nullable_repeated_values_roundtrip() {
- let props = WriterProperties::builder().build();
+ let props = Default::default();
column_roundtrip_random::<Int32Type>(props, 1024, i32::MIN, i32::MAX, 10, 10);
}

@@ -2121,7 +2121,7 @@ mod tests {
// write data
// and check the offset index and column index
let page_writer = get_test_page_writer();
- let props = Arc::new(WriterProperties::builder().build());
+ let props = Default::default();
let mut writer = get_test_column_writer::<Int32Type>(page_writer, 0, 0, props);
writer.write_batch(&[1, 2, 3, 4], None, None).unwrap();
// first page
@@ -2433,7 +2433,7 @@ mod tests {
/// Write data into parquet using [`get_test_page_writer`] and [`get_test_column_writer`] and returns generated statistics.
fn statistics_roundtrip<T: DataType>(values: &[<T as DataType>::T]) -> Statistics {
let page_writer = get_test_page_writer();
- let props = Arc::new(WriterProperties::builder().build());
+ let props = Default::default();
let mut writer = get_test_column_writer::<T>(page_writer, 0, 0, props);
writer.write_batch(values, None, None).unwrap();

3 changes: 1 addition & 2 deletions parquet/src/file/mod.rs
@@ -45,9 +45,8 @@
//! }
//! ";
//! let schema = Arc::new(parse_message_type(message_type).unwrap());
- //! let props = Arc::new(WriterProperties::builder().build());
//! let file = fs::File::create(&path).unwrap();
- //! let mut writer = SerializedFileWriter::new(file, schema, props).unwrap();
+ //! let mut writer = SerializedFileWriter::new(file, schema, Default::default()).unwrap();
//! let mut row_group_writer = writer.next_row_group().unwrap();
//! while let Some(mut col_writer) = row_group_writer.next_column().unwrap() {
//! // ... write values to a column writer
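As in the doc examples, `Default::default()` is accepted where `SerializedFileWriter::new` expects a `WriterPropertiesPtr` (an `Arc<WriterProperties>`) because the standard library provides `impl<T: Default> Default for Arc<T>`. A tiny sketch of that equivalence, using only getters already exercised elsewhere in this diff:

```rust
use std::sync::Arc;

use parquet::file::properties::WriterProperties;

fn main() {
    // Explicitly built defaults, Arc-wrapped.
    let explicit: Arc<WriterProperties> = Arc::new(WriterProperties::builder().build());
    // Works once WriterProperties: Default exists, via Arc's blanket Default impl.
    let via_default: Arc<WriterProperties> = Default::default();

    assert_eq!(explicit.max_row_group_size(), via_default.max_row_group_size());
    assert_eq!(explicit.data_pagesize_limit(), via_default.data_pagesize_limit());
}
```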
17 changes: 15 additions & 2 deletions parquet/src/file/properties.rs
@@ -27,7 +27,7 @@
//! };
//!
//! // Create properties with default configuration.
- //! let props = WriterProperties::builder().build();
+ //! let props = WriterProperties::default();
//!
//! // Use properties builder to set certain options and assemble the configuration.
//! let props = WriterProperties::builder()
@@ -130,7 +130,20 @@ pub struct WriterProperties {
sorting_columns: Option<Vec<SortingColumn>>,
}

+ impl Default for WriterProperties {
+     fn default() -> Self {
+         Self::builder().build()
+     }
+ }
+
impl WriterProperties {
+     /// Create a new [`WriterProperties`] with the default settings
+     ///
+     /// See [`WriterProperties::builder`] for customising settings
+     pub fn new() -> Self {
+         Self::default()
+     }
+
/// Returns builder for writer properties with default values.
pub fn builder() -> WriterPropertiesBuilder {
WriterPropertiesBuilder::with_defaults()
@@ -836,7 +849,7 @@ mod tests {

#[test]
fn test_writer_properties_default_settings() {
- let props = WriterProperties::builder().build();
+ let props = WriterProperties::default();
assert_eq!(props.data_pagesize_limit(), DEFAULT_PAGE_SIZE);
assert_eq!(
props.dictionary_pagesize_limit(),
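After this hunk, the following three forms produce the same default configuration, and the builder remains the way to set anything non-default. A short sketch (the Snappy compression setting is only an example, not part of this commit):

```rust
use parquet::basic::Compression;
use parquet::file::properties::WriterProperties;

fn main() {
    // Equivalent ways to obtain default writer properties.
    let a = WriterProperties::default();
    let b = WriterProperties::new();
    let c = WriterProperties::builder().build();
    assert_eq!(a.max_row_group_size(), c.max_row_group_size());
    assert_eq!(b.data_pagesize_limit(), c.data_pagesize_limit());

    // Non-default settings still go through the builder.
    let _custom = WriterProperties::builder()
        .set_compression(Compression::SNAPPY)
        .build();
}
```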
10 changes: 3 additions & 7 deletions parquet/src/file/serialized_reader.rs
@@ -785,7 +785,6 @@ mod tests {
use crate::file::page_index::index_reader::{
read_columns_indexes, read_pages_locations,
};
- use crate::file::properties::WriterProperties;
use crate::file::writer::SerializedFileWriter;
use crate::record::RowAccessor;
use crate::schema::parser::parse_message_type;
@@ -1716,12 +1715,9 @@ mod tests {

let schema = parse_message_type(message_type).unwrap();
let mut out = Vec::with_capacity(1024);
- let mut writer = SerializedFileWriter::new(
-     &mut out,
-     Arc::new(schema),
-     Arc::new(WriterProperties::builder().build()),
- )
- .unwrap();
+ let mut writer =
+     SerializedFileWriter::new(&mut out, Arc::new(schema), Default::default())
+         .unwrap();

let mut r = writer.next_row_group().unwrap();
let mut c = r.next_column().unwrap().unwrap();
8 changes: 4 additions & 4 deletions parquet/src/file/writer.rs
@@ -825,7 +825,7 @@ mod tests {
.build()
.unwrap(),
);
- let props = Arc::new(WriterProperties::builder().build());
+ let props = Default::default();
let mut writer = SerializedFileWriter::new(file, schema, props).unwrap();
let row_group_writer = writer.next_row_group().unwrap();
let res = row_group_writer.close();
@@ -860,7 +860,7 @@ mod tests {
.build()
.unwrap(),
);
- let props = Arc::new(WriterProperties::builder().build());
+ let props = Default::default();
let mut writer = SerializedFileWriter::new(file, schema, props).unwrap();
let mut row_group_writer = writer.next_row_group().unwrap();

@@ -898,7 +898,7 @@ mod tests {
.build()
.unwrap(),
);
- let props = Arc::new(WriterProperties::builder().build());
+ let props = Default::default();
let writer =
SerializedFileWriter::new(file.try_clone().unwrap(), schema, props).unwrap();
writer.close().unwrap();
@@ -1575,7 +1575,7 @@ mod tests {
";

let schema = Arc::new(parse_message_type(message_type).unwrap());
- let props = Arc::new(WriterProperties::builder().build());
+ let props = Default::default();
let mut writer = SerializedFileWriter::new(vec![], schema, props).unwrap();
let mut row_group_writer = writer.next_row_group().unwrap();

3 changes: 1 addition & 2 deletions parquet_derive/src/lib.rs
@@ -65,8 +65,7 @@ mod parquet_field;
///
/// let schema = samples.as_slice().schema();
///
- /// let props = Arc::new(WriterProperties::builder().build());
- /// let mut writer = SerializedFileWriter::new(file, schema, props).unwrap();
+ /// let mut writer = SerializedFileWriter::new(file, schema, Default::default()).unwrap();
///
/// let mut row_group = writer.next_row_group().unwrap();
/// samples.as_slice().write_to_row_group(&mut row_group).unwrap();
2 changes: 1 addition & 1 deletion parquet_derive_test/src/lib.rs
@@ -139,7 +139,7 @@ mod tests {

assert_eq!(&schema, &generated_schema);

- let props = Arc::new(WriterProperties::builder().build());
+ let props = Default::default();
let mut writer =
SerializedFileWriter::new(file, generated_schema, props).unwrap();

