From 828168170d8827d626a1ab509cd29f7c89b623a0 Mon Sep 17 00:00:00 2001
From: dvanbrug
Date: Fri, 13 May 2022 15:59:58 -0400
Subject: [PATCH] Use ParquetDataset for Schema Inference

Updates to use ParquetDataset instead of ParquetFile to do schema
inference. This supports both single files and directories of
partitioned parquet datasets.

Signed-off-by: Dirk Van Bruggen
---
 sdk/python/feast/infra/offline_stores/file_source.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sdk/python/feast/infra/offline_stores/file_source.py b/sdk/python/feast/infra/offline_stores/file_source.py
index 8b4b08bb87..5c2a521ac0 100644
--- a/sdk/python/feast/infra/offline_stores/file_source.py
+++ b/sdk/python/feast/infra/offline_stores/file_source.py
@@ -3,7 +3,7 @@
 
 from pyarrow._fs import FileSystem
 from pyarrow._s3fs import S3FileSystem
-from pyarrow.parquet import ParquetFile
+from pyarrow.parquet import ParquetDataset
 
 from feast import type_map
 from feast.data_format import FileFormat, ParquetFormat
@@ -179,9 +179,9 @@ def get_table_column_names_and_types(
         filesystem, path = FileSource.create_filesystem_and_path(
             self.path, self.file_options.s3_endpoint_override
         )
-        schema = ParquetFile(
+        schema = ParquetDataset(
             path if filesystem is None else filesystem.open_input_file(path)
-        ).schema_arrow
+        ).schema.to_arrow_schema()
         return zip(schema.names, map(str, schema.types))
 
     @staticmethod