Skip to content

Commit

Permalink
Improve serialization performance (#2165)
Browse files Browse the repository at this point in the history
Signed-off-by: Judah Rand <[email protected]>
  • Loading branch information
judahrand authored Dec 21, 2021
1 parent f279a7d commit b8daefa
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 10 deletions.
26 changes: 18 additions & 8 deletions sdk/python/feast/infra/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,8 +301,14 @@ def _convert_arrow_to_proto(
feature_view: FeatureView,
join_keys: List[str],
) -> List[Tuple[EntityKeyProto, Dict[str, ValueProto], datetime, Optional[datetime]]]:
# Avoid ChunkedArrays, which guarantees `zero_copy_only` is available.
if isinstance(table, pyarrow.Table):
table = table.to_batches()[0]

# Handle join keys
join_key_values = {k: table.column(k).to_pylist() for k in join_keys}
join_key_values = {
k: table.column(k).to_numpy(zero_copy_only=False) for k in join_keys
}
entity_keys = [
EntityKeyProto(
join_keys=join_keys,
Expand All @@ -317,7 +323,7 @@ def _convert_arrow_to_proto(
feature_dict = {
feature.name: [
python_value_to_proto_value(val, feature.dtype)
for val in table.column(feature.name).to_pylist()
for val in table.column(feature.name).to_numpy(zero_copy_only=False)
]
for feature in feature_view.features
}
Expand All @@ -326,18 +332,22 @@ def _convert_arrow_to_proto(
# Convert event_timestamps
event_timestamps = [
_coerce_datetime(val)
for val in table.column(
feature_view.batch_source.event_timestamp_column
).to_pylist()
for val in pandas.to_datetime(
table.column(feature_view.batch_source.event_timestamp_column).to_numpy(
zero_copy_only=False
)
)
]

# Convert created_timestamps if they exist
if feature_view.batch_source.created_timestamp_column:
created_timestamps = [
_coerce_datetime(val)
for val in table.column(
feature_view.batch_source.created_timestamp_column
).to_pylist()
for val in pandas.to_datetime(
table.column(
feature_view.batch_source.created_timestamp_column
).to_numpy(zero_copy_only=False)
)
]
else:
created_timestamps = [None] * table.num_rows
Expand Down
4 changes: 2 additions & 2 deletions sdk/python/feast/type_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ def _type_err(item, dtype):
"double_list_val",
[np.float64, np.float32, float],
),
ValueType.INT32_LIST: (Int32List, "int32_list_val", [np.int32, int]),
ValueType.INT32_LIST: (Int32List, "int32_list_val", [np.int64, np.int32, int]),
ValueType.INT64_LIST: (Int64List, "int64_list_val", [np.int64, np.int32, int]),
ValueType.UNIX_TIMESTAMP_LIST: (
Int64List,
Expand All @@ -234,7 +234,7 @@ def _type_err(item, dtype):
ValueType.DOUBLE: ("double_val", lambda x: x, {float, np.float64}),
ValueType.STRING: ("string_val", lambda x: str(x), None),
ValueType.BYTES: ("bytes_val", lambda x: x, {bytes}),
ValueType.BOOL: ("bool_val", lambda x: x, {bool}),
ValueType.BOOL: ("bool_val", lambda x: x, {bool, np.bool_}),
}


Expand Down

0 comments on commit b8daefa

Please sign in to comment.