You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
ValueError when running pipe.extract on pyarrow table with DictionaryArray field:
Traceback (most recent call last):
File "/home/j/repos/dlt/dlt/pipeline/pipeline.py", line 468, in extract
self._extract_source(
File "/home/j/repos/dlt/dlt/pipeline/pipeline.py", line 1238, in _extract_source
load_id = extract.extract(
File "/home/j/repos/dlt/dlt/extract/extract.py", line 417, in extract
self._extract_single_source(
File "/home/j/repos/dlt/dlt/extract/extract.py", line 349, in _extract_single_source
extractors[item_format].write_items(
File "/home/j/repos/dlt/dlt/extract/extractors.py", line 333, in write_items
super().write_items(resource, items, meta)
File "/home/j/repos/dlt/dlt/extract/extractors.py", line 138, in write_items
self._write_to_static_table(resource, table_name, items, meta)
File "/home/j/repos/dlt/dlt/extract/extractors.py", line 340, in _write_to_static_table
super()._write_to_static_table(resource, table_name, items, meta)
File "/home/j/repos/dlt/dlt/extract/extractors.py", line 217, in _write_to_static_table
items = self._compute_and_update_table(resource, table_name, items, meta)
File "/home/j/repos/dlt/dlt/extract/extractors.py", line 455, in _compute_and_update_table
items = super()._compute_and_update_table(resource, table_name, items, meta)
File "/home/j/repos/dlt/dlt/extract/extractors.py", line 236, in _compute_and_update_table
computed_table = self._compute_table(resource, items, meta)
File "/home/j/repos/dlt/dlt/extract/extractors.py", line 414, in _compute_table
arrow_table["columns"] = pyarrow.py_arrow_to_table_schema_columns(item.schema)
File "/home/j/repos/dlt/dlt/common/libs/pyarrow.py", line 402, in py_arrow_to_table_schema_columns
**get_column_type_from_py_arrow(field.type),
File "/home/j/repos/dlt/dlt/common/libs/pyarrow.py", line 187, in get_column_type_from_py_arrow
raise ValueError(dtype)
ValueError: dictionary<values=string, indices=int8, ordered=0>
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/j/repos/dlt/mre.py", line 9, in <module>
pipe.extract(table, table_name="bug")
File "/home/j/repos/dlt/dlt/pipeline/pipeline.py", line 223, in _wrap
step_info = f(self, *args, **kwargs)
File "/home/j/repos/dlt/dlt/pipeline/pipeline.py", line 177, in _wrap
rv = f(self, *args, **kwargs)
File "/home/j/repos/dlt/dlt/pipeline/pipeline.py", line 163, in _wrap
return f(self, *args, **kwargs)
File "/home/j/repos/dlt/dlt/pipeline/pipeline.py", line 272, in _wrap
return f(self, *args, **kwargs)
File "/home/j/repos/dlt/dlt/pipeline/pipeline.py", line 489, in extract
raise PipelineStepFailed(
dlt.pipeline.exceptions.PipelineStepFailed: Pipeline execution failed at stage extract when processing package 1730705003.55613 with exception:
<class 'ValueError'>
dictionary<values=string, indices=int8, ordered=0>
Expected behavior
Successful extract.
Steps to reproduce
import dlt
import pyarrow as pa

# create pyarrow table with DictionaryArray field
array = pa.array(["a", "b", "c"], type=pa.dictionary(pa.int8(), pa.string()))
table = pa.table({"foo": array})

# try to extract with dlt pipeline
pipe = dlt.pipeline(destination="filesystem")
pipe.extract(table, table_name="bug")
# result: <class 'ValueError'> dictionary<values=string, indices=int8, ordered=0>
Operating system
Linux
Runtime environment
Local
Python version
3.9
dlt data source
pyarrow table
dlt destination
Filesystem & buckets
Other deployment details
No response
Additional information
No response
The text was updated successfully, but these errors were encountered:
dlt version
dlt 1.3.1a1
Describe the problem
ValueError when running pipe.extract on pyarrow table with DictionaryArray field:
Expected behavior
Successful extract.
Steps to reproduce
Operating system
Linux
Runtime environment
Local
Python version
3.9
dlt data source
pyarrow table
dlt destination
Filesystem & buckets
Other deployment details
No response
Additional information
No response
The text was updated successfully, but these errors were encountered: