From 641ae21bd997a442d8c077920a2de1e8fe535336 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 10 Nov 2023 10:33:32 +0100 Subject: [PATCH 1/2] Expand error message to point to alternatives --- src/pyarrow_hotfix/__init__.py | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/src/pyarrow_hotfix/__init__.py b/src/pyarrow_hotfix/__init__.py index 83808d6..0333213 100644 --- a/src/pyarrow_hotfix/__init__.py +++ b/src/pyarrow_hotfix/__init__.py @@ -2,6 +2,27 @@ # # SPDX-License-Identifier: Apache-2.0 + +_ERROR_MSG = """\ +Disallowed deserialization of 'arrow.py_extension_type': +storage_type = {storage_type} +serialized = {serialized} +pickle disassembly:\n{pickle_disassembly} + +Reading of untrusted Parquet or Feather files with a PyExtensionType column +allows arbitrary code execution. +If you trust this file, you can enable reading the extension type by one of: + +- upgrading to pyarrow >= 14.0.1, and call `pa.PyExtensionType.set_auto_load(True)` +- disable this error by running `import pyarrow_hotfix; pyarrow_hotfix.uninstall()` + +We strongly recommend updating your Parquet/Feather files to use extension types +derived from `pyarrow.ExtensionType` instead, and register this type explicitly. +See https://arrow.apache.org/docs/dev/python/extending_types.html#defining-extension-types-user-defined-types +for more details. +""" + + def install(): import atexit import pyarrow as pa @@ -24,10 +45,12 @@ def __arrow_ext_deserialize__(cls, storage_type, serialized): out = io.StringIO() pickletools.dis(serialized, out) raise RuntimeError( - "forbidden deserialization of 'arrow.py_extension_type': " - "storage_type = %s, serialized = %r, " - "pickle disassembly:\n%s" - % (storage_type, serialized, out.getvalue())) + _ERROR_MSG.format( + storage_type=storage_type, + serialized=serialized, + pickle_disassembly=out.getvalue(), + ) + ) if hasattr(pa, "unregister_extension_type"): # 0.15.0 <= PyArrow From 570942b6b9a381f8c705789c04977c48ce7411b0 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 13 Nov 2023 08:35:59 +0100 Subject: [PATCH 2/2] update test --- tests/test_in_process.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_in_process.py b/tests/test_in_process.py index fe358aa..c7c25f9 100644 --- a/tests/test_in_process.py +++ b/tests/test_in_process.py @@ -40,7 +40,7 @@ def assert_hotfix_functional(capsys, func, *args, **kwargs): expected_schema = pa.schema([pa.field('ext', pa.null())]) assert table.schema.equals(expected_schema, check_metadata=False) else: - expected = "forbidden deserialization of 'arrow.py_extension_type'" + expected = "Disallowed deserialization of 'arrow.py_extension_type'" with pytest.raises(RuntimeError, match=expected): func(*args, **kwargs) captured = capsys.readouterr()