Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Data] Hard deprecate FileExtensionFilter #43144

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 2 additions & 38 deletions python/ray/data/datasource/file_based_datasource.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import io
import pathlib
import posixpath
import warnings
from typing import (
Expand Down Expand Up @@ -77,51 +76,16 @@
 @Deprecated
 @PublicAPI(stability="beta")
 class FileExtensionFilter(PathPartitionFilter):
-    """A file-extension-based path filter that filters files that don't end
-    with the provided extension(s).
-
-    Attributes:
-        file_extensions: File extension(s) of files to be included in reading.
-        allow_if_no_extension: If this is True, files without any extensions
-            will be included in reading.
-
-    """
-
     def __init__(
         self,
         file_extensions: Union[str, List[str]],
         allow_if_no_extension: bool = False,
     ):
-        warnings.warn(
+        raise DeprecationWarning(
             "`FileExtensionFilter` is deprecated. Instead, set the `file_extensions` "
-            "parameter of `read_xxx()` APIs.",
-            DeprecationWarning,
+            "parameter of `read_xxx()` APIs."
         )
-
-        if isinstance(file_extensions, str):
-            file_extensions = [file_extensions]
-
-        self.extensions = [f".{ext.lower()}" for ext in file_extensions]
-        self.allow_if_no_extension = allow_if_no_extension
-
-    def _file_has_extension(self, path: str):
-        suffixes = [suffix.lower() for suffix in pathlib.Path(path).suffixes]
-        if not suffixes:
-            return self.allow_if_no_extension
-        return any(ext in suffixes for ext in self.extensions)
-
-    def __call__(self, paths: List[str]) -> List[str]:
-        return [path for path in paths if self._file_has_extension(path)]
-
-    def __str__(self):
-        return (
-            f"{type(self).__name__}(extensions={self.extensions}, "
-            f"allow_if_no_extensions={self.allow_if_no_extension})"
-        )
-
-    def __repr__(self):
-        return str(self)


 @DeveloperAPI
 class FileBasedDatasource(Datasource):
Expand Down
2 changes: 1 addition & 1 deletion python/ray/data/tests/test_partitioning.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def read_csv(


 def test_file_extension_filter_is_deprecated():
-    with pytest.warns(DeprecationWarning):
+    with pytest.raises(DeprecationWarning):
         FileExtensionFilter("csv")


Expand Down
Loading