Skip to content

Commit

Permalink
FEAT-modin-project#2451: Linting
Browse files Browse the repository at this point in the history
Signed-off-by: William Ma <[email protected]>
  • Loading branch information
williamma12 committed Feb 3, 2021
1 parent e7dbd14 commit 7406629
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 12 deletions.
5 changes: 2 additions & 3 deletions modin/engines/base/io/file_dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def get_path(cls, file_path: str) -> str:
String of strings of absolute file paths.
"""
if S3_ADDRESS_REGEX.search(file_path):
return _s3_path(file_path, False)[0]
return cls._s3_path(file_path, False)[0]
else:
return os.path.abspath(file_path)

Expand Down Expand Up @@ -172,7 +172,6 @@ def get_file_path(fs_handle) -> List[str]:
s3fs = S3FS.S3FileSystem(anon=True)
return get_file_path(s3fs)


@classmethod
def file_exists(cls, file_path: str) -> bool:
"""
Expand All @@ -190,7 +189,7 @@ def file_exists(cls, file_path: str) -> bool:
"""
if isinstance(file_path, str):
if S3_ADDRESS_REGEX.search(file_path):
return len(_s3_path(file_path, False)) > 0
return len(cls._s3_path(file_path, False)) > 0
return os.path.exists(file_path)

@classmethod
Expand Down
12 changes: 5 additions & 7 deletions modin/engines/base/io/text/csv_glob_dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,6 @@ def partitioned_multiple_files(
final_result = []
split_result = []
split_size = 0
read_rows_counter = 0
for f, fname in zip(files, fnames):
if skip_header:
outside_quotes, read_rows = cls._read_rows(
Expand All @@ -369,7 +368,7 @@ def partitioned_multiple_files(
remainder_size = partition_size - split_size
start = f.tell()
if nrows:
outside_quotes, read_rows = cls._read_rows(
_, read_rows = cls._read_rows(
f,
nrows=remainder_size,
quotechar=quotechar,
Expand All @@ -379,14 +378,14 @@ def partitioned_multiple_files(
nrows -= read_rows
end = f.tell()
else:
outside_quotes = cls.offset(
cls.offset(
f,
offset_size=remainder_size,
quotechar=quotechar,
is_quoting=is_quoting,
)
end = f.tell()
split_size += (end - start)
split_size += end - start
split_result.append((fname, start, end))
if split_size < partition_size:
continue
Expand All @@ -411,15 +410,15 @@ def partitioned_multiple_files(
continue
else:
rows_read -= skiprows

# Calculate if the last split needs to be carried over to the next file.
if nrows:
last_size = rows_read % partition_size
full_last_partition = last_size == 0
nrows -= rows_read
else:
_, last_start, last_end = file_splits[-1]
last_size = (last_end - last_start)
last_size = last_end - last_start
full_last_partition = last_size >= partition_size

if full_last_partition:
Expand All @@ -434,4 +433,3 @@ def partitioned_multiple_files(
final_result.append(split_result)

return final_result

4 changes: 3 additions & 1 deletion modin/experimental/engines/pandas_on_ray/io_exp.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,9 @@ class ExperimentalPandasOnRayIO(PandasOnRayIO):
query_compiler_cls=PandasQueryCompiler,
frame_cls=PandasOnRayFrame,
)
read_csv = type("", (RayTask, PandasCSVGlobParser, CSVGlobDispatcher), build_args)._read
read_csv = type(
"", (RayTask, PandasCSVGlobParser, CSVGlobDispatcher), build_args
)._read
read_parquet_remote_task = _read_parquet_columns

@classmethod
Expand Down
1 change: 1 addition & 0 deletions modin/experimental/pandas/io_exp.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ def read_sql(
_, _, _, kwargs = inspect.getargvalues(inspect.currentframe())
return DataFrame(query_compiler=EngineDispatcher.read_sql(**kwargs))


# CSV and table
def _make_parser_func(sep):
"""
Expand Down
11 changes: 10 additions & 1 deletion modin/experimental/pandas/test/test_io_exp.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from modin.config import Engine
from modin.pandas.test.test_io import ( # noqa: F401
df_equals,
eval_io,
make_sql_connection,
make_csv_file,
)
Expand Down Expand Up @@ -66,10 +67,11 @@ def test_from_sql_defaults(make_sql_connection): # noqa: F811
df_equals(modin_df_from_query, pandas_df)
df_equals(modin_df_from_table, pandas_df)


@pytest.mark.skipif(
Engine.get() != "Ray", reason="Currently only support Ray engine for glob paths."
)
def test_read_multiple_csv(make_csv_file):
def test_read_multiple_csv(make_csv_file): # noqa: F811
base_name = get_unique_filename(extension="")
glob_path = "{}_*.csv".format(base_name)
files = ["{}_{}.csv".format(base_name, i) for i in range(2)]
Expand All @@ -89,3 +91,10 @@ def test_read_multiple_csv(make_csv_file):
except AssertionError:
df_equals(modin_df, pandas_df2)


def test_read_csv_s3():
    """Smoke-test glob-path ``read_csv`` against a public S3 bucket.

    Compares Modin's result with pandas' via ``eval_io``. Note: this is a
    module-level pytest function, not a method — the original ``self``
    parameter made pytest look for a fixture named ``self`` and fail at
    collection, so the test could never run.
    """
    eval_io(
        fn_name="read_csv",
        # read_csv kwargs
        filepath_or_buffer="s3://noaa-ghcn-pds/csv/178*.csv",
    )

0 comments on commit 7406629

Please sign in to comment.