Skip to content

Commit

Permalink
FEAT-modin-project#2451: Linting
Browse files Browse the repository at this point in the history
Signed-off-by: William Ma <[email protected]>
  • Loading branch information
williamma12 committed Feb 3, 2021
1 parent e7dbd14 commit 7406629
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 12 deletions.
5 changes: 2 additions & 3 deletions modin/engines/base/io/file_dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def get_path(cls, file_path: str) -> str:
String of strings of absolute file paths.
"""
if S3_ADDRESS_REGEX.search(file_path):
return _s3_path(file_path, False)[0]
return cls._s3_path(file_path, False)[0]
else:
return os.path.abspath(file_path)

Expand Down Expand Up @@ -172,7 +172,6 @@ def get_file_path(fs_handle) -> List[str]:
s3fs = S3FS.S3FileSystem(anon=True)
return get_file_path(s3fs)


@classmethod
def file_exists(cls, file_path: str) -> bool:
"""
Expand All @@ -190,7 +189,7 @@ def file_exists(cls, file_path: str) -> bool:
"""
if isinstance(file_path, str):
if S3_ADDRESS_REGEX.search(file_path):
return len(_s3_path(file_path, False)) > 0
return len(cls._s3_path(file_path, False)) > 0
return os.path.exists(file_path)

@classmethod
Expand Down
12 changes: 5 additions & 7 deletions modin/engines/base/io/text/csv_glob_dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,6 @@ def partitioned_multiple_files(
final_result = []
split_result = []
split_size = 0
read_rows_counter = 0
for f, fname in zip(files, fnames):
if skip_header:
outside_quotes, read_rows = cls._read_rows(
Expand All @@ -369,7 +368,7 @@ def partitioned_multiple_files(
remainder_size = partition_size - split_size
start = f.tell()
if nrows:
outside_quotes, read_rows = cls._read_rows(
_, read_rows = cls._read_rows(
f,
nrows=remainder_size,
quotechar=quotechar,
Expand All @@ -379,14 +378,14 @@ def partitioned_multiple_files(
nrows -= read_rows
end = f.tell()
else:
outside_quotes = cls.offset(
cls.offset(
f,
offset_size=remainder_size,
quotechar=quotechar,
is_quoting=is_quoting,
)
end = f.tell()
split_size += (end - start)
split_size += end - start
split_result.append((fname, start, end))
if split_size < partition_size:
continue
Expand All @@ -411,15 +410,15 @@ def partitioned_multiple_files(
continue
else:
rows_read -= skiprows

# Calculate if the last split needs to be carried over to the next file.
if nrows:
last_size = rows_read % partition_size
full_last_partition = last_size == 0
nrows -= rows_read
else:
_, last_start, last_end = file_splits[-1]
last_size = (last_end - last_start)
last_size = last_end - last_start
full_last_partition = last_size >= partition_size

if full_last_partition:
Expand All @@ -434,4 +433,3 @@ def partitioned_multiple_files(
final_result.append(split_result)

return final_result

4 changes: 3 additions & 1 deletion modin/experimental/engines/pandas_on_ray/io_exp.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,9 @@ class ExperimentalPandasOnRayIO(PandasOnRayIO):
query_compiler_cls=PandasQueryCompiler,
frame_cls=PandasOnRayFrame,
)
read_csv = type("", (RayTask, PandasCSVGlobParser, CSVGlobDispatcher), build_args)._read
read_csv = type(
"", (RayTask, PandasCSVGlobParser, CSVGlobDispatcher), build_args
)._read
read_parquet_remote_task = _read_parquet_columns

@classmethod
Expand Down
1 change: 1 addition & 0 deletions modin/experimental/pandas/io_exp.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ def read_sql(
_, _, _, kwargs = inspect.getargvalues(inspect.currentframe())
return DataFrame(query_compiler=EngineDispatcher.read_sql(**kwargs))


# CSV and table
def _make_parser_func(sep):
"""
Expand Down
11 changes: 10 additions & 1 deletion modin/experimental/pandas/test/test_io_exp.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from modin.config import Engine
from modin.pandas.test.test_io import ( # noqa: F401
df_equals,
eval_io,
make_sql_connection,
make_csv_file,
)
Expand Down Expand Up @@ -66,10 +67,11 @@ def test_from_sql_defaults(make_sql_connection): # noqa: F811
df_equals(modin_df_from_query, pandas_df)
df_equals(modin_df_from_table, pandas_df)


@pytest.mark.skipif(
Engine.get() != "Ray", reason="Currently only support Ray engine for glob paths."
)
def test_read_multiple_csv(make_csv_file):
def test_read_multiple_csv(make_csv_file): # noqa: F811
base_name = get_unique_filename(extension="")
glob_path = "{}_*.csv".format(base_name)
files = ["{}_{}.csv".format(base_name, i) for i in range(2)]
Expand All @@ -89,3 +91,10 @@ def test_read_multiple_csv(make_csv_file):
except AssertionError:
df_equals(modin_df, pandas_df2)


def test_read_csv_s3():
    """Smoke-test glob-path ``read_csv`` against a public S3 bucket.

    Compares Modin's result with pandas' via ``eval_io``. Note: this is a
    module-level pytest function, not a method — the original ``self``
    parameter made pytest look for a fixture named ``self`` and fail at
    collection, so the test could never run.
    """
    eval_io(
        fn_name="read_csv",
        # read_csv kwargs
        filepath_or_buffer="s3://noaa-ghcn-pds/csv/178*.csv",
    )

0 comments on commit 7406629

Please sign in to comment.