Skip to content

Commit

Permalink
TEST-#2288: addressing review comments
Browse files Browse the repository at this point in the history
Signed-off-by: Alexander Myskov <[email protected]>
  • Loading branch information
amyskov committed Oct 28, 2020
1 parent 1f81eb1 commit 3aa53d2
Show file tree
Hide file tree
Showing 2 changed files with 134 additions and 125 deletions.
132 changes: 7 additions & 125 deletions modin/pandas/test/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
import shutil
import sqlalchemy as sa
import csv
from string import ascii_letters

from .utils import (
df_equals,
Expand All @@ -35,6 +34,10 @@
json_long_bytes,
random_state,
eval_io,
get_unique_filename,
get_random_string,
insert_lines_to_csv,
IO_OPS_DATA_DIR,
)

from modin.config import Engine, Backend
Expand Down Expand Up @@ -62,9 +65,9 @@
TEST_GBQ_FILENAME = "test_gbq."
SMALL_ROW_SIZE = 2000

test_data_dir = os.path.join(os.path.dirname(__file__), "read_csv_data")
if not os.path.exists(test_data_dir):
os.mkdir(test_data_dir)

if not os.path.exists(IO_OPS_DATA_DIR):
os.mkdir(IO_OPS_DATA_DIR)


@pytest.fixture
Expand Down Expand Up @@ -159,76 +162,6 @@ def teardown_test_file(test_path):
os.remove(test_path)


def get_random_string():
random_string = "".join(
random_state.choice([x for x in ascii_letters], size=10).tolist()
)
return random_string


def insert_lines_to_csv(
csv_name: str,
lines_positions: list,
lines_type: str = "blank",
encoding: str = None,
**csv_reader_writer_params,
):
"""Insert lines to ".csv" file.
Parameters
----------
csv_name: str
".csv" file that should be modified.
lines_positions: list of ints
Lines postions that sghould be modified (serial number
of line - begins from 0, ends in <rows_number> - 1).
lines_type: str
Lines types that should be inserted to ".csv" file. Possible types:
"blank" - empty line without any delimiters/separators,
"bad" - lines with len(lines_data) > cols_number
encoding: str
Encoding type that should be used during file reading and writing.
"""
cols_number = len(pandas.read_csv(csv_name, nrows=1).columns)
if lines_type == "blank":
lines_data = []
elif lines_type == "bad":
cols_number = len(pandas.read_csv(csv_name, nrows=1).columns)
lines_data = [x for x in range(cols_number + 1)]
else:
raise ValueError(
f"acceptable values for parameter are ['blank', 'bad'], actually passed {lines_type}"
)
lines = []
dialect = "excel"
with open(csv_name, "r", encoding=encoding, newline="") as read_file:
try:
dialect = csv.Sniffer().sniff(read_file.read())
read_file.seek(0)
except Exception:
dialect = None

reader = csv.reader(
read_file,
dialect=dialect if dialect is not None else "excel",
**csv_reader_writer_params,
)
counter = 0
for row in reader:
if counter in lines_positions:
lines.append(lines_data)
else:
lines.append(row)
counter += 1
with open(csv_name, "w", encoding=encoding, newline="") as write_file:
writer = csv.writer(
write_file,
dialect=dialect if dialect is not None else "excel",
**csv_reader_writer_params,
)
writer.writerows(lines)


def _make_csv_file(filenames):
def _csv_file_maker(
filename=TEST_CSV_FILENAME,
Expand Down Expand Up @@ -562,57 +495,6 @@ def teardown_fwf_file():
pass


def get_unique_filename(
test_name: str,
kwargs: dict = {},
extension: str = "csv",
data_dir: str = test_data_dir,
suffix: str = "",
):
"""Returns unique file name with specified parameters.
Parameters
----------
test_name: str
name of the test for which the unique file name is needed.
kwargs: list of ints
Unique combiantion of test parameters for creation of unique name.
extension: str
Extension of unique file.
data_dir: str
Data directory where test files will be created.
suffix: str
String to append to the resulted name.
Returns
-------
Unique file name.
"""
# shortcut if kwargs parameter os not provided
if len(kwargs) == 0 and extension == "csv" and suffix == "":
return os.path.join(data_dir, (test_name + f"_{suffix}" + f".{extension}"))

assert "." not in extension, "please provide pure extension name without '.'"
prohibited_chars = ['"', "\n"]
non_prohibited_char = "np_char"
char_counter = 0
kwargs_name = dict(kwargs)
for key, value in kwargs_name.items():
for char in prohibited_chars:
if isinstance(value, str) and char in value or callable(value):
kwargs_name[key] = non_prohibited_char + str(char_counter)
char_counter += 1
parameters_values = "_".join(
[
str(value)
if not isinstance(value, (list, tuple))
else "_".join([str(x) for x in value])
for value in kwargs_name.values()
]
)
return os.path.join(data_dir, parameters_values + f"_{suffix}" + f".{extension}")


class TestReadCSV:
# delimiter tests
@pytest.mark.parametrize("sep", ["_", ",", ".", "\n"])
Expand Down
127 changes: 127 additions & 0 deletions modin/pandas/test/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@
from modin.utils import to_pandas
from modin.config import TestDatasetSize
from io import BytesIO
import os
from string import ascii_letters
import csv

random_state = np.random.RandomState(seed=42)

Expand All @@ -41,6 +44,9 @@
RAND_LOW = 0
RAND_HIGH = 100

# Directory for storing I/O operations test data
IO_OPS_DATA_DIR = os.path.join(os.path.dirname(__file__), "read_csv_data")

# Input data and functions for the tests
# The test data that we will test our code against
test_data = {
Expand Down Expand Up @@ -832,3 +838,124 @@ def generate_none_dfs():
}
)
return df, df2


def get_unique_filename(
test_name: str,
kwargs: dict = {},
extension: str = "csv",
data_dir: str = IO_OPS_DATA_DIR,
suffix: str = "",
):
"""Returns unique file name with specified parameters.
Parameters
----------
test_name: str
name of the test for which the unique file name is needed.
kwargs: list of ints
Unique combiantion of test parameters for creation of unique name.
extension: str
Extension of unique file.
data_dir: str
Data directory where test files will be created.
suffix: str
String to append to the resulted name.
Returns
-------
Unique file name.
"""
# shortcut if kwargs parameter os not provided
if len(kwargs) == 0 and extension == "csv" and suffix == "":
return os.path.join(data_dir, (test_name + f"_{suffix}" + f".{extension}"))

assert "." not in extension, "please provide pure extension name without '.'"
prohibited_chars = ['"', "\n"]
non_prohibited_char = "np_char"
char_counter = 0
kwargs_name = dict(kwargs)
for key, value in kwargs_name.items():
for char in prohibited_chars:
if isinstance(value, str) and char in value or callable(value):
kwargs_name[key] = non_prohibited_char + str(char_counter)
char_counter += 1
parameters_values = "_".join(
[
str(value)
if not isinstance(value, (list, tuple))
else "_".join([str(x) for x in value])
for value in kwargs_name.values()
]
)
return os.path.join(data_dir, parameters_values + f"_{suffix}" + f".{extension}")


def get_random_string():
random_string = "".join(
random_state.choice([x for x in ascii_letters], size=10).tolist()
)
return random_string


def insert_lines_to_csv(
csv_name: str,
lines_positions: list,
lines_type: str = "blank",
encoding: str = None,
**csv_reader_writer_params,
):
"""Insert lines to ".csv" file.
Parameters
----------
csv_name: str
".csv" file that should be modified.
lines_positions: list of ints
Lines postions that sghould be modified (serial number
of line - begins from 0, ends in <rows_number> - 1).
lines_type: str
Lines types that should be inserted to ".csv" file. Possible types:
"blank" - empty line without any delimiters/separators,
"bad" - lines with len(lines_data) > cols_number
encoding: str
Encoding type that should be used during file reading and writing.
"""
cols_number = len(pandas.read_csv(csv_name, nrows=1).columns)
if lines_type == "blank":
lines_data = []
elif lines_type == "bad":
cols_number = len(pandas.read_csv(csv_name, nrows=1).columns)
lines_data = [x for x in range(cols_number + 1)]
else:
raise ValueError(
f"acceptable values for parameter are ['blank', 'bad'], actually passed {lines_type}"
)
lines = []
dialect = "excel"
with open(csv_name, "r", encoding=encoding, newline="") as read_file:
try:
dialect = csv.Sniffer().sniff(read_file.read())
read_file.seek(0)
except Exception:
dialect = None

reader = csv.reader(
read_file,
dialect=dialect if dialect is not None else "excel",
**csv_reader_writer_params,
)
counter = 0
for row in reader:
if counter in lines_positions:
lines.append(lines_data)
else:
lines.append(row)
counter += 1
with open(csv_name, "w", encoding=encoding, newline="") as write_file:
writer = csv.writer(
write_file,
dialect=dialect if dialect is not None else "excel",
**csv_reader_writer_params,
)
writer.writerows(lines)

0 comments on commit 3aa53d2

Please sign in to comment.