Skip to content

Commit

Permalink
refactor(excel2json-lists): add filename to sheet class (#1090)
Browse files: browse the repository at this point in the history
  • Loading branch information
Nora-Olivia-Ammann authored Aug 9, 2024
1 parent 4888995 commit 2f9e30e
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 35 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -13,5 +13,6 @@ class ExcelFile:

@dataclass
class ExcelSheet:
excel_name: str
sheet_name: str
df: pd.DataFrame
15 changes: 9 additions & 6 deletions src/dsp_tools/commands/excel2json/new_lists.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -75,8 +75,11 @@ def _parse_files(excelfolder: Path | str) -> list[ExcelFile]:
file_names = [file for file in Path(excelfolder).glob("*list*.xlsx", case_sensitive=False) if _non_hidden(file)]
all_files = []
for file in file_names:
sheets = [ExcelSheet(sheet_name=name, df=df) for name, df in read_and_clean_all_sheets(file).items()]
all_files.append(ExcelFile(filename=file.stem, sheets=sheets))
sheets = [
ExcelSheet(excel_name=str(file), sheet_name=name, df=df)
for name, df in read_and_clean_all_sheets(file).items()
]
all_files.append(ExcelFile(filename=str(file), sheets=sheets))
return all_files


Expand Down Expand Up @@ -125,7 +128,7 @@ def _add_id_optional_column_if_not_exists(list_files: list[ExcelFile]) -> list[E
if "id (optional)" not in sheet.df.columns:
df = sheet.df
df["id (optional)"] = pd.NA
all_sheets.append(ExcelSheet(sheet_name=sheet.sheet_name, df=df))
all_sheets.append(ExcelSheet(excel_name=file.filename, sheet_name=sheet.sheet_name, df=df))
else:
all_sheets.append(sheet)
all_files.append(ExcelFile(filename=file.filename, sheets=all_sheets))
Expand All @@ -138,7 +141,7 @@ def _construct_ids(list_files: list[ExcelFile]) -> list[ExcelFile]:
all_sheets = []
for sheet in file.sheets:
df = _complete_id_one_df(sheet.df, _get_preferred_language(sheet.df.columns))
all_sheets.append(ExcelSheet(sheet_name=sheet.sheet_name, df=df))
all_sheets.append(ExcelSheet(excel_name=file.filename, sheet_name=sheet.sheet_name, df=df))
all_files.append(ExcelFile(filename=file.filename, sheets=all_sheets))
all_files = _resolve_duplicate_ids_all_excels(all_files)
return _fill_parent_id_col_all_excels(all_files)
Expand All @@ -150,7 +153,7 @@ def _fill_parent_id_col_all_excels(list_files: list[ExcelFile]) -> list[ExcelFil
all_sheets = []
for sheet in file.sheets:
df = _fill_parent_id_col_one_df(sheet.df, _get_preferred_language(sheet.df.columns))
all_sheets.append(ExcelSheet(sheet_name=sheet.sheet_name, df=df))
all_sheets.append(ExcelSheet(excel_name=file.filename, sheet_name=sheet.sheet_name, df=df))
all_files.append(ExcelFile(filename=file.filename, sheets=all_sheets))
return all_files

Expand Down Expand Up @@ -191,7 +194,7 @@ def _remove_duplicate_ids_in_all_excels(duplicate_ids: list[str], list_files: li
for i, row in df.iterrows():
if row["id"] in duplicate_ids and pd.isna(row["id (optional)"]):
df.at[i, "id"] = _construct_non_duplicate_id_string(df.iloc[int(str(i))], preferred_lang)
all_sheets.append(ExcelSheet(sheet.sheet_name, df))
all_sheets.append(ExcelSheet(file.filename, sheet.sheet_name, df))
all_files.append(ExcelFile(file.filename, all_sheets))
return list_files

Expand Down
16 changes: 8 additions & 8 deletions test/unittests/commands/excel2json/test_new_lists.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -48,8 +48,8 @@ def test_resolve_duplicates_in_all_excels(self) -> None:
}
)
all_excels = [
ExcelFile(filename="file1", sheets=[ExcelSheet(sheet_name="sheet1", df=f1_s1)]),
ExcelFile(filename="file2", sheets=[ExcelSheet(sheet_name="sheet2", df=f2_s2)]),
ExcelFile(filename="file1", sheets=[ExcelSheet(excel_name="file1", sheet_name="sheet1", df=f1_s1)]),
ExcelFile(filename="file2", sheets=[ExcelSheet(excel_name="file2", sheet_name="sheet2", df=f2_s2)]),
]
res = _remove_duplicate_ids_in_all_excels(["1"], all_excels)
assert len(res) == 2
Expand Down Expand Up @@ -78,8 +78,8 @@ def test_resolve_duplicates_in_all_excels_custom_id(self) -> None:
}
)
all_excels = [
ExcelFile(filename="file1", sheets=[ExcelSheet(sheet_name="sheet1", df=f1_s1)]),
ExcelFile(filename="file2", sheets=[ExcelSheet(sheet_name="sheet2", df=f2_s2)]),
ExcelFile(filename="file1", sheets=[ExcelSheet(excel_name="file1", sheet_name="sheet1", df=f1_s1)]),
ExcelFile(filename="file2", sheets=[ExcelSheet(excel_name="file2", sheet_name="sheet2", df=f2_s2)]),
]
res = _remove_duplicate_ids_in_all_excels(["1"], all_excels)
assert len(res) == 2
Expand Down Expand Up @@ -108,8 +108,8 @@ def test_analyse_resolve_all_excel_duplicates_with_duplicates(self) -> None:
}
)
all_excels = [
ExcelFile(filename="file1", sheets=[ExcelSheet(sheet_name="sheet1", df=f1_s1)]),
ExcelFile(filename="file2", sheets=[ExcelSheet(sheet_name="sheet2", df=f2_s2)]),
ExcelFile(filename="file1", sheets=[ExcelSheet(excel_name="file1", sheet_name="sheet1", df=f1_s1)]),
ExcelFile(filename="file2", sheets=[ExcelSheet(excel_name="file2", sheet_name="sheet2", df=f2_s2)]),
]
res = _resolve_duplicate_ids_all_excels(all_excels)
assert len(res) == 2
Expand Down Expand Up @@ -138,8 +138,8 @@ def test_analyse_resolve_all_excel_duplicates_no_duplicates(self) -> None:
}
)
all_excels = [
ExcelFile(filename="file1", sheets=[ExcelSheet(sheet_name="sheet1", df=f1_s1)]),
ExcelFile(filename="file2", sheets=[ExcelSheet(sheet_name="sheet2", df=f2_s2)]),
ExcelFile(filename="file1", sheets=[ExcelSheet(excel_name="file1", sheet_name="sheet1", df=f1_s1)]),
ExcelFile(filename="file2", sheets=[ExcelSheet(excel_name="file2", sheet_name="sheet2", df=f2_s2)]),
]
res = _resolve_duplicate_ids_all_excels(all_excels)
assert len(res) == 2
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -54,8 +54,8 @@ def test_good(self) -> None:
}
)
all_excels = [
ExcelFile(filename="file1", sheets=[ExcelSheet(sheet_name="sheet1", df=df_1)]),
ExcelFile(filename="file2", sheets=[ExcelSheet(sheet_name="sheet2", df=df_2)]),
ExcelFile(filename="file1", sheets=[ExcelSheet(excel_name="file1", sheet_name="sheet1", df=df_1)]),
ExcelFile(filename="file2", sheets=[ExcelSheet(excel_name="file2", sheet_name="sheet2", df=df_2)]),
]
_make_shape_compliance_all_excels(all_excels)

Expand All @@ -79,10 +79,13 @@ def test_problem(self) -> None:
}
)
all_excels = [
ExcelFile(filename="file1", sheets=[ExcelSheet(sheet_name="sheet1", df=df_1)]),
ExcelFile(filename="file1", sheets=[ExcelSheet(excel_name="file1", sheet_name="sheet1", df=df_1)]),
ExcelFile(
filename="file2",
sheets=[ExcelSheet(sheet_name="sheet2", df=df_2), ExcelSheet(sheet_name="sheet3", df=df_3)],
sheets=[
ExcelSheet(excel_name="file2", sheet_name="sheet2", df=df_2),
ExcelSheet(excel_name="file2", sheet_name="sheet3", df=df_3),
],
),
]
expected = regex.escape(
Expand Down Expand Up @@ -129,8 +132,8 @@ def test_good(self) -> None:
}
)
all_excels = [
ExcelFile(filename="file1", sheets=[ExcelSheet(sheet_name="sheet1", df=df_1)]),
ExcelFile(filename="file2", sheets=[ExcelSheet(sheet_name="sheet2", df=df_2)]),
ExcelFile(filename="file1", sheets=[ExcelSheet(excel_name="file1", sheet_name="sheet1", df=df_1)]),
ExcelFile(filename="file2", sheets=[ExcelSheet(excel_name="file2", sheet_name="sheet2", df=df_2)]),
]
_check_duplicates_all_excels(all_excels)

Expand All @@ -143,7 +146,7 @@ def test_problem(self) -> None:
}
)
all_excels = [
ExcelFile(filename="file1", sheets=[ExcelSheet(sheet_name="sheet1", df=df_1)]),
ExcelFile(filename="file1", sheets=[ExcelSheet(excel_name="file1", sheet_name="sheet1", df=df_1)]),
]
expected = regex.escape(
"\nThe excel file(s) used to create the list section have the following problem(s):\n\n"
Expand Down Expand Up @@ -173,8 +176,8 @@ def test_problem_duplicate_id(self) -> None:
}
)
all_excels = [
ExcelFile(filename="file1", sheets=[ExcelSheet(sheet_name="sheet1", df=df_1)]),
ExcelFile(filename="file2", sheets=[ExcelSheet(sheet_name="sheet2", df=df_2)]),
ExcelFile(filename="file1", sheets=[ExcelSheet(excel_name="file1", sheet_name="sheet1", df=df_1)]),
ExcelFile(filename="file2", sheets=[ExcelSheet(excel_name="file2", sheet_name="sheet2", df=df_2)]),
]
expected = regex.escape(
"\nThe excel file(s) used to create the list section have the following problem(s):"
Expand Down Expand Up @@ -206,8 +209,8 @@ def test_good(self) -> None:
}
)
all_excels = [
ExcelFile(filename="file1", sheets=[ExcelSheet(sheet_name="sheet1", df=df_1)]),
ExcelFile(filename="file2", sheets=[ExcelSheet(sheet_name="sheet2", df=df_2)]),
ExcelFile(filename="file1", sheets=[ExcelSheet(excel_name="file1", sheet_name="sheet1", df=df_1)]),
ExcelFile(filename="file2", sheets=[ExcelSheet(excel_name="file2", sheet_name="sheet2", df=df_2)]),
]
_check_for_unique_list_names(all_excels)

Expand All @@ -230,9 +233,12 @@ def test_problem(self) -> None:
all_excels = [
ExcelFile(
filename="file1",
sheets=[ExcelSheet(sheet_name="sheet1", df=df_1), ExcelSheet(sheet_name="sheet2", df=df_2)],
sheets=[
ExcelSheet(excel_name="file1", sheet_name="sheet1", df=df_1),
ExcelSheet(excel_name="file1", sheet_name="sheet2", df=df_2),
],
),
ExcelFile(filename="file2", sheets=[ExcelSheet(sheet_name="sheet2", df=df_3)]),
ExcelFile(filename="file2", sheets=[ExcelSheet(excel_name="file2", sheet_name="sheet2", df=df_3)]),
]
expected = regex.escape(
"\nThe excel file(s) used to create the list section have the following problem(s):\n\n"
Expand Down Expand Up @@ -401,8 +407,8 @@ def test_good(self) -> None:
}
)
all_excels = [
ExcelFile(filename="file1", sheets=[ExcelSheet(sheet_name="sheet1", df=df_1)]),
ExcelFile(filename="file2", sheets=[ExcelSheet(sheet_name="sheet2", df=df_2)]),
ExcelFile(filename="file1", sheets=[ExcelSheet(excel_name="file1", sheet_name="sheet1", df=df_1)]),
ExcelFile(filename="file2", sheets=[ExcelSheet(excel_name="file2", sheet_name="sheet2", df=df_2)]),
]
_check_for_missing_translations_all_excels(all_excels)

Expand All @@ -421,8 +427,8 @@ def test_problem(self) -> None:
}
)
all_excels = [
ExcelFile(filename="file1", sheets=[ExcelSheet(sheet_name="sheet1", df=df_1)]),
ExcelFile(filename="file2", sheets=[ExcelSheet(sheet_name="sheet2", df=df_2)]),
ExcelFile(filename="file1", sheets=[ExcelSheet(excel_name="file1", sheet_name="sheet1", df=df_1)]),
ExcelFile(filename="file2", sheets=[ExcelSheet(excel_name="file2", sheet_name="sheet2", df=df_2)]),
]
expected = regex.escape(
"\nThe excel file(s) used to create the list section have the following problem(s):\n\n"
Expand Down Expand Up @@ -666,8 +672,8 @@ def test_all_good(self) -> None:
}
)
all_excels = [
ExcelFile(filename="file1", sheets=[ExcelSheet(sheet_name="sheet1", df=df_1)]),
ExcelFile(filename="file2", sheets=[ExcelSheet(sheet_name="sheet2", df=df_2)]),
ExcelFile(filename="file1", sheets=[ExcelSheet(excel_name="file1", sheet_name="sheet1", df=df_1)]),
ExcelFile(filename="file2", sheets=[ExcelSheet(excel_name="file2", sheet_name="sheet2", df=df_2)]),
]
_check_for_erroneous_entries_all_excels(all_excels)

Expand All @@ -681,8 +687,8 @@ def test_all_problem(self) -> None:
}
)
all_excels = [
ExcelFile(filename="file1", sheets=[ExcelSheet(sheet_name="sheet1", df=df_1)]),
ExcelFile(filename="file2", sheets=[ExcelSheet(sheet_name="sheet2", df=df_2)]),
ExcelFile(filename="file1", sheets=[ExcelSheet(excel_name="file1", sheet_name="sheet1", df=df_1)]),
ExcelFile(filename="file2", sheets=[ExcelSheet(excel_name="file2", sheet_name="sheet2", df=df_2)]),
]
expected = regex.escape(
"\nThe excel file(s) used to create the list section have the following problem(s):\n\n"
Expand Down

0 comments on commit 2f9e30e

Please sign in to comment.