-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2 from RikitoNoto/feature/create_faculty_parser
Feature/create faculty parser
- Loading branch information
Showing
13 changed files
with
307 additions
and
77 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
class Department:
    """A department, identified solely by its name."""

    def __init__(self, name: str) -> None:
        """
        Args:
            name (str): Name of the department.
        """
        # Kept under a name-mangled attribute; only read access is offered
        # through the ``name`` property.
        self.__name: str = name

    @property
    def name(self) -> str:
        """Return the name supplied at construction time."""
        return self.__name
|
||
|
||
class Faculty:
    """A faculty: a name together with the list of its departments."""

    def __init__(self, name: str, departments: list[Department]) -> None:
        """
        Args:
            name (str): Name of the faculty.
            departments (list[Department]): Departments of this faculty.
                The list object itself is stored (not copied), so it stays
                shared with the caller.
        """
        self.__departments: list[Department] = departments
        self.__name: str = name

    @property
    def departments(self) -> list[Department]:
        """Return the very list object passed to the constructor."""
        return self.__departments

    @property
    def name(self) -> str:
        """Return the name supplied at construction time."""
        return self.__name
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
from typing import Optional | ||
from openpyxl.worksheet.worksheet import Worksheet | ||
from openpyxl.cell.cell import Cell | ||
from src.models.faculty import Department, Faculty | ||
from src.parser.parser import Parser | ||
|
||
|
||
class FacultyParser(Parser):
    """Parser that collects faculties and their departments from a sheet."""

    # Number of rows between the "学部" header cell and the first data row.
    _DATA_ROW_OFFSET = 3
    # Number of columns between the faculty-name column and the
    # department-name column.
    _DEPARTMENT_COLUMN_OFFSET = 2

    def parse(self) -> list[Faculty]:
        """
        Find the "学部" (faculty) header cell and parse the rows below it.

        Reading starts three rows below the header cell and continues
        downwards until the faculty column holds ``None`` or ``""``. For every
        row the faculty name is read from the header's column and the
        department name from the column two to its right. Rows that share a
        faculty name are grouped into a single ``Faculty``; faculties are
        returned in order of first appearance.

        Returns:
            list[Faculty]: One entry per distinct faculty name.

        Raises:
            ValueError: If no cell whose value is "学部" is found.
        """
        base_cell: Optional[Cell] = self._search_cell(
            "学部",
            self._sheet,
        )
        if base_cell is None:
            raise ValueError('"学部"の文字が見つかりませんでした。')

        row = base_cell.row + self._DATA_ROW_OFFSET
        faculty_column = base_cell.column
        department_column = faculty_column + self._DEPARTMENT_COLUMN_OFFSET

        # Faculty name -> Faculty. A dict preserves insertion order, so the
        # result keeps first-appearance order without rescanning a list on
        # every row.
        faculties: dict = {}

        # Walk downwards until the faculty column is blank.
        while True:
            faculty_name = self._sheet.cell(row=row, column=faculty_column).value
            if faculty_name in (None, ""):
                break

            department = Department(
                self._sheet.cell(row=row, column=department_column).value,
            )
            faculty = faculties.get(faculty_name)
            if faculty is None:
                # First row seen for this faculty: create it.
                faculties[faculty_name] = Faculty(faculty_name, [department])
            else:
                faculty.departments.append(department)
            row += 1

        return list(faculties.values())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
from abc import ABC, abstractmethod | ||
from openpyxl.worksheet.worksheet import Worksheet | ||
from openpyxl.cell.cell import Cell | ||
from typing import Optional | ||
|
||
|
||
class Parser(ABC):
    """Abstract base for parsers that read data out of one worksheet."""

    def __init__(self, sheet: Worksheet) -> None:
        """
        Args:
            sheet (Worksheet): Sheet the concrete parser reads from.
        """
        self._sheet = sheet

    @abstractmethod
    def parse(self):
        """Parse ``self._sheet``; concrete subclasses define the result."""
        pass

    @staticmethod
    def _iter_search_order(max_row: int, max_column: int):
        """
        Yield ``(row, column)`` pairs (1-based) in the search order used by
        ``_search_cell``: A1, A2, B2, B1, A3, B3, C3, C2, C1, ...

        Layer ``n`` consists of row ``n`` from column 1 up to ``n - 1``, the
        diagonal cell ``(n, n)``, then column ``n`` from row ``n - 1`` up to
        row 1. Coordinates outside ``max_row`` x ``max_column`` are skipped.
        """
        for layer in range(1, max(max_row, max_column) + 1):
            # Row `layer`, left to right, stopping before the diagonal.
            if layer <= max_row:
                for column in range(1, min(layer - 1, max_column) + 1):
                    yield layer, column

            # Diagonal cell of this layer.
            if layer <= max_row and layer <= max_column:
                yield layer, layer

            # Column `layer`, from just above the diagonal up to row 1.
            if layer <= max_column:
                for row in range(min(layer - 1, max_row), 0, -1):
                    yield row, layer

    def _search_cell(self, keyword: str, sheet: Worksheet) -> Optional[Cell]:
        """
        Search the sheet for ``keyword`` and return the first matching cell.

        Cells are visited in the order A1→A2→B2→B1→A3→B3→C3→C2→C1, i.e. in
        growing square layers along the line from A1 towards
        (max_row, max_column). Only coordinates within
        ``sheet.max_row`` x ``sheet.max_column`` are read.

        Args:
            keyword (str): Value to look for (compared with ``==``).
            sheet (Worksheet): Sheet to search.

        Returns:
            Optional[Cell]: The first cell whose value equals ``keyword``,
            or ``None`` when no cell matches.
        """
        # Read the bounds once, before any cell is accessed.
        max_row = sheet.max_row
        max_column = sheet.max_column

        for row, column in self._iter_search_order(max_row, max_column):
            cell = sheet.cell(row=row, column=column)
            if cell.value == keyword:
                return cell

        return None
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
from abc import ABC, abstractmethod | ||
import openpyxl | ||
from openpyxl.worksheet.worksheet import Worksheet | ||
import os | ||
import pytest | ||
from typing import Generic, TypeVar | ||
|
||
from src.parser.parser import Parser | ||
|
||
T = TypeVar("T") | ||
|
||
|
||
class SingleSheetTestBase(ABC, Generic[T]):
    """
    Shared base for parser tests that read the first sheet of an Excel file.

    Subclasses implement ``get_parser`` to supply the parser under test;
    ``T`` is the type that parser's ``parse()`` returns. The workbook opened
    by ``get_sheet`` is closed again in ``teardown_method``.
    """

    EXCEL_FILE_PATH_1 = f"{os.path.dirname(__file__)}/files/single_sheet_file1.xlsx"
    EXCEL_FILE_PATH_2 = f"{os.path.dirname(__file__)}/files/single_sheet_file2.xlsx"
    # Workbook opened by get_sheet(); stays None until one has been loaded,
    # so teardown_method is safe for tests that never open a file.
    __book = None

    def setup_method(self, method):
        """Start every test without an open workbook."""
        self.__book = None

    def teardown_method(self, method):
        """Close the workbook opened during the test, if there is one."""
        if self.__book is not None:
            self.__book.close()
            self.__book = None

    def get_sheet(self, path) -> Worksheet:
        """
        Load the workbook at ``path`` and return its first sheet.

        The workbook is remembered so that ``teardown_method`` can close it.
        """
        self.__book = openpyxl.load_workbook(path)
        return self.__book[self.__book.sheetnames[0]]

    def parse(self, path: str) -> T:
        """Parse the first sheet of the workbook at ``path`` with the parser
        returned by ``get_parser`` and return the result."""
        sheet = self.get_sheet(path)
        parser = self.get_parser(sheet)
        return parser.parse()

    @abstractmethod
    def get_parser(self, sheet: Worksheet) -> Parser:
        """Return the parser under test, constructed for ``sheet``."""
        pass
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,56 +1,33 @@ | ||
import openpyxl | ||
from openpyxl.worksheet.worksheet import Worksheet | ||
import os | ||
import pytest | ||
from src.models.base_info import BaseInfo | ||
from src.parser.base_info_parser import BaseInfoParser | ||
from src.parser.parser import Parser | ||
from tests.parser.single_sheet_test_base import SingleSheetTestBase | ||
|
||
|
||
class TestBaseInfoParser: | ||
EXCEL_FILE_PATH_1 = f"{os.path.dirname(__file__)}/files/single_sheet_file1.xlsx" | ||
EXCEL_FILE_PATH_2 = f"{os.path.dirname(__file__)}/files/single_sheet_file2.xlsx" | ||
__book: openpyxl.Workbook | ||
|
||
@pytest.fixture | ||
def wrap(self): | ||
self.set_up() | ||
yield | ||
self.tear_down() | ||
|
||
def set_up(self): | ||
pass | ||
|
||
def tear_down(self): | ||
if self.__book: | ||
self.__book.close() | ||
|
||
def get_sheet(self, path) -> Worksheet: | ||
self.__book = openpyxl.load_workbook(path) | ||
return self.__book[self.__book.sheetnames[0]] | ||
|
||
def parse(self, path: str) -> BaseInfo: | ||
sheet = self.get_sheet(path) | ||
parser = BaseInfoParser(sheet) | ||
return parser.parse() | ||
class TestBaseInfoParser(SingleSheetTestBase[BaseInfo]): | ||
def get_parser(self, sheet: Worksheet) -> Parser: | ||
return BaseInfoParser(sheet) | ||
|
||
@pytest.mark.parametrize( | ||
"path, expect", | ||
"path, exp", | ||
[ | ||
(EXCEL_FILE_PATH_1, "F101110100010"), | ||
(EXCEL_FILE_PATH_2, "F123310106522"), | ||
(SingleSheetTestBase.EXCEL_FILE_PATH_1, "F101110100010"), | ||
(SingleSheetTestBase.EXCEL_FILE_PATH_2, "F123310106522"), | ||
], | ||
) | ||
def test_should_parse_school_code(self, path: str, expect: str): | ||
def test_should_parse_school_code(self, path: str, exp: str): | ||
base_info = self.parse(path) | ||
assert base_info.school_code == expect | ||
assert base_info.school_code == exp | ||
|
||
@pytest.mark.parametrize( | ||
"path, expect", | ||
"path, exp", | ||
[ | ||
(EXCEL_FILE_PATH_1, "寳金 清博"), | ||
(EXCEL_FILE_PATH_2, "景山 節"), | ||
(SingleSheetTestBase.EXCEL_FILE_PATH_1, "寳金 清博"), | ||
(SingleSheetTestBase.EXCEL_FILE_PATH_2, "景山 節"), | ||
], | ||
) | ||
def test_should_parse_president(self, path: str, expect: str): | ||
def test_should_parse_president(self, path: str, exp: str): | ||
base_info = self.parse(path) | ||
assert base_info.president == expect | ||
assert base_info.president == exp |
Oops, something went wrong.