Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/create faculty parser #2

Merged
merged 3 commits into from
Oct 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -158,3 +158,5 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

~*.xlsx
Empty file added src/__init__.py
Empty file.
Empty file added src/models/__init__.py
Empty file.
4 changes: 2 additions & 2 deletions src/models/base_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ def __init__(self, school_code="", president="") -> None:
self.__president: str = president

@property
def school_code(self):
def school_code(self) -> str:
return self.__school_code

@property
def president(self):
def president(self) -> str:
return self.__president
22 changes: 22 additions & 0 deletions src/models/faculty.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
class Department:
    """A named department.

    The name is set once at construction and exposed read-only through
    the ``name`` property.
    """

    def __init__(self, name: str) -> None:
        """Store the department name.

        Args:
            name: Name of the department.
        """
        self.__name: str = name

    @property
    def name(self) -> str:
        """Return the name passed to the constructor."""
        return self.__name


class Faculty:
    """A named faculty together with the departments that belong to it.

    Both values are set at construction and exposed through read-only
    properties.
    """

    def __init__(self, name: str, departments: list[Department]) -> None:
        """Store the faculty name and its departments.

        Args:
            name: Name of the faculty.
            departments: Departments of this faculty. The list is stored by
                reference (it is not copied).
        """
        self.__name: str = name
        self.__departments: list[Department] = departments

    @property
    def name(self) -> str:
        """Return the name passed to the constructor."""
        return self.__name

    @property
    def departments(self) -> list[Department]:
        """Return the stored department list.

        This is the same list object that was passed to the constructor, so
        appending to it changes the faculty's departments.
        """
        return self.__departments
42 changes: 5 additions & 37 deletions src/parser/base_info_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,57 +2,25 @@
from openpyxl.worksheet.worksheet import Worksheet
from openpyxl.cell.cell import Cell
from src.models.base_info import BaseInfo
from src.parser.parser import Parser


class BaseInfoParser:
def __init__(self, sheet: Worksheet) -> None:
self.__sheet = sheet

class BaseInfoParser(Parser):
def parse(self) -> BaseInfo:
"""
学校コードのセルを基準にデータを検索しパースする。
"""
base_cell: Optional[Cell] = self._search_cell(
"学校コード",
self.__sheet,
self._sheet,
)
if not base_cell:
raise ValueError("学校コードが見つかりませんでした。")
return BaseInfo(
school_code=self.__sheet.cell(
school_code=self._sheet.cell(
row=base_cell.row + 1, column=base_cell.column
).value,
president=self.__sheet.cell(
president=self._sheet.cell(
row=base_cell.row + 1, column=base_cell.column + 1
).value,
)

def _search_cell(self, keyword: str, sheet: Worksheet) -> Optional[Cell]:
"""
keywordをシートから検索し、最初に見つけたセルを返す。
検索はA1→A2→B2→B1→A3→B3→C3→C2→C1
のようにA1から(max_row, max_column)に直線を引くような方向で検索をする。

Args:
keyword (str): 検索するキーワード
sheet (Worksheet): 検索対象のシート
"""

for i in range(sheet.max_column):
column = i + 1
# columnを縦に検索
for j in range(column - 1):
row = i + 1
column = j + 1
if sheet.cell(row=row, column=column).value == keyword:
return sheet.cell(row=row, column=column)

# rowとcolumnが同じセルを検索
if sheet.cell(row=column, column=column).value == keyword:
return sheet.cell(row=column, column=column)

# rowを右から左に検索
for j in reversed(range(column - 1)):
row = j + 1
if sheet.cell(row=row, column=column).value == keyword:
return sheet.cell(row=row, column=column)
49 changes: 49 additions & 0 deletions src/parser/faculty_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from typing import Optional
from openpyxl.worksheet.worksheet import Worksheet
from openpyxl.cell.cell import Cell
from src.models.faculty import Department, Faculty
from src.parser.parser import Parser


class FacultyParser(Parser):
    """Parser that reads faculties and their departments from the sheet."""

    # Data rows start this many rows below the "学部" header cell.
    _DATA_ROW_OFFSET = 3
    # The department name sits this many columns to the right of the
    # faculty name on the same row.
    _DEPARTMENT_COLUMN_OFFSET = 2

    def parse(self) -> list[Faculty]:
        """Locate the "学部" header cell and parse the rows below it.

        Starting three rows below the header, rows are read downward until
        the faculty-name cell is empty (``None`` or ``""``). Rows that share
        a faculty name are grouped into one ``Faculty``; each row contributes
        one ``Department`` taken from two columns to the right.

        Returns:
            One ``Faculty`` per distinct faculty name, in order of first
            appearance, each holding its departments in row order.

        Raises:
            ValueError: If no cell containing "学部" is found.
        """
        base_cell: Optional[Cell] = self._search_cell(
            "学部",
            self._sheet,
        )
        if not base_cell:
            raise ValueError('"学部"の文字が見つかりませんでした。')

        faculty_column = base_cell.column
        department_column = faculty_column + self._DEPARTMENT_COLUMN_OFFSET
        row = base_cell.row + self._DATA_ROW_OFFSET

        # dict keeps insertion order, so faculties come out in the order
        # they first appear; lookup by name is O(1) instead of a list scan.
        departments_by_faculty: dict[str, list[Department]] = {}

        # Walk downward until the first blank faculty cell.
        while True:
            faculty_name = self._sheet.cell(row=row, column=faculty_column).value
            if faculty_name in (None, ""):
                break

            department_name = self._sheet.cell(
                row=row, column=department_column
            ).value
            # First sighting of a faculty creates its list; later rows
            # append to the existing one.
            departments_by_faculty.setdefault(faculty_name, []).append(
                Department(department_name)
            )
            row += 1

        return [
            Faculty(name, departments)
            for name, departments in departments_by_faculty.items()
        ]
43 changes: 43 additions & 0 deletions src/parser/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from abc import ABC, abstractmethod
from openpyxl.worksheet.worksheet import Worksheet
from openpyxl.cell.cell import Cell
from typing import Optional


class Parser(ABC):
    """Abstract base for parsers that read data from a single worksheet.

    Subclasses implement ``parse``; the worksheet passed to the constructor
    is available to them as ``self._sheet``.
    """

    def __init__(self, sheet: Worksheet) -> None:
        """Remember the worksheet to parse.

        Args:
            sheet: Worksheet that subclasses read from.
        """
        self._sheet = sheet

    @abstractmethod
    def parse(self):
        """Parse ``self._sheet``; the return type is defined by the subclass."""

    def _search_cell(self, keyword: str, sheet: Worksheet) -> Optional[Cell]:
        """Return the first cell whose value equals ``keyword``.

        The sheet is scanned in growing square "shells" from A1 toward
        (max_row, max_column): A1 -> A2 -> B2 -> B1 -> A3 -> B3 -> C3 -> C2
        -> C1 -> ... For shell ``n`` that means row ``n`` from column 1 up to
        the diagonal, then column ``n`` upward from the row above the
        diagonal. Coordinates outside the sheet's current dimensions are
        skipped.

        Args:
            keyword: Value to look for (compared with ``==``).
            sheet: Worksheet to search.

        Returns:
            The first matching cell in scan order, or ``None`` if no cell
            matches.
        """
        max_row = sheet.max_row
        max_column = sheet.max_column

        # Enough shells to cover the longer side of the sheet.
        for shell in range(1, max(max_row, max_column) + 1):
            # Row `shell`: left to right, ending on the diagonal cell.
            if shell <= max_row:
                for column in range(1, min(shell, max_column) + 1):
                    cell = sheet.cell(row=shell, column=column)
                    if cell.value == keyword:
                        return cell

            # Column `shell`: bottom to top, starting just above the diagonal.
            # Distinct loop variables keep `shell` intact; reusing one name
            # for both purposes made the scan skip the last column.
            if shell <= max_column:
                for row in range(min(shell - 1, max_row), 0, -1):
                    cell = sheet.cell(row=row, column=shell)
                    if cell.value == keyword:
                        return cell

        return None
Binary file added tests/parser/files/~$single_sheet_file1.xlsx
Binary file not shown.
Binary file added tests/parser/files/~$single_sheet_file2.xlsx
Binary file not shown.
36 changes: 36 additions & 0 deletions tests/parser/single_sheet_test_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from abc import ABC, abstractmethod
import openpyxl
from openpyxl.worksheet.worksheet import Worksheet
import os
import pytest
from typing import Generic, TypeVar

from src.parser.parser import Parser

T = TypeVar("T")


class SingleSheetTestBase(ABC, Generic[T]):
    """Shared base for parser tests that read the first sheet of a workbook.

    Subclasses provide the parser under test via ``get_parser``; ``T`` is the
    type that parser's ``parse()`` returns.
    """

    EXCEL_FILE_PATH_1 = f"{os.path.dirname(__file__)}/files/single_sheet_file1.xlsx"
    EXCEL_FILE_PATH_2 = f"{os.path.dirname(__file__)}/files/single_sheet_file2.xlsx"
    # Workbook opened by get_sheet(). Defaults to None so teardown is safe
    # for tests that never load a workbook.
    __book: "Optional[openpyxl.Workbook]" = None

    def setup_method(self, method):
        """Per-test setup hook; nothing needs preparing."""
        pass

    def teardown_method(self, method):
        """Per-test teardown hook: close the workbook the test opened, if any."""
        self.__close_book()

    def get_sheet(self, path: str) -> Worksheet:
        """Load the workbook at ``path`` and return its first sheet.

        Any workbook opened by an earlier call is closed first so it is not
        leaked.

        Args:
            path: Path of the Excel file to load.

        Returns:
            The sheet named first in the workbook's ``sheetnames``.
        """
        self.__close_book()
        self.__book = openpyxl.load_workbook(path)
        return self.__book[self.__book.sheetnames[0]]

    def parse(self, path: str) -> T:
        """Run the parser under test against the first sheet of ``path``.

        Args:
            path: Path of the Excel file to parse.

        Returns:
            Whatever the parser's ``parse()`` returns.
        """
        sheet = self.get_sheet(path)
        parser = self.get_parser(sheet)
        return parser.parse()

    @abstractmethod
    def get_parser(self, sheet: Worksheet) -> Parser:
        """Return the parser under test, constructed for ``sheet``."""
        pass

    def __close_book(self) -> None:
        """Close the currently held workbook, if any, and forget it."""
        if self.__book is not None:
            self.__book.close()
            self.__book = None
53 changes: 15 additions & 38 deletions tests/parser/test_base_info_parser.py
Original file line number Diff line number Diff line change
@@ -1,56 +1,33 @@
import openpyxl
from openpyxl.worksheet.worksheet import Worksheet
import os
import pytest
from src.models.base_info import BaseInfo
from src.parser.base_info_parser import BaseInfoParser
from src.parser.parser import Parser
from tests.parser.single_sheet_test_base import SingleSheetTestBase


class TestBaseInfoParser:
EXCEL_FILE_PATH_1 = f"{os.path.dirname(__file__)}/files/single_sheet_file1.xlsx"
EXCEL_FILE_PATH_2 = f"{os.path.dirname(__file__)}/files/single_sheet_file2.xlsx"
__book: openpyxl.Workbook

@pytest.fixture
def wrap(self):
self.set_up()
yield
self.tear_down()

def set_up(self):
pass

def tear_down(self):
if self.__book:
self.__book.close()

def get_sheet(self, path) -> Worksheet:
self.__book = openpyxl.load_workbook(path)
return self.__book[self.__book.sheetnames[0]]

def parse(self, path: str) -> BaseInfo:
sheet = self.get_sheet(path)
parser = BaseInfoParser(sheet)
return parser.parse()
class TestBaseInfoParser(SingleSheetTestBase[BaseInfo]):
def get_parser(self, sheet: Worksheet) -> Parser:
return BaseInfoParser(sheet)

@pytest.mark.parametrize(
"path, expect",
"path, exp",
[
(EXCEL_FILE_PATH_1, "F101110100010"),
(EXCEL_FILE_PATH_2, "F123310106522"),
(SingleSheetTestBase.EXCEL_FILE_PATH_1, "F101110100010"),
(SingleSheetTestBase.EXCEL_FILE_PATH_2, "F123310106522"),
],
)
def test_should_parse_school_code(self, path: str, expect: str):
def test_should_parse_school_code(self, path: str, exp: str):
base_info = self.parse(path)
assert base_info.school_code == expect
assert base_info.school_code == exp

@pytest.mark.parametrize(
"path, expect",
"path, exp",
[
(EXCEL_FILE_PATH_1, "寳金 清博"),
(EXCEL_FILE_PATH_2, "景山 節"),
(SingleSheetTestBase.EXCEL_FILE_PATH_1, "寳金 清博"),
(SingleSheetTestBase.EXCEL_FILE_PATH_2, "景山 節"),
],
)
def test_should_parse_president(self, path: str, expect: str):
def test_should_parse_president(self, path: str, exp: str):
base_info = self.parse(path)
assert base_info.president == expect
assert base_info.president == exp
Loading