Skip to content

Commit

Permalink
Merge pull request #2 from RikitoNoto/feature/create_faculty_parser
Browse files Browse the repository at this point in the history
Feature/create faculty parser
  • Loading branch information
RikitoNoto authored Oct 22, 2023
2 parents 2c1a4a0 + 209d2b6 commit 82acec8
Show file tree
Hide file tree
Showing 13 changed files with 307 additions and 77 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -158,3 +158,5 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

~*.xlsx
Empty file added src/__init__.py
Empty file.
Empty file added src/models/__init__.py
Empty file.
4 changes: 2 additions & 2 deletions src/models/base_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ def __init__(self, school_code="", president="") -> None:
self.__president: str = president

@property
def school_code(self):
def school_code(self) -> str:
return self.__school_code

@property
def president(self):
def president(self) -> str:
return self.__president
22 changes: 22 additions & 0 deletions src/models/faculty.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
class Department:
    """A named department; ``Faculty`` holds a list of these."""

    def __init__(self, name: str) -> None:
        """Store the department name.

        Args:
            name: Name of the department.
        """
        self.__name: str = name

    @property
    def name(self) -> str:
        """Name passed to the constructor (read-only)."""
        return self.__name


class Faculty:
    """A faculty together with the departments that belong to it."""

    def __init__(self, name: str, departments: list[Department]) -> None:
        """Store the faculty name and its initial departments.

        Args:
            name: Name of the faculty.
            departments: Initial departments. The list is copied, so later
                additions made through ``departments`` do not leak back into
                the list the caller passed in.
        """
        self.__name: str = name
        self.__departments: list[Department] = list(departments)

    @property
    def name(self) -> str:
        """Name passed to the constructor (read-only)."""
        return self.__name

    @property
    def departments(self) -> list[Department]:
        """Departments of this faculty.

        The internal list itself is returned (not a copy), so appending to it
        adds a department to this faculty.
        """
        return self.__departments
42 changes: 5 additions & 37 deletions src/parser/base_info_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,57 +2,25 @@
from openpyxl.worksheet.worksheet import Worksheet
from openpyxl.cell.cell import Cell
from src.models.base_info import BaseInfo
from src.parser.parser import Parser


class BaseInfoParser:
def __init__(self, sheet: Worksheet) -> None:
self.__sheet = sheet

class BaseInfoParser(Parser):
def parse(self) -> BaseInfo:
"""
学校コードのセルを基準にデータを検索しパースする。
"""
base_cell: Optional[Cell] = self._search_cell(
"学校コード",
self.__sheet,
self._sheet,
)
if not base_cell:
raise ValueError("学校コードが見つかりませんでした。")
return BaseInfo(
school_code=self.__sheet.cell(
school_code=self._sheet.cell(
row=base_cell.row + 1, column=base_cell.column
).value,
president=self.__sheet.cell(
president=self._sheet.cell(
row=base_cell.row + 1, column=base_cell.column + 1
).value,
)

def _search_cell(self, keyword: str, sheet: Worksheet) -> Optional[Cell]:
"""
keywordをシートから検索し、最初に見つけたセルを返す。
検索はA1→A2→B2→B1→A3→B3→C3→C2→C1
のようにA1から(max_row, max_column)に直線を引くような方向で検索をする。
Args:
keyword (str): 検索するキーワード
sheet (Worksheet): 検索対象のシート
"""

for i in range(sheet.max_column):
column = i + 1
# columnを縦に検索
for j in range(column - 1):
row = i + 1
column = j + 1
if sheet.cell(row=row, column=column).value == keyword:
return sheet.cell(row=row, column=column)

# rowとcolumnが同じセルを検索
if sheet.cell(row=column, column=column).value == keyword:
return sheet.cell(row=column, column=column)

# rowを右から左に検索
for j in reversed(range(column - 1)):
row = j + 1
if sheet.cell(row=row, column=column).value == keyword:
return sheet.cell(row=row, column=column)
49 changes: 49 additions & 0 deletions src/parser/faculty_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from typing import Optional
from openpyxl.worksheet.worksheet import Worksheet
from openpyxl.cell.cell import Cell
from src.models.faculty import Department, Faculty
from src.parser.parser import Parser


class FacultyParser(Parser):
    """Parser that reads faculties and their departments from the sheet."""

    def parse(self) -> list[Faculty]:
        """Parse faculties, using the "学部" cell as the anchor.

        Reading starts three rows below the anchor cell and continues
        downwards until the anchor column holds an empty cell (``None`` or
        ``""``). In every row the anchor column supplies the faculty name and
        the cell two columns to its right supplies a department name. Rows
        that repeat a faculty name are merged into one ``Faculty``; faculties
        are returned in order of first appearance.

        Returns:
            One ``Faculty`` per distinct faculty name.

        Raises:
            ValueError: If no cell whose value is "学部" is found.
        """
        base_cell: Optional[Cell] = self._search_cell(
            "学部",
            self._sheet,
        )
        if not base_cell:
            raise ValueError('"学部"の文字が見つかりませんでした。')

        faculty_column = base_cell.column
        department_column = faculty_column + 2
        row = base_cell.row + 3  # data starts three rows below the anchor

        # Group department rows by faculty name. dict preserves insertion
        # order, so the result follows the order of first appearance.
        departments_by_faculty: dict[str, list[Department]] = {}
        while (
            faculty_name := self._sheet.cell(row=row, column=faculty_column).value
        ) not in (None, ""):
            department = Department(
                self._sheet.cell(row=row, column=department_column).value
            )
            departments_by_faculty.setdefault(faculty_name, []).append(department)
            row += 1

        return [
            Faculty(name, departments)
            for name, departments in departments_by_faculty.items()
        ]
43 changes: 43 additions & 0 deletions src/parser/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from abc import ABC, abstractmethod
from openpyxl.worksheet.worksheet import Worksheet
from openpyxl.cell.cell import Cell
from typing import Optional


class Parser(ABC):
    """Base class for parsers that read data from a single worksheet."""

    def __init__(self, sheet: Worksheet) -> None:
        """Keep the worksheet that ``parse`` reads from.

        Args:
            sheet: Worksheet to be parsed.
        """
        self._sheet = sheet

    @abstractmethod
    def parse(self):
        """Parse the worksheet; each subclass defines the result type."""

    def _search_cell(self, keyword: str, sheet: Worksheet) -> Optional[Cell]:
        """Return the first cell whose value equals ``keyword``.

        The sheet is searched in growing "L" shapes starting from A1:
        A1 -> A2 -> B2 -> B1 -> A3 -> B3 -> C3 -> C2 -> C1 -> ...
        For the n-th step, row n is read left to right up to the diagonal
        cell (n, n), then column n is read upwards back to row 1.

        Args:
            keyword: Value to look for.
            sheet: Worksheet to search.

        Returns:
            The first matching cell in the order above, or ``None`` when no
            cell within ``max_row`` x ``max_column`` matches.
        """
        # Read the bounds once so the search area is fixed for the whole scan.
        max_row = sheet.max_row
        max_column = sheet.max_column

        def coordinates():
            """Yield (row, column) pairs in the documented search order."""
            for n in range(1, max(max_row, max_column) + 1):
                # Row n, left to right, stopping just before the diagonal.
                for column in range(1, n):
                    yield n, column
                # The diagonal cell itself.
                yield n, n
                # Column n, upwards from just above the diagonal.
                for row in range(n - 1, 0, -1):
                    yield row, n

        for row, column in coordinates():
            # The L-shaped walk covers a square; skip the part that lies
            # outside the used range so those cells are never requested
            # (openpyxl's cell() creates a cell that does not exist yet).
            if row > max_row or column > max_column:
                continue
            cell = sheet.cell(row=row, column=column)
            if cell.value == keyword:
                return cell
        return None
Binary file added tests/parser/files/~$single_sheet_file1.xlsx
Binary file not shown.
Binary file added tests/parser/files/~$single_sheet_file2.xlsx
Binary file not shown.
36 changes: 36 additions & 0 deletions tests/parser/single_sheet_test_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from abc import ABC, abstractmethod
import openpyxl
from openpyxl.worksheet.worksheet import Worksheet
import os
import pytest
from typing import Generic, TypeVar

from src.parser.parser import Parser

T = TypeVar("T")


class SingleSheetTestBase(ABC, Generic[T]):
    """Shared scaffolding for parser tests that read the first sheet of a file.

    Subclasses implement ``get_parser`` to choose the parser under test;
    ``T`` is the type that parser's ``parse`` returns.
    """

    EXCEL_FILE_PATH_1 = f"{os.path.dirname(__file__)}/files/single_sheet_file1.xlsx"
    EXCEL_FILE_PATH_2 = f"{os.path.dirname(__file__)}/files/single_sheet_file2.xlsx"
    # Workbook opened by the current test, or None when nothing is open.
    # The None default is required: with only an annotation the attribute
    # does not exist, and teardown_method raises AttributeError for any test
    # that never calls get_sheet().
    __book: "openpyxl.Workbook | None" = None

    def setup_method(self, method):
        """Per-test setup hook; nothing needs preparing."""

    def teardown_method(self, method):
        """Per-test teardown hook: close the workbook if one was opened."""
        if self.__book is not None:
            self.__book.close()
            self.__book = None

    def get_sheet(self, path) -> Worksheet:
        """Load the workbook at ``path`` and return its first worksheet.

        The workbook is remembered so that ``teardown_method`` can close it.
        """
        self.__book = openpyxl.load_workbook(path)
        return self.__book[self.__book.sheetnames[0]]

    def parse(self, path: str) -> T:
        """Parse the first sheet of ``path`` with the parser under test."""
        sheet = self.get_sheet(path)
        parser = self.get_parser(sheet)
        return parser.parse()

    @abstractmethod
    def get_parser(self, sheet: Worksheet) -> Parser:
        """Return the parser under test, bound to ``sheet``."""
53 changes: 15 additions & 38 deletions tests/parser/test_base_info_parser.py
Original file line number Diff line number Diff line change
@@ -1,56 +1,33 @@
import openpyxl
from openpyxl.worksheet.worksheet import Worksheet
import os
import pytest
from src.models.base_info import BaseInfo
from src.parser.base_info_parser import BaseInfoParser
from src.parser.parser import Parser
from tests.parser.single_sheet_test_base import SingleSheetTestBase


class TestBaseInfoParser:
EXCEL_FILE_PATH_1 = f"{os.path.dirname(__file__)}/files/single_sheet_file1.xlsx"
EXCEL_FILE_PATH_2 = f"{os.path.dirname(__file__)}/files/single_sheet_file2.xlsx"
__book: openpyxl.Workbook

@pytest.fixture
def wrap(self):
self.set_up()
yield
self.tear_down()

def set_up(self):
pass

def tear_down(self):
if self.__book:
self.__book.close()

def get_sheet(self, path) -> Worksheet:
self.__book = openpyxl.load_workbook(path)
return self.__book[self.__book.sheetnames[0]]

def parse(self, path: str) -> BaseInfo:
sheet = self.get_sheet(path)
parser = BaseInfoParser(sheet)
return parser.parse()
class TestBaseInfoParser(SingleSheetTestBase[BaseInfo]):
def get_parser(self, sheet: Worksheet) -> Parser:
return BaseInfoParser(sheet)

@pytest.mark.parametrize(
"path, expect",
"path, exp",
[
(EXCEL_FILE_PATH_1, "F101110100010"),
(EXCEL_FILE_PATH_2, "F123310106522"),
(SingleSheetTestBase.EXCEL_FILE_PATH_1, "F101110100010"),
(SingleSheetTestBase.EXCEL_FILE_PATH_2, "F123310106522"),
],
)
def test_should_parse_school_code(self, path: str, expect: str):
def test_should_parse_school_code(self, path: str, exp: str):
base_info = self.parse(path)
assert base_info.school_code == expect
assert base_info.school_code == exp

@pytest.mark.parametrize(
"path, expect",
"path, exp",
[
(EXCEL_FILE_PATH_1, "寳金 清博"),
(EXCEL_FILE_PATH_2, "景山 節"),
(SingleSheetTestBase.EXCEL_FILE_PATH_1, "寳金 清博"),
(SingleSheetTestBase.EXCEL_FILE_PATH_2, "景山 節"),
],
)
def test_should_parse_president(self, path: str, expect: str):
def test_should_parse_president(self, path: str, exp: str):
base_info = self.parse(path)
assert base_info.president == expect
assert base_info.president == exp
Loading

0 comments on commit 82acec8

Please sign in to comment.