Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix test changes workflow #94

Merged
merged 6 commits into from
Jan 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/test_scraper_changes.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ jobs:
name: Show changes in scraper results
steps:
- uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Setup python
uses: actions/setup-python@v1
with:
Expand Down
14 changes: 11 additions & 3 deletions jedeschule/pipelines/db_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,14 @@
from jedeschule.pipelines.school_pipeline import SchoolPipelineItem

Base = declarative_base()
engine = create_engine(os.environ.get("DATABASE_URL"), echo=False)
Session = sessionmaker(bind=engine)
session = Session()


# Lazily created, process-wide session shared by every get_session() caller.
_session = None


def get_session():
    """Return the shared SQLAlchemy session, creating it on first use.

    The engine is built from the ``DATABASE_URL`` environment variable the
    first time this function is called, so importing this module does not
    require a configured database.

    The same session object is returned on every call. Callers in this
    module query/update rows through one ``get_session()`` call and then
    ``add``/``commit`` through another; handing out a fresh session (and a
    fresh engine with its own connection pool) each time would leave those
    changes uncommitted in a session nobody holds on to, and would open a
    new pool per processed item.

    Returns:
        The session bound to the engine for ``DATABASE_URL``.
    """
    global _session
    if _session is None:
        engine = create_engine(os.environ.get("DATABASE_URL"), echo=False)
        _session = sessionmaker(bind=engine)()
    return _session


class School(Base):
Expand All @@ -36,6 +41,8 @@ class School(Base):

@staticmethod
def update_or_create(item: SchoolPipelineItem) -> School:
session = get_session()

school = session.query(School).get(item.info['id'])
if school:
session.query(School).filter_by(id=item.info['id']).update({**item.info, 'raw': item.item})
Expand All @@ -48,6 +55,7 @@ class DatabasePipeline(object):
def process_item(self, item, spider):
school = School.update_or_create(item)
try:
session = get_session()
session.add(school)
session.commit()
except SQLAlchemyError as e:
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
alembic==1.3.3
Scrapy==1.7.3
Scrapy==2.4.1
requests==2.20.0
wget==3.2
xlrd==1.1.0
Expand Down
15 changes: 10 additions & 5 deletions test_changes.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@ def get_clean_item(data):


def compare_schools(new_school, old_school):
print()
print(f"Comparing {new_school.get('id')}")
new_school = sort_dict(new_school)
old_school = sort_dict(old_school)

Expand All @@ -46,12 +44,19 @@ def main():
data = load_data()
for school in data[:10]:
school_id = school.get('info').get('id')

print()
print('#'*10, f'Comparing {school_id}')

upstream_data = {}
try:
upstream_data = fetch_data(school_id)
upstream_data.pop('raw')
compare_schools(school.get('info'), upstream_data)
except HTTPError:
print(f"Could not fetch old data for school-id {school_id}")
except HTTPError as e:
print(f"WARN: Could not fetch old data for school-id {school_id}: {e}")
print()

compare_schools(school.get('info'), upstream_data)


if __name__ == "__main__":
Expand Down
11 changes: 10 additions & 1 deletion test_changes.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,16 @@

set -e

CHANGED_SCRAPERS=$(git whatchanged --name-only --pretty="" origin..HEAD |
if [ $CI ]
then
HEAD_REF=${GITHUB_REF}
else
HEAD_REF="HEAD"
fi

echo "Using head reference: ${HEAD_REF}"

CHANGED_SCRAPERS=$(git whatchanged --name-only --pretty="" origin/master..${HEAD_REF} |
grep spiders |
grep -v helper |
sed 's/jedeschule\/spiders\///' |
Expand Down
4 changes: 3 additions & 1 deletion test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from jedeschule.items import School
from jedeschule.pipelines.school_pipeline import SchoolPipelineItem
from jedeschule.pipelines.db_pipeline import School as DBSchool, session
from jedeschule.pipelines.db_pipeline import School as DBSchool, get_session


class TestSchoolItem(Item):
Expand All @@ -23,6 +23,7 @@ def test_import_new(self):
item = dict(name='Test Schule', nr=1)
school_item: SchoolPipelineItem = SchoolPipelineItem(info=info, item=item)
db_item = DBSchool.update_or_create(school_item)
session = get_session()
session.add(db_item)
session.commit()

Expand All @@ -40,6 +41,7 @@ def test_import_existing(self):
item = dict(name='Test Schule', nr=1)
school_item: SchoolPipelineItem = SchoolPipelineItem(info=info, item=item)
db_item = DBSchool.update_or_create(school_item)
session = get_session()
session.add(db_item)
session.commit()

Expand Down