From 529b56db1099881d4cbb732014ba1eadea1e412e Mon Sep 17 00:00:00 2001 From: Uchechukwu Orji Date: Thu, 22 Aug 2024 14:56:09 +0100 Subject: [PATCH 1/2] add region to database --- backend/src/mirrors_qa_backend/cli/country.py | 53 ++++++++++++++ backend/src/mirrors_qa_backend/db/mirrors.py | 26 ++++++- backend/src/mirrors_qa_backend/db/models.py | 41 ++++++++++- backend/src/mirrors_qa_backend/db/region.py | 36 +++++++++ backend/src/mirrors_qa_backend/entrypoint.py | 30 ++++++++ backend/src/mirrors_qa_backend/extract.py | 2 + .../074ae280bb70_introduce_regions.py | 73 +++++++++++++++++++ backend/src/mirrors_qa_backend/schemas.py | 15 +++- backend/src/mirrors_qa_backend/serializer.py | 2 +- backend/tests/cli/test_country.py | 29 ++++++++ backend/tests/cli/test_mirror.py | 22 ++++++ backend/tests/conftest.py | 1 - backend/tests/db/test_mirrors.py | 1 - 13 files changed, 322 insertions(+), 9 deletions(-) create mode 100644 backend/src/mirrors_qa_backend/cli/country.py create mode 100644 backend/src/mirrors_qa_backend/db/region.py create mode 100644 backend/src/mirrors_qa_backend/migrations/versions/074ae280bb70_introduce_regions.py create mode 100644 backend/tests/cli/test_country.py create mode 100644 backend/tests/cli/test_mirror.py diff --git a/backend/src/mirrors_qa_backend/cli/country.py b/backend/src/mirrors_qa_backend/cli/country.py new file mode 100644 index 0000000..b582784 --- /dev/null +++ b/backend/src/mirrors_qa_backend/cli/country.py @@ -0,0 +1,53 @@ +import csv + +from mirrors_qa_backend import logger +from mirrors_qa_backend.db import Session +from mirrors_qa_backend.db.country import create_country +from mirrors_qa_backend.db.region import create_region +from mirrors_qa_backend.schemas import Country, Region + + +def create_regions_and_countries(countries: list[Country]) -> None: + """Create the region and associated countries in the database.""" + with Session.begin() as session: + for country in countries: + db_country = create_country( + session, + 
country_code=country.code, + country_name=country.name, + ) + if country.region: + db_region = create_region( + session, + region_code=country.region.code, + region_name=country.region.name, + ) + db_country.region = db_region + session.add(db_country) + + +def extract_country_regions_from_csv(csv_data: list[str]) -> list[Country]: + regions: list[Country] = [] + for row in csv.DictReader(csv_data): + country_code = row["country_iso_code"] + country_name = row["country_name"] + region_code = row["continent_code"] + region_name = row["continent_name"] + if all([country_code, country_name, region_code, region_name]): + regions.append( + Country( + code=country_code.lower(), + name=country_name.title(), + region=Region( + code=region_code.lower(), + name=region_name.title(), + ), + ) + ) + else: + logger.critical( + f"Skipping row with missing entries: country_code: {country_code}, " + f"country_name: {country_name}, region_code: {region_code}, " + f"region_name: {region_name}" + ) + return regions diff --git a/backend/src/mirrors_qa_backend/db/mirrors.py b/backend/src/mirrors_qa_backend/db/mirrors.py index fa2454f..3f6e4bf 100644 --- a/backend/src/mirrors_qa_backend/db/mirrors.py +++ b/backend/src/mirrors_qa_backend/db/mirrors.py @@ -4,8 +4,10 @@ from sqlalchemy.orm import Session as OrmSession from mirrors_qa_backend import logger, schemas +from mirrors_qa_backend.db.country import get_country_or_none from mirrors_qa_backend.db.exceptions import EmptyMirrorsError, RecordDoesNotExistError from mirrors_qa_backend.db.models import Mirror +from mirrors_qa_backend.db.region import get_region_or_none @dataclass @@ -16,6 +18,17 @@ class MirrorsUpdateResult: nb_mirrors_disabled: int = 0 +def update_mirror_country( + session: OrmSession, country_code: str, mirror: Mirror +) -> Mirror: + logger.debug("Updating mirror country information.") + mirror.country = get_country_or_none(session, country_code) + if mirror.country and mirror.country.region_code: + mirror.region = 
get_region_or_none(session, mirror.country.region_code) + session.add(mirror) + return mirror + + def create_mirrors(session: OrmSession, mirrors: list[schemas.Mirror]) -> int: """Number of mirrors created in the database. @@ -27,7 +40,6 @@ def create_mirrors(session: OrmSession, mirrors: list[schemas.Mirror]) -> int: id=mirror.id, base_url=mirror.base_url, enabled=mirror.enabled, - region=mirror.region, asn=mirror.asn, score=mirror.score, latitude=mirror.latitude, @@ -37,7 +49,12 @@ def create_mirrors(session: OrmSession, mirrors: list[schemas.Mirror]) -> int: as_only=mirror.as_only, other_countries=mirror.other_countries, ) + session.add(db_mirror) + + if mirror.country_code: + update_mirror_country(session, mirror.country_code, db_mirror) + logger.debug(f"Registered new mirror: {db_mirror.id}.") nb_created += 1 return nb_created @@ -90,6 +107,13 @@ def create_or_update_mirror_status( db_mirror.enabled = True session.add(db_mirror) result.nb_mirrors_added += 1 + + # The new mirror DB model contains country data. As such, we update the + # country information regardless of the status update. 
+ if db_mirror_id in current_mirrors: + country_code = current_mirrors[db_mirror_id].country_code + if country_code: + update_mirror_country(session, country_code, db_mirror) return result diff --git a/backend/src/mirrors_qa_backend/db/models.py b/backend/src/mirrors_qa_backend/db/models.py index e3b5ff0..d9edfcf 100644 --- a/backend/src/mirrors_qa_backend/db/models.py +++ b/backend/src/mirrors_qa_backend/db/models.py @@ -64,6 +64,21 @@ class WorkerCountry(Base): ) +class Region(Base): + """Continental region.""" + + __tablename__ = "region" + + code: Mapped[str] = mapped_column(primary_key=True) # continent code + name: Mapped[str] # continent name + countries: Mapped[list[Country]] = relationship( + back_populates="region", init=False, repr=False + ) + mirrors: Mapped[list[Mirror]] = relationship( + back_populates="region", init=False, repr=False + ) + + class Country(Base): """Country where a worker runs tests for a mirror.""" @@ -74,6 +89,17 @@ class Country(Base): ) # two-letter country codes as defined in ISO 3166-1 name: Mapped[str] # full name of the country (in English) + region_code: Mapped[str | None] = mapped_column( + ForeignKey("region.code"), init=False, default=None + ) + + region: Mapped[Region | None] = relationship( + back_populates="countries", init=False, repr=False + ) + + mirrors: Mapped[list[Mirror]] = relationship( + back_populates="country", init=False, repr=False + ) workers: Mapped[list[Worker]] = relationship( back_populates="countries", @@ -91,8 +117,13 @@ class Mirror(Base): id: Mapped[str] = mapped_column(primary_key=True) # hostname of a mirror URL base_url: Mapped[str] enabled: Mapped[bool] + region_code: Mapped[str | None] = mapped_column( + ForeignKey("region.code"), init=False, default=None + ) + country_code: Mapped[str | None] = mapped_column( + ForeignKey("country.code"), init=False, default=None + ) # metadata of a mirror from MirroBrain 
(https://mirrorbrain-docs.readthedocs.io/en/latest/mirrors.html#displaying-details-about-a-mirror) - region: Mapped[str | None] = mapped_column(default=None) asn: Mapped[str | None] = mapped_column(default=None) score: Mapped[int | None] = mapped_column(default=None) latitude: Mapped[float | None] = mapped_column(default=None) @@ -106,6 +137,14 @@ class Mirror(Base): back_populates="mirror", init=False, repr=False ) + country: Mapped[Country | None] = relationship( + back_populates="mirrors", init=False, repr=False + ) + + region: Mapped[Region | None] = relationship( + back_populates="mirrors", init=False, repr=False + ) + __table_args__ = (UniqueConstraint("base_url"),) diff --git a/backend/src/mirrors_qa_backend/db/region.py b/backend/src/mirrors_qa_backend/db/region.py new file mode 100644 index 0000000..67ed194 --- /dev/null +++ b/backend/src/mirrors_qa_backend/db/region.py @@ -0,0 +1,36 @@ +from sqlalchemy import select +from sqlalchemy.dialects.postgresql import insert +from sqlalchemy.orm import Session as OrmSession + +from mirrors_qa_backend.db.exceptions import RecordDoesNotExistError +from mirrors_qa_backend.db.models import Country, Region + + +def get_countries(session: OrmSession, region_code: str) -> list[Country]: + """Get countries belonging to the provided region.""" + + return list( + session.scalars(select(Country).where(Country.region_code == region_code)).all() + ) + + +def get_region_or_none(session: OrmSession, region_code: str) -> Region | None: + return session.scalars( + select(Region).where(Region.code == region_code) + ).one_or_none() + + +def get_region(session: OrmSession, region_code: str) -> Region: + if region := get_region_or_none(session, region_code): + return region + raise RecordDoesNotExistError(f"Region with code {region_code} does not exist.") + + +def create_region(session: OrmSession, *, region_code: str, region_name: str) -> Region: + """Creates a new continental region in the database.""" + session.execute( + 
insert(Region) + .values(code=region_code, name=region_name) + .on_conflict_do_nothing(index_elements=["code"]) + ) + return get_region(session, region_code) diff --git a/backend/src/mirrors_qa_backend/entrypoint.py b/backend/src/mirrors_qa_backend/entrypoint.py index 53fdea4..2be1bc8 100644 --- a/backend/src/mirrors_qa_backend/entrypoint.py +++ b/backend/src/mirrors_qa_backend/entrypoint.py @@ -6,6 +6,10 @@ from mirrors_qa_backend import logger from mirrors_qa_backend.__about__ import __version__ +from mirrors_qa_backend.cli.country import ( + create_regions_and_countries, + extract_country_regions_from_csv, +) from mirrors_qa_backend.cli.mirrors import update_mirrors from mirrors_qa_backend.cli.scheduler import main as start_scheduler from mirrors_qa_backend.cli.worker import create_worker, update_worker @@ -15,6 +19,7 @@ CREATE_WORKER_CLI = "create-worker" UPDATE_WORKER_CLI = "update-worker" SCHEDULER_CLI = "scheduler" +CREATE_COUNTRY_REGIONS_CLI = "create-countries" def main(): @@ -95,6 +100,18 @@ def main(): UPDATE_WORKER_CLI, help="Update a worker", parents=[worker_parser] ) + create_country_regions_cli = subparsers.add_parser( + CREATE_COUNTRY_REGIONS_CLI, help="Create countries and associated regions." 
+ ) + create_country_regions_cli.add_argument( + "country_region_csv_file", + metavar="csv-file", + type=argparse.FileType("r", encoding="utf-8"), + nargs="?", + default=sys.stdin, + help="CSV file containing countries and associated regions (default: stdin).", + ) + args = parser.parse_args() if args.verbose: logger.setLevel(logging.DEBUG) @@ -137,6 +154,19 @@ def main(): logger.error(f"error while updating worker: {exc!s}") sys.exit(1) logger.info(f"Updated countries for worker {args.worker_id!r}") + elif args.cli_name == CREATE_COUNTRY_REGIONS_CLI: + try: + logger.debug("Creating regions and associated countries.") + + create_regions_and_countries( + extract_country_regions_from_csv( + args.country_region_csv_file.readlines() + ) + ) + except Exception as exc: + logger.error(f"error while creating regions: {exc!s}") + sys.exit(1) + logger.info("Created regions and associated countries.") else: args.print_help() diff --git a/backend/src/mirrors_qa_backend/extract.py b/backend/src/mirrors_qa_backend/extract.py index be81bed..7d2e41f 100644 --- a/backend/src/mirrors_qa_backend/extract.py +++ b/backend/src/mirrors_qa_backend/extract.py @@ -51,6 +51,7 @@ def is_country_row(tag: Tag) -> bool: hostname: Any = urlsplit( base_url ).netloc # pyright: ignore [reportUnknownMemberType] + country_code = row.find("img")["alt"].lower() if hostname in Settings.MIRRORS_EXCLUSION_LIST: continue mirrors.append( @@ -58,6 +59,7 @@ def is_country_row(tag: Tag) -> bool: id=hostname, base_url=base_url, enabled=True, + country_code=country_code, ) ) return mirrors diff --git a/backend/src/mirrors_qa_backend/migrations/versions/074ae280bb70_introduce_regions.py b/backend/src/mirrors_qa_backend/migrations/versions/074ae280bb70_introduce_regions.py new file mode 100644 index 0000000..a0710f6 --- /dev/null +++ b/backend/src/mirrors_qa_backend/migrations/versions/074ae280bb70_introduce_regions.py @@ -0,0 +1,73 @@ +"""introduce regions + +Revision ID: 074ae280bb70 +Revises: 17d587447299 
+Create Date: 2024-08-22 11:57:17.239215 + +""" + +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision = "074ae280bb70" +down_revision = "17d587447299" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.create_table( + "region", + sa.Column("code", sa.String(), nullable=False), + sa.Column("name", sa.String(), nullable=False), + sa.PrimaryKeyConstraint("code", name=op.f("pk_region")), + ) + op.add_column("country", sa.Column("region_code", sa.String(), nullable=True)) + op.create_foreign_key( + op.f("fk_country_region_code_region"), + "country", + "region", + ["region_code"], + ["code"], + ) + op.add_column("mirror", sa.Column("region_code", sa.String(), nullable=True)) + op.add_column("mirror", sa.Column("country_code", sa.String(), nullable=True)) + op.create_foreign_key( + op.f("fk_mirror_country_code_country"), + "mirror", + "country", + ["country_code"], + ["code"], + ) + op.create_foreign_key( + op.f("fk_mirror_region_code_region"), + "mirror", + "region", + ["region_code"], + ["code"], + ) + op.drop_column("mirror", "region") + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.add_column( + "mirror", sa.Column("region", sa.VARCHAR(), autoincrement=False, nullable=True) + ) + op.drop_constraint( + op.f("fk_mirror_region_code_region"), "mirror", type_="foreignkey" + ) + op.drop_constraint( + op.f("fk_mirror_country_code_country"), "mirror", type_="foreignkey" + ) + op.drop_column("mirror", "country_code") + op.drop_column("mirror", "region_code") + op.drop_constraint( + op.f("fk_country_region_code_region"), "country", type_="foreignkey" + ) + op.drop_column("country", "region_code") + op.drop_table("region") + # ### end Alembic commands ### diff --git a/backend/src/mirrors_qa_backend/schemas.py b/backend/src/mirrors_qa_backend/schemas.py index 68c6647..62600fa 100644 --- a/backend/src/mirrors_qa_backend/schemas.py +++ b/backend/src/mirrors_qa_backend/schemas.py @@ -13,6 +13,9 @@ class BaseModel(pydantic.BaseModel): model_config = ConfigDict(use_enum_values=True, from_attributes=True) +ISO3166AlphaCode = Annotated[str, Field(min_length=2, max_length=2)] + + class Mirror(BaseModel): id: str # hostname of a mirror URL base_url: str @@ -25,7 +28,8 @@ class Mirror(BaseModel): country_only: bool | None = None region_only: bool | None = None as_only: bool | None = None - other_countries: list[str] | None = None + country_code: ISO3166AlphaCode | None = None + other_countries: list[ISO3166AlphaCode] | None = None class UpdateTestModel(BaseModel): @@ -57,12 +61,15 @@ class Paginator(BaseModel): last_page: int | None = None -ISOCountryCode = Annotated[str, Field(min_length=2, max_length=2)] +class Region(BaseModel): + code: ISO3166AlphaCode + name: str class Country(BaseModel): - code: ISOCountryCode # two-letter country code as defined in ISO 3166-1 + code: ISO3166AlphaCode # two-letter country code as defined in ISO 3166-1 name: str # full name of the country (in English) + region: Region | None = None class WorkerCountries(BaseModel): @@ -70,7 +77,7 @@ class WorkerCountries(BaseModel): class UpdateWorkerCountries(BaseModel): - 
country_codes: list[ISOCountryCode] + country_codes: list[ISO3166AlphaCode] class TestsList(BaseModel): diff --git a/backend/src/mirrors_qa_backend/serializer.py b/backend/src/mirrors_qa_backend/serializer.py index b83628d..c6acec5 100644 --- a/backend/src/mirrors_qa_backend/serializer.py +++ b/backend/src/mirrors_qa_backend/serializer.py @@ -27,8 +27,8 @@ def serialize_mirror(mirror: models.Mirror) -> schemas.Mirror: id=mirror.id, base_url=mirror.base_url, enabled=mirror.enabled, - region=mirror.region, asn=mirror.asn, + country_code=mirror.country_code if mirror.country_code else None, score=mirror.score, latitude=mirror.latitude, longitude=mirror.longitude, diff --git a/backend/tests/cli/test_country.py b/backend/tests/cli/test_country.py new file mode 100644 index 0000000..6dfefa0 --- /dev/null +++ b/backend/tests/cli/test_country.py @@ -0,0 +1,29 @@ +from sqlalchemy.orm import Session as OrmSession + +from mirrors_qa_backend.cli.country import ( + create_regions_and_countries, + extract_country_regions_from_csv, +) +from mirrors_qa_backend.db.country import get_country +from mirrors_qa_backend.db.region import get_region + + +def test_create_regions_and_countries(dbsession: OrmSession): + csv_data = [ + "country_iso_code,country_name,continent_code,continent_name", + "ng,Nigeria,af,Africa", + "fr,France,eu,Europe", + ] + + countries = extract_country_regions_from_csv(csv_data) + create_regions_and_countries(countries) + + for country in countries: + assert country.region is not None + db_country = get_country(dbsession, country.code) + assert db_country.code == country.code + assert db_country.name == country.name + db_region = get_region(dbsession, country.region.code) + assert db_region.code == country.region.code + assert db_region.name == country.region.name + assert db_country.region == db_region diff --git a/backend/tests/cli/test_mirror.py b/backend/tests/cli/test_mirror.py new file mode 100644 index 0000000..93238c4 --- /dev/null +++ 
b/backend/tests/cli/test_mirror.py @@ -0,0 +1,22 @@ +from sqlalchemy.orm import Session as OrmSession + +from mirrors_qa_backend.db import models +from mirrors_qa_backend.db.mirrors import update_mirror_country + + +def test_update_mirror_region_and_country( + dbsession: OrmSession, db_mirror: models.Mirror +): + + # Set up a country and region in the database. + region = models.Region(code="eu", name="Europe") + dbsession.add(region) + + country = models.Country(code="fr", name="France") + country.region = region + dbsession.add(country) + + db_mirror = update_mirror_country(dbsession, country.code, db_mirror) + assert db_mirror.country is not None + assert db_mirror.country == country + assert db_mirror.region == region diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py index e38eb02..0092fde 100644 --- a/backend/tests/conftest.py +++ b/backend/tests/conftest.py @@ -154,7 +154,6 @@ def db_mirror(dbsession: OrmSession) -> Mirror: id="mirror-sites-in.mblibrary.info", base_url="https://mirror-sites-in.mblibrary.info/mirror-sites/download.kiwix.org/", enabled=True, - region=None, asn=None, score=None, latitude=None, diff --git a/backend/tests/db/test_mirrors.py b/backend/tests/db/test_mirrors.py index c40d489..6a674df 100644 --- a/backend/tests/db/test_mirrors.py +++ b/backend/tests/db/test_mirrors.py @@ -64,7 +64,6 @@ def test_re_enable_existing_mirror( id="mirrors.dotsrc.org", base_url="https://mirrors.dotsrc.org/kiwix/", enabled=False, - region=None, asn=None, score=None, latitude=None, From edd6bb4acda30995888d3babcbc31ab25ba2899b Mon Sep 17 00:00:00 2001 From: Uchechukwu Orji Date: Fri, 23 Aug 2024 09:28:55 +0100 Subject: [PATCH 2/2] make function names and help more descriptive --- backend/src/mirrors_qa_backend/db/region.py | 2 +- backend/src/mirrors_qa_backend/entrypoint.py | 5 ++++- backend/src/mirrors_qa_backend/schemas.py | 12 ++++++------ 3 files changed, 11 insertions(+), 8 deletions(-) diff --git 
a/backend/src/mirrors_qa_backend/db/region.py b/backend/src/mirrors_qa_backend/db/region.py index 67ed194..90f044a 100644 --- a/backend/src/mirrors_qa_backend/db/region.py +++ b/backend/src/mirrors_qa_backend/db/region.py @@ -6,7 +6,7 @@ from mirrors_qa_backend.db.models import Country, Region -def get_countries(session: OrmSession, region_code: str) -> list[Country]: +def get_countries_for(session: OrmSession, region_code: str) -> list[Country]: """Get countries belonging to the provided region.""" return list( diff --git a/backend/src/mirrors_qa_backend/entrypoint.py b/backend/src/mirrors_qa_backend/entrypoint.py index 2be1bc8..b022086 100644 --- a/backend/src/mirrors_qa_backend/entrypoint.py +++ b/backend/src/mirrors_qa_backend/entrypoint.py @@ -109,7 +109,10 @@ def main(): type=argparse.FileType("r", encoding="utf-8"), nargs="?", default=sys.stdin, - help="CSV file containing countries and associated regions (default: stdin).", + help=( + "CSV file containing countries and associated regions " + "(format: Maxmind's GeoIPLite Country Locations csv) (default: stdin)." 
+ ), ) args = parser.parse_args() diff --git a/backend/src/mirrors_qa_backend/schemas.py b/backend/src/mirrors_qa_backend/schemas.py index 62600fa..9dbb934 100644 --- a/backend/src/mirrors_qa_backend/schemas.py +++ b/backend/src/mirrors_qa_backend/schemas.py @@ -13,7 +13,7 @@ class BaseModel(pydantic.BaseModel): model_config = ConfigDict(use_enum_values=True, from_attributes=True) -ISO3166AlphaCode = Annotated[str, Field(min_length=2, max_length=2)] +ISO3166Alpha2Code = Annotated[str, Field(min_length=2, max_length=2)] class Mirror(BaseModel): @@ -28,8 +28,8 @@ class Mirror(BaseModel): country_only: bool | None = None region_only: bool | None = None as_only: bool | None = None - country_code: ISO3166AlphaCode | None = None - other_countries: list[ISO3166AlphaCode] | None = None + country_code: ISO3166Alpha2Code | None = None + other_countries: list[ISO3166Alpha2Code] | None = None class UpdateTestModel(BaseModel): @@ -62,12 +62,12 @@ class Paginator(BaseModel): class Region(BaseModel): - code: ISO3166AlphaCode + code: ISO3166Alpha2Code name: str class Country(BaseModel): - code: ISO3166AlphaCode # two-letter country code as defined in ISO 3166-1 + code: ISO3166Alpha2Code # two-letter country code as defined in ISO 3166-1 name: str # full name of the country (in English) region: Region | None = None @@ -77,7 +77,7 @@ class WorkerCountries(BaseModel): class UpdateWorkerCountries(BaseModel): - country_codes: list[ISO3166AlphaCode] + country_codes: list[ISO3166Alpha2Code] class TestsList(BaseModel):