diff --git a/.github/workflows/run-qa-engine.yml b/.github/workflows/run-qa-engine.yml
new file mode 100644
index 000000000000..717e9db0198b
--- /dev/null
+++ b/.github/workflows/run-qa-engine.yml
@@ -0,0 +1,30 @@
+name: Run QA Engine
+
+on:
+  workflow_dispatch:
+  schedule:
+    # 1pm UTC is 6am PDT.
+    # same time as Generate Build Report
+    - cron: "0 13 * * *"
+
+jobs:
+  run-qa-engine:
+    name: "Run QA Engine"
+    if: github.ref == 'refs/heads/master'
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Airbyte
+        uses: actions/checkout@v3
+      - name: Setup Cloud SDK
+        uses: google-github-actions/setup-gcloud@v0
+        with:
+          service_account_key: ${{ secrets.PROD_SPEC_CACHE_SA_KEY }}
+          export_default_credentials: true
+      - name: Install Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.9"
+      - name: Install ci-connector-ops package
+        run: pip install --quiet -e ./tools/ci_connector_ops
+      - name: Run QA Engine
+        run: run-qa-engine
diff --git a/tools/ci_connector_ops/ci_connector_ops/qa_engine/__init__.py b/tools/ci_connector_ops/ci_connector_ops/qa_engine/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/tools/ci_connector_ops/ci_connector_ops/qa_engine/inputs.py b/tools/ci_connector_ops/ci_connector_ops/qa_engine/inputs.py
new file mode 100644
index 000000000000..1e560df0be56
--- /dev/null
+++ b/tools/ci_connector_ops/ci_connector_ops/qa_engine/inputs.py
@@ -0,0 +1,64 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+
+import requests
+import pandas as pd
+
+CLOUD_CATALOG_URL = "https://storage.googleapis.com/prod-airbyte-cloud-connector-metadata-service/cloud_catalog.json"
+OSS_CATALOG_URL = "https://storage.googleapis.com/prod-airbyte-cloud-connector-metadata-service/oss_catalog.json"
+# Upper bound, in seconds, on each catalog download so a stalled connection cannot hang the caller forever.
+CATALOG_REQUEST_TIMEOUT_SECONDS = 30
+
+
+def fetch_remote_catalog(catalog_url: str) -> pd.DataFrame:
+    """Fetch a combined remote catalog and return a single DataFrame
+    with sources and destinations defined by the connector_type column.
+
+    Args:
+        catalog_url (str): The remote catalog url.
+
+    Returns:
+        pd.DataFrame: Sources and destinations combined under a denormalized DataFrame.
+
+    Raises:
+        requests.HTTPError: If the server answers with a 4xx/5xx status code.
+        requests.Timeout: If the server does not answer within CATALOG_REQUEST_TIMEOUT_SECONDS.
+    """
+    response = requests.get(catalog_url, timeout=CATALOG_REQUEST_TIMEOUT_SECONDS)
+    # Fail loudly on an HTTP error instead of trying to parse an error page as a catalog.
+    response.raise_for_status()
+    raw_catalog = response.json()
+    sources = pd.DataFrame(raw_catalog["sources"])
+    destinations = pd.DataFrame(raw_catalog["destinations"])
+    sources["connector_type"] = "source"
+    sources["connector_definition_id"] = sources.sourceDefinitionId
+    destinations["connector_type"] = "destination"
+    destinations["connector_definition_id"] = destinations.destinationDefinitionId
+    return pd.concat([sources, destinations])
+
+
+def fetch_adoption_metrics_per_connector_version() -> pd.DataFrame:
+    """Retrieve adoption metrics for each connector version from our data warehouse.
+
+    The warehouse query is not implemented yet: an empty DataFrame with the expected columns is returned.
+
+    Returns:
+        pd.DataFrame: A DataFrame with adoption metrics per connector version.
+    """
+    # TODO: directly query BigQuery
+    # use query in https://airbyte.metabaseapp.com/question/1642-adoption-and-success-rate-per-connector-version-oss-cloud
+    return pd.DataFrame(columns=[
+        "connector_definition_id",
+        "connector_version",
+        "number_of_connections",
+        "number_of_users",
+        "sync_success_rate",
+    ])
+
+
+# These module-level values are computed at import time: importing this module downloads both catalogs.
+CLOUD_CATALOG = fetch_remote_catalog(CLOUD_CATALOG_URL)
+OSS_CATALOG = fetch_remote_catalog(OSS_CATALOG_URL)
+ADOPTION_METRICS_PER_CONNECTOR_VERSION = fetch_adoption_metrics_per_connector_version()
diff --git a/tools/ci_connector_ops/ci_connector_ops/qa_engine/main.py b/tools/ci_connector_ops/ci_connector_ops/qa_engine/main.py
new file mode 100644
index 000000000000..7d632788aa6a
--- /dev/null
+++ b/tools/ci_connector_ops/ci_connector_ops/qa_engine/main.py
@@ -0,0 +1,31 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+
+import pandas as pd
+from .models import QAReport
+
+GCS_QA_REPORT_PATH = "gs://prod-airbyte-cloud-connector-metadata-service/qa_report.json"
+# Single-row placeholder report; main() currently publishes it as-is.
+DUMMY_REPORT = pd.DataFrame([{
+    "connector_type": "source",
+    "connector_name": "test",
+    "docker_image_tag": "0.0.0",
+    "release_stage": "alpha",
+    "is_on_cloud": False,
+    "latest_build_is_successful": False,
+    "documentation_is_available": False,
+    "number_of_connections": 0,
+    "number_of_users": 0,
+    "sync_success_rate": 0.99,
+}])
+
+def write_qa_report_to_gcs(qa_report: pd.DataFrame, output_file_path: str):
+    """Validate the report rows against the QAReport model, then dump them as JSON records to output_file_path."""
+    QAReport(connectors_qa_report=qa_report.to_dict(orient="records"))
+    qa_report.to_json(output_file_path, orient="records")
+
+def main():
+    """Entry point: write DUMMY_REPORT to GCS_QA_REPORT_PATH."""
+    write_qa_report_to_gcs(DUMMY_REPORT, GCS_QA_REPORT_PATH)
diff --git a/tools/ci_connector_ops/ci_connector_ops/qa_engine/models.py b/tools/ci_connector_ops/ci_connector_ops/qa_engine/models.py
new file mode 100644
index 000000000000..eec91b90eee7
--- /dev/null
+++ b/tools/ci_connector_ops/ci_connector_ops/qa_engine/models.py
@@ -0,0 +1,32 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+
+from enum import Enum
+from typing import List
+from pydantic import BaseModel
+
+class ConnectorTypeEnum(str, Enum):  # Allowed values of ConnectorQAReport.connector_type.
+    source = "source"
+    destination = "destination"
+
+class ReleaseStageEnum(str, Enum):  # Allowed values of ConnectorQAReport.release_stage.
+    alpha = "alpha"
+    beta = "beta"
+    generally_available = "generally_available"
+
+class ConnectorQAReport(BaseModel):  # One report row: QA facts and adoption metrics for a single connector.
+    connector_type: ConnectorTypeEnum
+    connector_name: str
+    docker_image_tag: str
+    release_stage: ReleaseStageEnum
+    is_on_cloud: bool
+    latest_build_is_successful: bool
+    documentation_is_available: bool
+    number_of_connections: int
+    number_of_users: int
+    sync_success_rate: float
+
+class QAReport(BaseModel):  # Whole report: the list of per-connector rows.
+    connectors_qa_report: List[ConnectorQAReport]
diff --git a/tools/ci_connector_ops/setup.py b/tools/ci_connector_ops/setup.py
index c573c8d7148b..0fc79a9f6e1e 100644
--- a/tools/ci_connector_ops/setup.py
+++ b/tools/ci_connector_ops/setup.py
@@ -5,11 +5,19 @@
 
 from setuptools import find_packages, setup
 
-MAIN_REQUIREMENTS = ["requests", "PyYAML~=6.0", "GitPython~=3.1.29"]
+MAIN_REQUIREMENTS = [
+    "requests",
+    "PyYAML~=6.0",
+    "GitPython~=3.1.29",
+    "pandas~=1.5.3",
+    "pydantic~=1.10.4",
+    "fsspec~=2023.1.0",
+    "gcsfs~=2023.1.0",
+]
 
 
 setup(
-    version="0.1.2",
+    version="0.1.3",
     name="ci_connector_ops",
     description="Packaged maintained by the connector operations team to perform CI for connectors",
     author="Airbyte",
@@ -22,6 +30,7 @@
             "check-test-strictness-level = ci_connector_ops.sat_config_checks:check_test_strictness_level",
             "write-review-requirements-file = ci_connector_ops.sat_config_checks:write_review_requirements_file",
             "print-mandatory-reviewers = ci_connector_ops.sat_config_checks:print_mandatory_reviewers",
+            "run-qa-engine = ci_connector_ops.qa_engine.main:main",
             "run-qa-checks = ci_connector_ops.qa_checks:run_qa_checks"
         ],
     },
diff --git a/tools/ci_connector_ops/tests/test_qa_engine/__init__.py b/tools/ci_connector_ops/tests/test_qa_engine/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git 
a/tools/ci_connector_ops/tests/test_qa_engine/test_inputs.py b/tools/ci_connector_ops/tests/test_qa_engine/test_inputs.py
new file mode 100644
index 000000000000..d0a4fb037e03
--- /dev/null
+++ b/tools/ci_connector_ops/tests/test_qa_engine/test_inputs.py
@@ -0,0 +1,34 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+
+import pandas as pd
+import pytest
+
+from ci_connector_ops.qa_engine import inputs
+
+
+@pytest.mark.parametrize("catalog_url", [inputs.OSS_CATALOG_URL, inputs.CLOUD_CATALOG_URL])
+def test_fetch_remote_catalog(catalog_url):
+    """Each remote catalog loads as a DataFrame exposing the derived columns for both connector types."""
+    catalog = inputs.fetch_remote_catalog(catalog_url)
+    assert isinstance(catalog, pd.DataFrame)
+    for expected_column in ("connector_type", "connector_definition_id"):
+        assert expected_column in catalog.columns
+    assert set(catalog.connector_type.unique()) == {"source", "destination"}
+
+
+def test_fetch_adoption_metrics_per_connector_version():
+    """The adoption metrics frame is empty and carries exactly the expected columns."""
+    expected_columns = {
+        "connector_definition_id",
+        "connector_version",
+        "number_of_connections",
+        "number_of_users",
+        "sync_success_rate",
+    }
+
+    adoption_metrics = inputs.fetch_adoption_metrics_per_connector_version()
+    assert len(adoption_metrics) == 0
+    assert set(adoption_metrics.columns) == expected_columns
diff --git a/tools/ci_connector_ops/tests/test_qa_engine/test_main.py b/tools/ci_connector_ops/tests/test_qa_engine/test_main.py
new file mode 100644
index 000000000000..815568e2d8a9
--- /dev/null
+++ b/tools/ci_connector_ops/tests/test_qa_engine/test_main.py
@@ -0,0 +1,14 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+import pandas
+from ci_connector_ops.qa_engine import main
+
+
+def test_write_qa_report_to_gcs(tmp_path):
+    """Writing the dummy report to a local path and reading it back yields the same data."""
+    output_path = tmp_path / "output.json"
+    main.write_qa_report_to_gcs(main.DUMMY_REPORT, output_path)
+    written_report = pandas.read_json(output_path)
+    assert written_report.to_dict() == main.DUMMY_REPORT.to_dict()