Skip to content

Commit

Permalink
connector-ops-ci: bootstrap qa engine (#21709)
Browse files Browse the repository at this point in the history
  • Loading branch information
alafanechere authored Jan 24, 2023
1 parent b9de100 commit 4770a8f
Show file tree
Hide file tree
Showing 9 changed files with 195 additions and 2 deletions.
30 changes: 30 additions & 0 deletions .github/workflows/run-qa-engine.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Workflow that installs the ci_connector_ops package and runs its
# `run-qa-engine` console script, either on demand or on a daily schedule.
name: Run QA Engine

on:
  workflow_dispatch:
  schedule:
    # 1pm UTC is 6am PDT.
    # same time as Generate Build Report
    - cron: "0 13 * * *"

jobs:
  run-qa-engine:
    name: "Run QA Engine"
    # Only run when triggered from the master branch.
    if: github.ref == 'refs/heads/master'
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Airbyte
        uses: actions/checkout@v3
      # NOTE(review): the credential inputs below are specific to the v0 major
      # of setup-gcloud; confirm they are still supported before bumping it.
      - name: Setup Cloud SDK
        uses: google-github-actions/setup-gcloud@v0
        with:
          service_account_key: ${{ secrets.PROD_SPEC_CACHE_SA_KEY }}
          export_default_credentials: true
      - name: Install Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.9"
      # Editable install exposes the package's console scripts on PATH.
      - name: Install ci-connector-ops package
        run: pip install --quiet -e ./tools/ci_connector_ops
      - name: Run QA Engine
        run: run-qa-engine
Empty file.
50 changes: 50 additions & 0 deletions tools/ci_connector_ops/ci_connector_ops/qa_engine/inputs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#


import requests
import pandas as pd

# Public URLs of the combined (sources + destinations) connector catalogs,
# one for Cloud and one for OSS. Both are fetched with fetch_remote_catalog.
CLOUD_CATALOG_URL = "https://storage.googleapis.com/prod-airbyte-cloud-connector-metadata-service/cloud_catalog.json"
OSS_CATALOG_URL = "https://storage.googleapis.com/prod-airbyte-cloud-connector-metadata-service/oss_catalog.json"


def fetch_remote_catalog(catalog_url: str) -> pd.DataFrame:
    """Fetch a combined remote catalog and return it as a single DataFrame.

    The catalog JSON is expected to expose a "sources" and a "destinations"
    list. Both are loaded into DataFrames, tagged with a ``connector_type``
    column and given a common ``connector_definition_id`` column before being
    concatenated.

    Args:
        catalog_url (str): The remote catalog url.

    Returns:
        pd.DataFrame: Sources and destinations combined under a denormalized DataFrame.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within the timeout.
        KeyError: If the payload lacks the "sources" or "destinations" key.
    """
    # A timeout prevents a stalled connection from hanging the caller forever,
    # and raise_for_status surfaces HTTP errors instead of letting an error
    # page fail later as an obscure JSON/KeyError.
    response = requests.get(catalog_url, timeout=60)
    response.raise_for_status()
    raw_catalog = response.json()

    sources = pd.DataFrame(raw_catalog["sources"])
    destinations = pd.DataFrame(raw_catalog["destinations"])

    sources["connector_type"] = "source"
    sources["connector_definition_id"] = sources.sourceDefinitionId
    destinations["connector_type"] = "destination"
    destinations["connector_definition_id"] = destinations.destinationDefinitionId

    # Row labels of each frame are kept as-is (no ignore_index), matching the
    # previous behavior; labels may therefore repeat across the two halves.
    return pd.concat([sources, destinations])

def fetch_adoption_metrics_per_connector_version() -> pd.DataFrame:
    """Return the adoption metrics table, one row per connector version.

    The data warehouse is not queried yet: the result is an empty DataFrame
    that only carries the expected column names.

    Returns:
        pd.DataFrame: A Dataframe with adoption metrics per connector version.
    """
    # TODO: directly query BigQuery
    # use query in https://airbyte.metabaseapp.com/question/1642-adoption-and-success-rate-per-connector-version-oss-cloud
    metric_columns = [
        "connector_definition_id",
        "connector_version",
        "number_of_connections",
        "number_of_users",
        "sync_success_rate",
    ]
    return pd.DataFrame(columns=metric_columns)

# Module-level inputs, computed eagerly: importing this module calls
# fetch_remote_catalog twice, i.e. it performs two HTTP requests at import time.
CLOUD_CATALOG = fetch_remote_catalog(CLOUD_CATALOG_URL)
OSS_CATALOG = fetch_remote_catalog(OSS_CATALOG_URL)
# Currently an empty frame that only carries the expected columns.
ADOPTION_METRICS_PER_CONNECTOR_VERSION = fetch_adoption_metrics_per_connector_version()
31 changes: 31 additions & 0 deletions tools/ci_connector_ops/ci_connector_ops/qa_engine/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#


import pandas as pd
from .models import QAReport

# Destination that main() passes to write_qa_report_to_gcs.
GCS_QA_REPORT_PATH = "gs://prod-airbyte-cloud-connector-metadata-service/qa_report.json"
# Single-row placeholder report written by main(); its keys are the fields
# declared on the ConnectorQAReport model.
DUMMY_REPORT = pd.DataFrame([
{
"connector_type": "source",
"connector_name": "test",
"docker_image_tag": "0.0.0",
"release_stage": "alpha",
"is_on_cloud": False,
"latest_build_is_successful": False,
"documentation_is_available": False,
"number_of_connections": 0,
"number_of_users": 0,
"sync_success_rate": .99
}
])

def write_qa_report_to_gcs(qa_report: pd.DataFrame, output_file_path: str):
    """Validate a QA report and serialize it as a JSON list of records.

    Args:
        qa_report (pd.DataFrame): One row per connector.
        output_file_path (str): Where the JSON document is written; handed
            unchanged to ``DataFrame.to_json``.

    Raises:
        pydantic.ValidationError: If a row does not match the QAReport model.
    """
    report_rows = qa_report.to_dict(orient="records")
    # Instantiating the model is only done for its validation side effect;
    # the resulting object is intentionally discarded.
    QAReport(connectors_qa_report=report_rows)
    qa_report.to_json(output_file_path, orient="records")

def main():
    """Entry point: write the placeholder QA report to its GCS location."""
    write_qa_report_to_gcs(
        qa_report=DUMMY_REPORT,
        output_file_path=GCS_QA_REPORT_PATH,
    )
32 changes: 32 additions & 0 deletions tools/ci_connector_ops/ci_connector_ops/qa_engine/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#


from enum import Enum
from typing import List
from pydantic import BaseModel

class ConnectorTypeEnum(str, Enum):
    """Closed set of connector kinds; members are also plain strings."""

    source = "source"
    destination = "destination"

class ReleaseStageEnum(str, Enum):
    """Closed set of connector release stages; members are also plain strings."""

    alpha = "alpha"
    beta = "beta"
    generally_available = "generally_available"

class ConnectorQAReport(BaseModel):
    """Schema of one QA report row, describing a single connector."""

    # Identity of the connector.
    connector_type: ConnectorTypeEnum
    connector_name: str
    docker_image_tag: str
    release_stage: ReleaseStageEnum

    # Boolean quality checks.
    is_on_cloud: bool
    latest_build_is_successful: bool
    documentation_is_available: bool

    # Adoption metrics.
    number_of_connections: int
    number_of_users: int
    sync_success_rate: float

class QAReport(BaseModel):
    """Schema of a full QA report: a list of per-connector rows."""

    # One entry per connector row of the report.
    connectors_qa_report: List[ConnectorQAReport]
13 changes: 11 additions & 2 deletions tools/ci_connector_ops/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,19 @@

from setuptools import find_packages, setup

MAIN_REQUIREMENTS = ["requests", "PyYAML~=6.0", "GitPython~=3.1.29"]
# Main requirements of the ci_connector_ops package.
# pandas and pydantic are imported by the qa_engine modules.
# NOTE(review): fsspec/gcsfs are presumably what lets pandas write to the
# gs:// report path — confirm.
MAIN_REQUIREMENTS = [
"requests",
"PyYAML~=6.0",
"GitPython~=3.1.29",
"pandas~=1.5.3",
"pydantic~=1.10.4",
"fsspec~=2023.1.0",
"gcsfs~=2023.1.0"
]


setup(
version="0.1.2",
version="0.1.3",
name="ci_connector_ops",
description="Packaged maintained by the connector operations team to perform CI for connectors",
author="Airbyte",
Expand All @@ -22,6 +30,7 @@
"check-test-strictness-level = ci_connector_ops.sat_config_checks:check_test_strictness_level",
"write-review-requirements-file = ci_connector_ops.sat_config_checks:write_review_requirements_file",
"print-mandatory-reviewers = ci_connector_ops.sat_config_checks:print_mandatory_reviewers",
# Console script invoked by the "Run QA Engine" workflow. The trailing comma
# is required: without it Python joins this literal with the next entry.
"run-qa-engine = ci_connector_ops.qa_engine.main:main",
"run-qa-checks = ci_connector_ops.qa_checks:run_qa_checks"
],
},
Expand Down
Empty file.
30 changes: 30 additions & 0 deletions tools/ci_connector_ops/tests/test_qa_engine/test_inputs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#


import pandas as pd
import pytest

from ci_connector_ops.qa_engine import inputs

@pytest.mark.parametrize("catalog_url", [inputs.OSS_CATALOG_URL, inputs.CLOUD_CATALOG_URL])
def test_fetch_remote_catalog(catalog_url):
    """Each remote catalog loads as a DataFrame with the derived columns."""
    fetched = inputs.fetch_remote_catalog(catalog_url)
    assert isinstance(fetched, pd.DataFrame)
    # Both derived columns must be present among the catalog columns.
    assert {"connector_type", "connector_definition_id"}.issubset(fetched.columns)
    # Sources and destinations must both be represented.
    connector_types = set(fetched.connector_type.unique())
    assert connector_types == {"source", "destination"}

def test_fetch_adoption_metrics_per_connector_version():
    """The adoption metrics frame is empty but exposes the expected columns."""
    metrics = inputs.fetch_adoption_metrics_per_connector_version()

    assert len(metrics) == 0
    assert set(metrics.columns) == {
        "connector_definition_id",
        "connector_version",
        "number_of_connections",
        "number_of_users",
        "sync_success_rate",
    }
11 changes: 11 additions & 0 deletions tools/ci_connector_ops/tests/test_qa_engine/test_main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#

import pandas
from ci_connector_ops.qa_engine import main

def test_write_qa_report_to_gcs(tmp_path):
    """The report written to a local file reads back equal to the input."""
    report_file = tmp_path / "output.json"
    main.write_qa_report_to_gcs(main.DUMMY_REPORT, report_file)
    written_report = pandas.read_json(report_file)
    assert written_report.to_dict() == main.DUMMY_REPORT.to_dict()

0 comments on commit 4770a8f

Please sign in to comment.