diff --git a/.github/workflows/build-report.yml b/.github/workflows/build-report.yml index 44ca93b45f6f..8e181d2277f0 100644 --- a/.github/workflows/build-report.yml +++ b/.github/workflows/build-report.yml @@ -4,7 +4,7 @@ on: workflow_dispatch: schedule: # 1pm UTC is 6am PDT. - - cron: '0 13 * * *' + - cron: "0 13 * * *" jobs: build-report: @@ -19,7 +19,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install requests slack_sdk + pip install requests slack_sdk pyyaml - name: create and send report run: python ./tools/bin/build_report.py env: @@ -32,6 +32,6 @@ jobs: SLACK_USERNAME: Build Report SLACK_ICON: https://avatars.slack-edge.com/temp/2020-09-01/1342729352468_209b10acd6ff13a649a1.jpg SLACK_COLOR: ${{ job.status }} - SLACK_TITLE: 'Failed to create build report' - SLACK_MESSAGE: 'https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}' + SLACK_TITLE: "Failed to create build report" + SLACK_MESSAGE: "https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" MSG_MINIMAL: True diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 8c32c2057918..0e815d778c9d 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -624,6 +624,7 @@ documentationUrl: https://docs.airbyte.io/integrations/sources/orb icon: orb.svg sourceType: api + releaseStage: alpha - sourceDefinitionId: 3490c201-5d95-4783-b600-eaf07a4c7787 name: Outreach dockerRepository: airbyte/source-outreach @@ -870,6 +871,7 @@ documentationUrl: https://docs.airbyte.io/integrations/sources/square icon: square.svg sourceType: api + releaseStage: alpha - sourceDefinitionId: 7a4327c4-315a-11ec-8d3d-0242ac130003 name: Strava dockerRepository: airbyte/source-strava @@ -948,6 +950,7 @@ documentationUrl: 
https://docs.airbyte.io/integrations/sources/us-census icon: uscensus.svg sourceType: api + releaseStage: alpha - sourceDefinitionId: afa734e4-3571-11ec-991a-1e0031268139 name: YouTube Analytics dockerRepository: airbyte/source-youtube-analytics @@ -1002,6 +1005,7 @@ dockerImageTag: 0.1.1 documentationUrl: https://docs.airbyte.io/integrations/sources/zenloop sourceType: api + releaseStage: alpha - sourceDefinitionId: cdaf146a-9b75-49fd-9dd2-9d64a0bb4781 name: Sentry dockerRepository: airbyte/source-sentry diff --git a/tools/bin/build_report.py b/tools/bin/build_report.py index 4a57c43781b3..964dbff94fd3 100644 --- a/tools/bin/build_report.py +++ b/tools/bin/build_report.py @@ -1,51 +1,64 @@ # -# MIT License -# -# Copyright (c) 2020 Airbyte -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. # +""" +All invocations of this script must be run from the Airbyte repository root. 
+ +To Run tests: +pytest ./tools/bin/build_report.py + +To run the script: +pip install slack-sdk pyyaml +python ./tools/bin/build_report.py +""" + import os -import requests +import pathlib import re +import sys +from typing import Dict, List, Optional + +import requests +import yaml from slack_sdk import WebhookClient from slack_sdk.errors import SlackApiError +# Global statics +CONNECTOR_DEFINITIONS_DIR = "./airbyte-config/init/src/main/resources/seed" +SOURCE_DEFINITIONS_YAML = f"{CONNECTOR_DEFINITIONS_DIR}/source_definitions.yaml" +DESTINATION_DEFINITIONS_YAML = f"{CONNECTOR_DEFINITIONS_DIR}/destination_definitions.yaml" +CONNECTORS_ROOT_PATH = "./airbyte-integrations/connectors" + +# Global vars +TESTED_SOURCE = [] +TESTED_DESTINATION = [] +SUCCESS_SOURCE = [] +SUCCESS_DESTINATION = [] +NO_TESTS = [] +FAILED_LAST = [] +FAILED_2_LAST = [] + def get_status_page(connector) -> str: - response = requests.get(f'https://dnsgjos7lj2fu.cloudfront.net/tests/summary/{connector}/index.html') + response = requests.get(f"https://dnsgjos7lj2fu.cloudfront.net/tests/summary/{connector}/index.html") if response.status_code == 200: return response.text def parse(page) -> list: history = [] - for row in re.findall(r'(.*?)', page): - cols = re.findall(r'(.*?)', row) + for row in re.findall(r"(.*?)", page): + cols = re.findall(r"(.*?)", row) if not cols or len(cols) != 3: continue - history.append({ - 'date': cols[0], - 'status': re.findall(r' (\S+)', cols[1])[0], - 'link': re.findall(r'href="(.*?)"', cols[2])[0], - }) + history.append( + { + "date": cols[0], + "status": re.findall(r" (\S+)", cols[1])[0], + "link": re.findall(r'href="(.*?)"', cols[2])[0], + } + ) return history @@ -55,32 +68,32 @@ def check_connector(connector): # check if connector is tested if not status_page: NO_TESTS.append(connector) - print('F', end='', flush=True) + print("F", end="", flush=True) return - print('.', end='', flush=True) + print(".", end="", flush=True) - if connector.startswith('source'): + 
if connector.startswith("source"): TESTED_SOURCE.append(connector) - elif connector.startswith('destination'): + elif connector.startswith("destination"): TESTED_DESTINATION.append(connector) # order: recent values goes first history = parse(status_page) # order: recent values goes last - short_status = ''.join(['✅' if build['status'] == 'success' else '❌' for build in history[::-1]]) # ex: ❌✅✅❌✅✅❌❌ + short_status = "".join(["✅" if build["status"] == "success" else "❌" for build in history[::-1]]) # ex: ❌✅✅❌✅✅❌❌ # check latest build status last_build = history[0] - if last_build['status'] == 'success': - if connector.startswith('source'): + if last_build["status"] == "success": + if connector.startswith("source"): SUCCESS_SOURCE.append(connector) - elif connector.startswith('destination'): + elif connector.startswith("destination"): SUCCESS_DESTINATION.append(connector) else: - failed_today = [connector, short_status, last_build['link']] + failed_today = [connector, short_status, last_build["link"]] - if len(history) > 1 and history[1]['status'] != 'success': + if len(history) > 1 and history[1]["status"] != "success": FAILED_2_LAST.append(failed_today) return @@ -91,32 +104,29 @@ def failed_report(failed_report) -> str: max_name_len = max([len(connector[0]) for connector in failed_report]) max_status_len = max(len(connector[1]) for connector in failed_report) for connector in failed_report: - connector[0] = connector[0].ljust(max_name_len, ' ') - connector[1] = connector[1].rjust(max_status_len, ' ') - return '\n'.join([' '.join(connector) for connector in failed_report]) - + connector[0] = connector[0].ljust(max_name_len, " ") + connector[1] = connector[1].rjust(max_status_len, " ") + return "\n".join([" ".join(connector) for connector in failed_report]) -def create_report(connectors) -> str: - sources_len = len([name for name in connectors if name.startswith('source')]) - destinations_len = len([name for name in connectors if name.startswith('destination')]) 
def create_report(connectors, statuses: List[str]) -> str:
    """Build the plain-text build report for the checked connectors.

    Args:
        connectors: connector directory names that were checked. Names starting with
            "source" / "destination" are counted as sources / destinations.
        statuses: release stages the connectors were filtered by; joined with " & "
            in the header line.

    Returns:
        The report text: a totals header followed by the "failed last build",
        "failed two last builds" and "no tests" sections that have entries.
    """

    def success_rate(succeeded: int, total: int) -> float:
        # Filtering by release stage can leave zero sources or zero destinations;
        # report 0.0% instead of raising ZeroDivisionError.
        return round(succeeded / total * 100, 1) if total else 0.0

    sources_len = sum(1 for name in connectors if name.startswith("source"))
    destinations_len = sum(1 for name in connectors if name.startswith("destination"))

    report = f"""
CONNECTORS: total: {len(connectors)} {" & ".join(statuses)} connectors
Sources: total: {sources_len} / tested: {len(TESTED_SOURCE)} / success: {len(SUCCESS_SOURCE)} ({success_rate(len(SUCCESS_SOURCE), sources_len)}%)
Destinations: total: {destinations_len} / tested: {len(TESTED_DESTINATION)} / success: {len(SUCCESS_DESTINATION)} ({success_rate(len(SUCCESS_DESTINATION), destinations_len)}%)
"""
    if FAILED_LAST:
        report += f"FAILED LAST BUILD ONLY - {len(FAILED_LAST)} connectors:\n" + failed_report(FAILED_LAST) + "\n\n"

    if FAILED_2_LAST:
        report += f"FAILED TWO LAST BUILDS - {len(FAILED_2_LAST)} connectors:\n" + failed_report(FAILED_2_LAST) + "\n\n"

    if NO_TESTS:
        report += f"NO TESTS - {len(NO_TESTS)} connectors:\n" + "\n".join(NO_TESTS) + "\n"

    return report


def send_report(report):
    """Post the report to the Slack webhook given by the SLACK_BUILD_REPORT env variable.

    The report is split into several messages, each wrapped in a code fence.
    Raises KeyError when SLACK_BUILD_REPORT is not set.
    """

    def chunk_messages(report):
        """split report into messages with no more than 4000 chars each (slack limitation)"""
        msg = ""
        for line in report.splitlines():
            msg += line + "\n"
            # flush once past 3500 chars so a chunk stays below the limit above
            if len(msg) > 3500:
                yield msg
                msg = ""
        # do not emit an empty trailing message (it would be posted as a bare code fence)
        if msg:
            yield msg

    webhook = WebhookClient(os.environ["SLACK_BUILD_REPORT"])
    try:
        for msg in chunk_messages(report):
            webhook.send(text=f"```{msg}```")
        print("Report has been sent")
    except SlackApiError as e:
        print("Unable to send report")
        assert e.response["error"]


def parse_dockerfile_repository_label(dockerfile_contents: str) -> Optional[str]:
    """Return the value of the `LABEL io.airbyte.name=...` line of a Dockerfile.

    Args:
        dockerfile_contents: full text of the Dockerfile.

    Returns:
        The label value with surrounding whitespace removed, or None when the
        Dockerfile has no such label.

    Raises:
        ValueError: when the label appears more than once.
    """
    # `.` never matches "\n", so the group stops at the end of the label line; the
    # value is stripped because it may still carry trailing blanks or a "\r".
    labels = re.findall(r"LABEL io\.airbyte\.name=(.*)", dockerfile_contents)
    if not labels:
        return None
    if len(labels) > 1:
        raise ValueError(f"found more than one label in dockerfile: {dockerfile_contents}")
    return labels[0].strip()


def get_docker_label_to_connector_directory(base_directory: str, connector_module_names: List[str]) -> Dict[str, str]:
    """Map each connector's `io.airbyte.name` Dockerfile label to its directory name.

    Args:
        base_directory: directory that contains one sub-directory per connector.
        connector_module_names: names of the connector sub-directories to inspect.

    Returns:
        {label: directory name} for every connector whose Dockerfile exists and
        carries the label; the others are reported on stdout and left out.
    """
    result = {}
    for connector in connector_module_names:
        dockerfile_path = pathlib.Path(base_directory, connector, "Dockerfile")
        if not dockerfile_path.is_file():
            print(f"Couldn't find a dockerfile at {dockerfile_path}")
            continue

        print(f"Reading {dockerfile_path}")
        label = parse_dockerfile_repository_label(dockerfile_path.read_text())
        if label:
            result[label] = connector
        else:
            print(f"Couldn't find a connector label in {dockerfile_path}")
    return result


def get_connectors_with_release_stage(definitions_yaml: List, stages: List[str]) -> List[str]:
    """Return the dockerRepository of every definition whose release stage is in `stages`.

    A definition without a "releaseStage" key counts as "alpha".
    E.g. returns ['airbyte/source-salesforce', ...] when given ['generally_available'].
    """
    return [definition["dockerRepository"] for definition in definitions_yaml if definition.get("releaseStage", "alpha") in stages]


def read_definitions_yaml(path: str):
    """Load and return the parsed content of the YAML file at `path`."""
    with open(path, "r") as file:
        return yaml.safe_load(file)


def get_connectors_with_release_stages(base_directory: str, connectors: List[str], relevant_stages=None):
    """Return the connector directories whose definition has one of the given release stages.

    Args:
        base_directory: directory that contains one sub-directory per connector.
        connectors: connector directory names to choose from.
        relevant_stages: release stages to keep; defaults to
            ["beta", "generally_available"] when omitted or None.

    Returns:
        Directory names of the connectors listed with a relevant stage in the source
        or destination definitions file and whose Dockerfile label could be resolved.
    """
    # None stands in for the default so that no list object is shared between calls.
    if relevant_stages is None:
        relevant_stages = ["beta", "generally_available"]

    # TODO currently this also excludes shared libs like source-jdbc, we probably shouldn't do that, so we can get the build status of those
    # modules as well.
    connector_label_to_connector_directory = get_docker_label_to_connector_directory(base_directory, connectors)

    connectors_with_desired_status = get_connectors_with_release_stage(
        read_definitions_yaml(SOURCE_DEFINITIONS_YAML), relevant_stages
    ) + get_connectors_with_release_stage(read_definitions_yaml(DESTINATION_DEFINITIONS_YAML), relevant_stages)
    # return appropriate directory names
    return [
        connector_label_to_connector_directory[label]
        for label in connectors_with_desired_status
        if label in connector_label_to_connector_directory
    ]


def setup_module():
    # Only declares the two names as globals; nothing is assigned here.
    global pytest
    global mock


if __name__ == "__main__":

    # find all connectors and filter to beta and GA
    connectors = sorted(os.listdir(CONNECTORS_ROOT_PATH))
    relevant_stages = ["beta", "generally_available"]
    relevant_connectors = get_connectors_with_release_stages(CONNECTORS_ROOT_PATH, connectors, relevant_stages)
    print(f"Checking {len(relevant_connectors)} relevant connectors out of {len(connectors)} total connectors")

    # analyse build results for each connector
    for connector in relevant_connectors:
        check_connector(connector)

    report = create_report(relevant_connectors, relevant_stages)
    print(report)
    # Deliver the report whenever a webhook is configured; a run without the
    # variable (e.g. a local run) only prints it.
    if os.environ.get("SLACK_BUILD_REPORT"):
        send_report(report)
    else:
        print("SLACK_BUILD_REPORT is not set, skipping Slack delivery")
    print("Finish")
elif "pytest" in sys.argv[0]:
    import unittest

    class Tests(unittest.TestCase):
        def test_filter_definitions_yaml(self):
            mock_def_yaml = [
                {"releaseStage": "alpha", "dockerRepository": "alpha_connector"},
                {"releaseStage": "beta", "dockerRepository": "beta_connector"},
                {"releaseStage": "generally_available", "dockerRepository": "GA_connector"},
            ]
            assert ["alpha_connector"] == get_connectors_with_release_stage(mock_def_yaml, ["alpha"])
            assert ["alpha_connector", "beta_connector"] == get_connectors_with_release_stage(mock_def_yaml, ["alpha", "beta"])
            assert ["beta_connector", "GA_connector"] == get_connectors_with_release_stage(mock_def_yaml, ["beta", "generally_available"])
            assert ["GA_connector"] == get_connectors_with_release_stage(mock_def_yaml, ["generally_available"])

        def test_parse_dockerfile_label(self):
            mock_dockerfile = """
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

LABEL io.airbyte.version=1.0.8
LABEL io.airbyte.name=airbyte/source-salesforce"""
            assert "airbyte/source-salesforce" == parse_dockerfile_repository_label(mock_dockerfile)