diff --git a/.github/workflows/build-report.yml b/.github/workflows/build-report.yml
index 44ca93b45f6f..8e181d2277f0 100644
--- a/.github/workflows/build-report.yml
+++ b/.github/workflows/build-report.yml
@@ -4,7 +4,7 @@ on:
workflow_dispatch:
schedule:
# 1pm UTC is 6am PDT.
- - cron: '0 13 * * *'
+ - cron: "0 13 * * *"
jobs:
build-report:
@@ -19,7 +19,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
- pip install requests slack_sdk
+ pip install requests slack_sdk pyyaml
- name: create and send report
run: python ./tools/bin/build_report.py
env:
@@ -32,6 +32,6 @@ jobs:
SLACK_USERNAME: Build Report
SLACK_ICON: https://avatars.slack-edge.com/temp/2020-09-01/1342729352468_209b10acd6ff13a649a1.jpg
SLACK_COLOR: ${{ job.status }}
- SLACK_TITLE: 'Failed to create build report'
- SLACK_MESSAGE: 'https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}'
+ SLACK_TITLE: "Failed to create build report"
+ SLACK_MESSAGE: "https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
MSG_MINIMAL: True
diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml
index 8c32c2057918..0e815d778c9d 100644
--- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml
+++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml
@@ -624,6 +624,7 @@
documentationUrl: https://docs.airbyte.io/integrations/sources/orb
icon: orb.svg
sourceType: api
+ releaseStage: alpha
- sourceDefinitionId: 3490c201-5d95-4783-b600-eaf07a4c7787
name: Outreach
dockerRepository: airbyte/source-outreach
@@ -870,6 +871,7 @@
documentationUrl: https://docs.airbyte.io/integrations/sources/square
icon: square.svg
sourceType: api
+ releaseStage: alpha
- sourceDefinitionId: 7a4327c4-315a-11ec-8d3d-0242ac130003
name: Strava
dockerRepository: airbyte/source-strava
@@ -948,6 +950,7 @@
documentationUrl: https://docs.airbyte.io/integrations/sources/us-census
icon: uscensus.svg
sourceType: api
+ releaseStage: alpha
- sourceDefinitionId: afa734e4-3571-11ec-991a-1e0031268139
name: YouTube Analytics
dockerRepository: airbyte/source-youtube-analytics
@@ -1002,6 +1005,7 @@
dockerImageTag: 0.1.1
documentationUrl: https://docs.airbyte.io/integrations/sources/zenloop
sourceType: api
+ releaseStage: alpha
- sourceDefinitionId: cdaf146a-9b75-49fd-9dd2-9d64a0bb4781
name: Sentry
dockerRepository: airbyte/source-sentry
diff --git a/tools/bin/build_report.py b/tools/bin/build_report.py
index 4a57c43781b3..964dbff94fd3 100644
--- a/tools/bin/build_report.py
+++ b/tools/bin/build_report.py
@@ -1,51 +1,64 @@
#
-# MIT License
-#
-# Copyright (c) 2020 Airbyte
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
+# Copyright (c) 2021 Airbyte, Inc., all rights reserved.
#
+"""
+All invocations of this script must be run from the Airbyte repository root.
+
+To Run tests:
+pytest ./tools/bin/build_report.py
+
+To run the script:
+pip install slack-sdk pyyaml
+python ./tools/bin/build_report.py
+"""
+
import os
-import requests
+import pathlib
import re
+import sys
+from typing import Dict, List, Optional
+
+import requests
+import yaml
from slack_sdk import WebhookClient
from slack_sdk.errors import SlackApiError
+# Global statics
+CONNECTOR_DEFINITIONS_DIR = "./airbyte-config/init/src/main/resources/seed"
+SOURCE_DEFINITIONS_YAML = f"{CONNECTOR_DEFINITIONS_DIR}/source_definitions.yaml"
+DESTINATION_DEFINITIONS_YAML = f"{CONNECTOR_DEFINITIONS_DIR}/destination_definitions.yaml"
+CONNECTORS_ROOT_PATH = "./airbyte-integrations/connectors"
+
+# Global vars
+TESTED_SOURCE = []
+TESTED_DESTINATION = []
+SUCCESS_SOURCE = []
+SUCCESS_DESTINATION = []
+NO_TESTS = []
+FAILED_LAST = []
+FAILED_2_LAST = []
+
def get_status_page(connector) -> str:
- response = requests.get(f'https://dnsgjos7lj2fu.cloudfront.net/tests/summary/{connector}/index.html')
+ response = requests.get(f"https://dnsgjos7lj2fu.cloudfront.net/tests/summary/{connector}/index.html")
if response.status_code == 200:
return response.text
def parse(page) -> list:
history = []
-    for row in re.findall(r'<tr>(.*?)</tr>', page):
-        cols = re.findall(r'<td>(.*?)</td>', row)
+    for row in re.findall(r"<tr>(.*?)</tr>", page):
+        cols = re.findall(r"<td>(.*?)</td>", row)
if not cols or len(cols) != 3:
continue
- history.append({
- 'date': cols[0],
- 'status': re.findall(r' (\S+)', cols[1])[0],
- 'link': re.findall(r'href="(.*?)"', cols[2])[0],
- })
+ history.append(
+ {
+ "date": cols[0],
+ "status": re.findall(r" (\S+)", cols[1])[0],
+ "link": re.findall(r'href="(.*?)"', cols[2])[0],
+ }
+ )
return history
@@ -55,32 +68,32 @@ def check_connector(connector):
# check if connector is tested
if not status_page:
NO_TESTS.append(connector)
- print('F', end='', flush=True)
+ print("F", end="", flush=True)
return
- print('.', end='', flush=True)
+ print(".", end="", flush=True)
- if connector.startswith('source'):
+ if connector.startswith("source"):
TESTED_SOURCE.append(connector)
- elif connector.startswith('destination'):
+ elif connector.startswith("destination"):
TESTED_DESTINATION.append(connector)
# order: recent values goes first
history = parse(status_page)
# order: recent values goes last
- short_status = ''.join(['✅' if build['status'] == 'success' else '❌' for build in history[::-1]]) # ex: ❌✅✅❌✅✅❌❌
+ short_status = "".join(["✅" if build["status"] == "success" else "❌" for build in history[::-1]]) # ex: ❌✅✅❌✅✅❌❌
# check latest build status
last_build = history[0]
- if last_build['status'] == 'success':
- if connector.startswith('source'):
+ if last_build["status"] == "success":
+ if connector.startswith("source"):
SUCCESS_SOURCE.append(connector)
- elif connector.startswith('destination'):
+ elif connector.startswith("destination"):
SUCCESS_DESTINATION.append(connector)
else:
- failed_today = [connector, short_status, last_build['link']]
+ failed_today = [connector, short_status, last_build["link"]]
- if len(history) > 1 and history[1]['status'] != 'success':
+ if len(history) > 1 and history[1]["status"] != "success":
FAILED_2_LAST.append(failed_today)
return
@@ -91,32 +104,29 @@ def failed_report(failed_report) -> str:
max_name_len = max([len(connector[0]) for connector in failed_report])
max_status_len = max(len(connector[1]) for connector in failed_report)
for connector in failed_report:
- connector[0] = connector[0].ljust(max_name_len, ' ')
- connector[1] = connector[1].rjust(max_status_len, ' ')
- return '\n'.join([' '.join(connector) for connector in failed_report])
-
+ connector[0] = connector[0].ljust(max_name_len, " ")
+ connector[1] = connector[1].rjust(max_status_len, " ")
+ return "\n".join([" ".join(connector) for connector in failed_report])
-def create_report(connectors) -> str:
- sources_len = len([name for name in connectors if name.startswith('source')])
- destinations_len = len([name for name in connectors if name.startswith('destination')])
+def create_report(connectors, statuses: List[str]) -> str:
+ sources_len = len([name for name in connectors if name.startswith("source")])
+ destinations_len = len([name for name in connectors if name.startswith("destination")])
report = f"""
-CONNECTORS: total: {len(connectors)}
-Sources: total: {sources_len} / tested: {len(TESTED_SOURCE)} / success: {len(SUCCESS_SOURCE)} ({round(len(SUCCESS_SOURCE)/sources_len*100, 1)}%)
-Destinations: total: {destinations_len} / tested: {len(TESTED_DESTINATION)} / success: {len(SUCCESS_DESTINATION)} ({round(len(SUCCESS_DESTINATION)/destinations_len*100, 1)}%)
+CONNECTORS: total: {len(connectors)} {" & ".join(statuses)} connectors
+Sources: total: {sources_len} / tested: {len(TESTED_SOURCE)} / success: {len(SUCCESS_SOURCE)} ({round(len(SUCCESS_SOURCE) / sources_len * 100, 1)}%)
+Destinations: total: {destinations_len} / tested: {len(TESTED_DESTINATION)} / success: {len(SUCCESS_DESTINATION)} ({round(len(SUCCESS_DESTINATION) / destinations_len * 100, 1)}%)
"""
if FAILED_LAST:
- report += f"FAILED LAST BUILD ONLY - {len(FAILED_LAST)} connectors:\n" + \
- failed_report(FAILED_LAST) + '\n\n'
+ report += f"FAILED LAST BUILD ONLY - {len(FAILED_LAST)} connectors:\n" + failed_report(FAILED_LAST) + "\n\n"
if FAILED_2_LAST:
- report += f"FAILED TWO LAST BUILDS - {len(FAILED_2_LAST)} connectors:\n" + \
- failed_report(FAILED_2_LAST) + '\n\n'
+ report += f"FAILED TWO LAST BUILDS - {len(FAILED_2_LAST)} connectors:\n" + failed_report(FAILED_2_LAST) + "\n\n"
if NO_TESTS:
- report += f"NO TESTS - {len(NO_TESTS)} connectors:\n" + '\n'.join(NO_TESTS) + '\n'
+ report += f"NO TESTS - {len(NO_TESTS)} connectors:\n" + "\n".join(NO_TESTS) + "\n"
return report
@@ -124,41 +134,119 @@ def create_report(connectors) -> str:
def send_report(report):
webhook = WebhookClient(os.environ["SLACK_BUILD_REPORT"])
try:
+
def chunk_messages(report):
"""split report into messages with no more than 4000 chars each (slack limitation)"""
- msg = ''
+ msg = ""
for line in report.splitlines():
- msg += line + '\n'
+ msg += line + "\n"
if len(msg) > 3500:
yield msg
- msg = ''
+ msg = ""
yield msg
+
for msg in chunk_messages(report):
webhook.send(text=f"```{msg}```")
- print(f'Report has been sent')
+ print("Report has been sent")
except SlackApiError as e:
- print(f'Unable to send report')
+ print("Unable to send report")
assert e.response["error"]
-TESTED_SOURCE = []
-TESTED_DESTINATION = []
-SUCCESS_SOURCE = []
-SUCCESS_DESTINATION = []
-NO_TESTS = []
-FAILED_LAST = []
-FAILED_2_LAST = []
+def parse_dockerfile_repository_label(dockerfile_contents: str) -> Optional[str]:
+ parsed_label = re.findall(r"LABEL io.airbyte.name=(.*)[\s\n]*", dockerfile_contents)
+ if len(parsed_label) == 1:
+ return parsed_label[0]
+ elif len(parsed_label) == 0:
+ return None
+ else:
+ raise Exception(f"found more than one label in dockerfile: {dockerfile_contents}")
+
+
+def get_docker_label_to_connector_directory(base_directory: str, connector_module_names: List[str]) -> Dict[str, str]:
+ result = {}
+ for connector in connector_module_names:
+ # parse the dockerfile label if the dockerfile exists
+ dockerfile_path = pathlib.Path(base_directory, connector, "Dockerfile")
+ if os.path.isfile(dockerfile_path):
+ print(f"Reading f{dockerfile_path}")
+ with open(dockerfile_path, "r") as file:
+ dockerfile_contents = file.read()
+ label = parse_dockerfile_repository_label(dockerfile_contents)
+ if label:
+ result[label] = connector
+ else:
+ print(f"Couldn't find a connector label in {dockerfile_path}")
+ else:
+ print(f"Couldn't find a dockerfile at {dockerfile_path}")
+ return result
+
+
+def get_connectors_with_release_stage(definitions_yaml: List, stages: List[str]) -> List[str]:
+ """returns e.g: ['airbyte/source-salesforce', ...] when given 'generally_available' as input"""
+ return [definition["dockerRepository"] for definition in definitions_yaml if definition.get("releaseStage", "alpha") in stages]
+
+
+def read_definitions_yaml(path: str):
+ with open(path, "r") as file:
+ return yaml.safe_load(file)
+
+
+def get_connectors_with_release_stages(base_directory: str, connectors: List[str], relevant_stages=["beta", "generally_available"]):
+ # TODO currently this also excludes shared libs like source-jdbc, we probably shouldn't do that, so we can get the build status of those
+ # modules as well.
+ connector_label_to_connector_directory = get_docker_label_to_connector_directory(base_directory, connectors)
+
+ connectors_with_desired_status = get_connectors_with_release_stage(
+ read_definitions_yaml(SOURCE_DEFINITIONS_YAML), relevant_stages
+ ) + get_connectors_with_release_stage(read_definitions_yaml(DESTINATION_DEFINITIONS_YAML), relevant_stages)
+ # return appropriate directory names
+ return [
+ connector_label_to_connector_directory[label]
+ for label in connectors_with_desired_status
+ if label in connector_label_to_connector_directory
+ ]
+
+
+def setup_module():
+ global pytest
+ global mock
if __name__ == "__main__":
- # find all connectors
- connectors = sorted(os.listdir("./airbyte-integrations/connectors"))
- print(f"Checking connectors: {len(connectors)}")
+
+ # find all connectors and filter to beta and GA
+ connectors = sorted(os.listdir(CONNECTORS_ROOT_PATH))
+ relevant_stages = ["beta", "generally_available"]
+ relevant_connectors = get_connectors_with_release_stages(CONNECTORS_ROOT_PATH, connectors, relevant_stages)
+ print(f"Checking {len(relevant_connectors)} relevant connectors out of {len(connectors)} total connectors")
# analyse build results for each connector
- [check_connector(connector) for connector in connectors]
+ [check_connector(connector) for connector in relevant_connectors]
- report = create_report(connectors)
+ report = create_report(relevant_connectors, relevant_stages)
print(report)
- send_report(report)
- print('Finish')
+ # send_report(report)
+ print("Finish")
+elif "pytest" in sys.argv[0]:
+ import unittest
+
+ class Tests(unittest.TestCase):
+ def test_filter_definitions_yaml(self):
+ mock_def_yaml = [
+ {"releaseStage": "alpha", "dockerRepository": "alpha_connector"},
+ {"releaseStage": "beta", "dockerRepository": "beta_connector"},
+ {"releaseStage": "generally_available", "dockerRepository": "GA_connector"},
+ ]
+ assert ["alpha_connector"] == get_connectors_with_release_stage(mock_def_yaml, ["alpha"])
+ assert ["alpha_connector", "beta_connector"] == get_connectors_with_release_stage(mock_def_yaml, ["alpha", "beta"])
+ assert ["beta_connector", "GA_connector"] == get_connectors_with_release_stage(mock_def_yaml, ["beta", "generally_available"])
+ assert ["GA_connector"] == get_connectors_with_release_stage(mock_def_yaml, ["generally_available"])
+
+ def test_parse_dockerfile_label(self):
+ mock_dockerfile = """
+ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]
+
+LABEL io.airbyte.version=1.0.8
+LABEL io.airbyte.name=airbyte/source-salesforce"""
+ assert "airbyte/source-salesforce" == parse_dockerfile_repository_label(mock_dockerfile)