connectors-ci: make source-file testable in airbyte-ci #27107
airbyte-integrations/connectors/source-file-secure/Dockerfile
@@ -1,13 +1,22 @@
-FROM airbyte/source-file:0.3.9
+### WARNING ###
+# This Dockerfile will soon be deprecated.
+# It is not used to build the connector image we publish to DockerHub.
+# The new logic to build the connector image is declared with Dagger here:
+# https://github.com/airbytehq/airbyte/blob/master/tools/ci_connector_ops/ci_connector_ops/pipelines/actions/environments.py#L771
+
+# If you need to add a custom logic to build your connector image, you can do it by adding a finalize_build.sh or finalize_build.py script in the connector folder.
+# Please reach out to the Connectors Operations team if you have any question.
+FROM airbyte/source-file:0.3.10

 WORKDIR /airbyte/integration_code
 COPY source_file_secure ./source_file_secure
 COPY main.py ./
 COPY setup.py ./
+ENV DOCKER_BUILD=True
 RUN pip install .

 ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
 ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

-LABEL io.airbyte.version=0.3.9
+LABEL io.airbyte.version=0.3.10
 LABEL io.airbyte.name=airbyte/source-file-secure

Review comment (on the new FROM line): Nice.
airbyte-integrations/connectors/source-file-secure/requirements.txt
@@ -1,3 +1,2 @@
 -e ../../bases/connector-acceptance-test
--e ../source-file
 -e .
airbyte-integrations/connectors/source-file-secure/setup.py
@@ -2,9 +2,20 @@
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 #

+import os
+from pathlib import Path
+
 from setuptools import find_packages, setup


+def local_dependency(name: str) -> str:
+    """Returns a path to a local package."""
+    if os.environ.get("DAGGER_BUILD"):
+        return f"{name} @ file:///local_dependencies/{name}"
+    else:
+        return f"{name} @ file://{Path.cwd().parent / name}"
+
+
 MAIN_REQUIREMENTS = [
     "airbyte-cdk~=0.1",
     "gcsfs==2022.7.1",

@@ -23,6 +34,9 @@
     "pyxlsb==1.0.9",
 ]

+if not os.environ.get("DOCKER_BUILD"):
+    MAIN_REQUIREMENTS.append(local_dependency("source-file"))
+
 TEST_REQUIREMENTS = ["boto3==1.21.21", "pytest==7.1.2", "pytest-docker==1.0.0", "pytest-mock~=3.8.2"]

 setup(

Review comment (on local_dependency): Compute the absolute path to the local dependencies. In dagger pipelines we mount local dependencies to /local_dependencies.

Review comment: Oh dear python! I like this solution. My only reservations are having [...] So I want to ask a question for #1 and propose a possible idea for #2.
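To make the switch concrete, here is a small self-contained sketch of how the build modes change what pip resolves. The function body is copied from the diff above; the mode descriptions and the printed values are illustrative assumptions about the surrounding environment.

```python
# Sketch of the three dependency modes (function body copied from the diff above;
# the comments describing each mode are illustrative).
import os
from pathlib import Path


def local_dependency(name: str) -> str:
    """Returns a path to a local package."""
    if os.environ.get("DAGGER_BUILD"):
        return f"{name} @ file:///local_dependencies/{name}"
    else:
        return f"{name} @ file://{Path.cwd().parent / name}"


# 1. Local development / airbyte-ci test environment: no flag set, so the sibling
#    checkout of source-file next to this connector is used.
os.environ.pop("DAGGER_BUILD", None)
print(local_dependency("source-file"))  # e.g. source-file @ file:///<repo>/airbyte-integrations/connectors/source-file

# 2. Dagger image build: environments.py sets DAGGER_BUILD=True and mounts the
#    dependency under /local_dependencies, so pip reads it from there.
os.environ["DAGGER_BUILD"] = "True"
print(local_dependency("source-file"))  # source-file @ file:///local_dependencies/source-file

# 3. Legacy Docker build: the Dockerfile sets DOCKER_BUILD=True, so setup.py never
#    appends this requirement at all -- the base image airbyte/source-file:0.3.10
#    already ships the source_file package.
```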
airbyte-integrations/connectors/source-file-secure/source_file_secure/source.py
@@ -4,34 +4,19 @@

 import json
-import os
-import sys

+import source_file

-# some integration tests doesn't setup dependences from
-# requirements.txt file and Python can return a exception.
-# Thus we should to import this parent module manually
 from airbyte_cdk import AirbyteLogger
 from airbyte_cdk.models import ConnectorSpecification

-try:
-    import source_file.source
-except ModuleNotFoundError:
-    current_dir = os.path.dirname(os.path.abspath(__file__))
-    parent_source_local = os.path.join(current_dir, "../../source-file")
-    if os.path.isdir(parent_source_local):
-        sys.path.append(parent_source_local)
-    else:
-        raise RuntimeError("not found parent source folder")
-    import source_file.source

-# import original classes of the native Source File
-from source_file import SourceFile as ParentSourceFile
-from source_file.client import Client
-from source_file.client import URLFile as ParentURLFile

 LOCAL_STORAGE_NAME = "local"


-class URLFileSecure(ParentURLFile):
+class URLFileSecure(source_file.client.URLFile):
     """Updating of default logic:
     This connector shouldn't work with local files.
     """

Review comment (on the removed lines 15-24): This is not required anymore as the source-file connector is installed as a normal Python dependency with pip install.

@@ -43,7 +28,7 @@ def __init__(self, url: str, provider: dict, binary=None, encoding=None):
         super().__init__(url, provider, binary, encoding)


-class SourceFileSecure(ParentSourceFile):
+class SourceFileSecure(source_file.SourceFile):
     """Updating of default source logic
     This connector shouldn't work with local files.
     The base logic of this connector are implemented in the "source-file" connector.

@@ -52,7 +37,7 @@ class SourceFileSecure(ParentSourceFile):
     @property
     def client_class(self):
         # replace a standard class variable to the new one
-        class ClientSecure(Client):
+        class ClientSecure(source_file.client.Client):
             reader_class = URLFileSecure

         return ClientSecure
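The path hack above is obsolete because source-file is now installed as a regular distribution (via local_dependency in setup.py), so `import source_file` just works. For readers unfamiliar with the override pattern the connector relies on, here is a minimal, self-contained sketch with stand-in classes; the local-storage check is illustrative and not the connector's exact code.

```python
# Stand-in classes illustrating the subclass-and-swap pattern used by SourceFileSecure.
class URLFile:  # plays the role of source_file.client.URLFile
    def __init__(self, url: str, provider: dict):
        self.url, self.provider = url, provider


class Client:  # plays the role of source_file.client.Client
    reader_class = URLFile


class SourceFile:  # plays the role of source_file.SourceFile
    client_class = Client


class URLFileSecure(URLFile):
    """Rejects local storage, mirroring the connector's restriction (illustrative check)."""

    def __init__(self, url: str, provider: dict):
        if provider.get("storage") == "local":
            raise RuntimeError("local storage is not supported by the secure variant")
        super().__init__(url, provider)


class SourceFileSecure(SourceFile):
    @property
    def client_class(self):
        # Replace the standard class attribute with a client that uses the secure reader.
        class ClientSecure(Client):
            reader_class = URLFileSecure

        return ClientSecure
```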
docker-compose.yml (source-file integration tests)
@@ -1,9 +1,9 @@
-version: '3'
+version: "3"
 services:
   ssh:
     image: atmoz/sftp
     ports:
       - "2222:22"
     volumes:
-      - ./sample_files:/home/user1/files
+      - /tmp/s3_sample_files:/home/user1/files
     command: user1:abc123@456#:1001
airbyte-integrations/connectors/source-file/setup.py
@@ -24,7 +24,7 @@
     "pyxlsb==1.0.9",
 ]

-TEST_REQUIREMENTS = ["pytest~=6.2", "pytest-docker~=1.0.0", "pytest-mock~=3.6.1"]
+TEST_REQUIREMENTS = ["pytest~=6.2", "pytest-docker~=1.0.0", "pytest-mock~=3.6.1", "docker-compose"]

 setup(
     name="source_file",

Review comment: Is this a python package to work with docker?

Reply: The source-file integration tests use docker-compose without specifying it as a test dependency. I added this so that our pipeline installs it within the test environment, which does not have docker-compose preinstalled.
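Context for the dependency: the pytest-docker plugin pinned above drives its fixtures by shelling out to the docker-compose CLI, which is what the docker-compose Python package provides. Below is a hypothetical sketch of how a test could consume the sftp service from the docker-compose.yml above; the `docker_ip` and `docker_services` fixture names come from pytest-docker, and this is not the connector's actual test code.

```python
# Hypothetical integration-test sketch; `docker_ip` and `docker_services` are the
# standard pytest-docker fixtures, which invoke the docker-compose CLI under the hood.
import socket

import pytest


def _port_is_open(host: str, port: int) -> bool:
    """Cheap readiness check: can we open a TCP connection to the sftp port?"""
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.settimeout(1)
        return sock.connect_ex((host, port)) == 0


@pytest.fixture(scope="session")
def sftp_endpoint(docker_ip, docker_services):
    # "ssh" is the service name declared in the docker-compose.yml shown earlier.
    port = docker_services.port_for("ssh", 22)
    docker_services.wait_until_responsive(timeout=30.0, pause=0.5, check=lambda: _port_is_open(docker_ip, port))
    return docker_ip, port
```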
tools/ci_connector_ops/ci_connector_ops/pipelines/actions/environments.py
@@ -7,8 +7,10 @@
 from __future__ import annotations

 import importlib.util
+import re
 import uuid
-from typing import TYPE_CHECKING, List, Optional
+from pathlib import Path
+from typing import TYPE_CHECKING, List, Optional, Tuple

 from ci_connector_ops.pipelines import consts
 from ci_connector_ops.pipelines.consts import (

@@ -48,7 +50,7 @@ def with_python_base(context: PipelineContext, python_image_name: str = "python:
         .from_(python_image_name)
         .with_mounted_cache("/root/.cache/pip", pip_cache)
         .with_mounted_directory("/tools", context.get_repo_dir("tools", include=["ci_credentials", "ci_common_utils"]))
-        .with_exec(["pip", "install", "--upgrade", "pip"])
+        .with_exec(["pip", "install", "pip==23.1.2"])
     )

     return base_container
@@ -99,6 +101,47 @@
     return container


+async def find_local_python_dependencies(context: PipelineContext, package_source_code_path: str) -> Tuple[List[str], List[str]]:
+    """Retrieve the list of local dependencies of a python package source code.
+    Returns both the list of local dependencies found in setup.py and requirements.txt.
+
+    Args:
+        context (PipelineContext): The current pipeline context.
+        package_source_code_path (str): Path to the package source code in airbyte repo.
+
+    Returns:
+        Tuple[List[str], List[str]]: A tuple containing the list of local dependencies found in setup.py and requirements.txt.
+    """
+    python_environment = with_python_base(context)
+    container = with_python_package(context, python_environment, package_source_code_path)
+
+    # Find local dependencies in setup.py
+    setup_dependency_paths = []
+    if await get_file_contents(container, "setup.py"):
+        container_with_egg_info = container.with_exec(["python", "setup.py", "egg_info"])
+        egg_info_output = await container_with_egg_info.stdout()
+        for line in egg_info_output.split("\n"):
+            if line.startswith("writing requirements to"):
+                requires_txt_path = line.replace("writing requirements to", "").strip()
+                requires_txt = await container_with_egg_info.file(requires_txt_path).contents()
+                for line in requires_txt.split("\n"):
+                    if "file://" in line:
+                        match = re.search(r"file:///(.+)", line)
+                        if match:
+                            setup_dependency_paths.append(match.group(1))
+                break
+
+    # Find local dependencies in requirements.txt
+    requirements_dependency_paths = []
+    if requirements_txt := await get_file_contents(container, "requirements.txt"):
+        for line in requirements_txt.split("\n"):
+            if line.startswith("-e ."):
+                local_dependency_path = Path(package_source_code_path + "/" + line[3:]).resolve()
+                local_dependency_path = str(local_dependency_path.relative_to(Path.cwd()))
+                requirements_dependency_paths.append(local_dependency_path)
+    return setup_dependency_paths, requirements_dependency_paths
+
+
 async def with_installed_python_package(
     context: PipelineContext,
     python_environment: Container,

Review comment (on the egg_info parsing): This must have been painful, well done.
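To make the two discovery paths concrete, here is a standalone sketch of the same parsing with hypothetical in-memory file contents in place of the Dagger container reads; the example paths mirror the connectors touched by this PR.

```python
# Standalone illustration of find_local_python_dependencies' two parsing steps,
# using hypothetical in-memory file contents instead of files read from a container.
import re
from pathlib import PurePosixPath

# 1. setup.py dependencies: `python setup.py egg_info` writes a requires.txt whose
#    local entries look like "<name> @ file:///<path>"; the regex keeps the path part.
requires_txt = "\n".join(
    [
        "airbyte-cdk~=0.1",
        "source-file @ file:///airbyte-integrations/connectors/source-file",
    ]
)
setup_dependency_paths = []
for line in requires_txt.split("\n"):
    if "file://" in line:
        match = re.search(r"file:///(.+)", line)
        if match:
            setup_dependency_paths.append(match.group(1))

# 2. requirements.txt dependencies: "-e <relative path>" entries are resolved against
#    the package directory and re-expressed relative to the repo root.
package_source_code_path = PurePosixPath("airbyte-integrations/connectors/source-file-secure")
requirements_txt = "\n".join(["-e ../../bases/connector-acceptance-test", "-e ."])
requirements_dependency_paths = []
for line in requirements_txt.split("\n"):
    if line.startswith("-e ."):
        parts = []
        # Lexical ".." resolution for the sketch; the real code uses Path.resolve()
        # against the checked-out repository.
        for part in (package_source_code_path / line[3:]).parts:
            if part == "..":
                parts.pop()
            elif part != ".":
                parts.append(part)
        requirements_dependency_paths.append("/".join(parts))

print(setup_dependency_paths)
# ['airbyte-integrations/connectors/source-file']
print(requirements_dependency_paths)
# ['airbyte-integrations/bases/connector-acceptance-test',
#  'airbyte-integrations/connectors/source-file-secure']
```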
@@ -118,20 +161,19 @@ async def with_installed_python_package(
     Returns:
         Container: A python environment container with the python package installed.
     """
-    install_local_requirements_cmd = ["python", "-m", "pip", "install", "-r", "requirements.txt"]
+    install_requirements_cmd = ["python", "-m", "pip", "install", "-r", "requirements.txt"]
     install_connector_package_cmd = ["python", "-m", "pip", "install", "."]

     container = with_python_package(context, python_environment, package_source_code_path, exclude=exclude)
-    if requirements_txt := await get_file_contents(container, "requirements.txt"):
-        for line in requirements_txt.split("\n"):
-            if line.startswith("-e ."):
-                local_dependency_path = package_source_code_path + "/" + line[3:]
-                container = container.with_mounted_directory(
-                    "/" + local_dependency_path, context.get_repo_dir(local_dependency_path, exclude=DEFAULT_PYTHON_EXCLUDE)
-                )
-        container = container.with_exec(install_local_requirements_cmd)
-
-    container = container.with_exec(install_connector_package_cmd)
+    setup_dependencies, requirements_dependencies = await find_local_python_dependencies(context, package_source_code_path)
+    for dependency_directory in setup_dependencies + requirements_dependencies:
+        container = container.with_mounted_directory("/" + dependency_directory, context.get_repo_dir(dependency_directory))
+
+    if await get_file_contents(container, "setup.py"):
+        container = container.with_exec(install_connector_package_cmd)
+    if await get_file_contents(container, "requirements.txt"):
+        container = container.with_exec(install_requirements_cmd)

     if additional_dependency_groups:
         container = container.with_exec(

Review comment (on the setup.py / requirements.txt checks): Nit: we likely will need one for pyproject.toml at some point.
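Why mounting at "/" plus the repo-relative path is enough: assuming with_python_package mounts the connector at its own repo-relative location (which is what the "/"-prefixed mount paths above suggest), the relative "-e" entries in requirements.txt still resolve to the sibling mounts prepared by the loop. A tiny path sketch, using an example entry from the source-file-secure requirements.txt:

```python
# Path arithmetic only -- shows how an editable entry resolves inside the container,
# assuming the connector itself is mounted at its repo-relative path under "/".
from pathlib import PurePosixPath

connector_mount = PurePosixPath("/airbyte-integrations/connectors/source-file-secure")
editable_entry = "../../bases/connector-acceptance-test"  # a "-e" line from requirements.txt

resolved = connector_mount
for part in PurePosixPath(editable_entry).parts:
    resolved = resolved.parent if part == ".." else resolved / part

print(resolved)  # /airbyte-integrations/bases/connector-acceptance-test
```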
@@ -677,6 +719,9 @@ async def get_cdk_version_from_python_connector(python_connector: Container) ->


 async def with_airbyte_python_connector(context: ConnectorContext, build_platform: Platform) -> Container:
+    if context.connector.technical_name == "source-file-secure":
+        return await with_airbyte_python_connector_full_dagger(context, build_platform)
+
     pip_cache: CacheVolume = context.dagger_client.cache_volume("pip_cache")
     connector_container = (
         context.dagger_client.container(platform=build_platform)
@@ -732,35 +777,43 @@ async def finalize_build(context: ConnectorContext, connector_container: Container
     return connector_container.with_entrypoint(original_entrypoint)


-# This function is not used at the moment as we decided to use Python connectors dockerfile instead of building it with dagger.
-# Some python connectors use alpine base image, other use debian... We should unify this.
-def with_airbyte_python_connector_full_dagger(context: ConnectorContext, build_platform: Platform) -> Container:
+async def with_airbyte_python_connector_full_dagger(context: ConnectorContext, build_platform: Platform) -> Container:
+    setup_dependencies_to_mount, _ = await find_local_python_dependencies(context, str(context.connector.code_directory))
+
     pip_cache: CacheVolume = context.dagger_client.cache_volume("pip_cache")
-    base = context.dagger_client.container(platform=build_platform).from_("python:3.9.11-alpine3.15")
+    base = context.dagger_client.container(platform=build_platform).from_("python:3.9-slim")
     snake_case_name = context.connector.technical_name.replace("-", "_")
     entrypoint = ["python", "/airbyte/integration_code/main.py"]
     builder = (
         base.with_workdir("/airbyte/integration_code")
-        .with_exec(["apk", "--no-cache", "upgrade"])
+        .with_env_variable("DAGGER_BUILD", "True")
+        .with_exec(["apt-get", "update"])
         .with_mounted_cache("/root/.cache/pip", pip_cache)
         .with_exec(["pip", "install", "--upgrade", "pip"])
-        .with_exec(["apk", "--no-cache", "add", "tzdata", "build-base"])
+        .with_exec(["apt-get", "install", "-y", "tzdata"])
         .with_file("setup.py", context.get_connector_dir(include="setup.py").file("setup.py"))
-        .with_exec(["pip", "install", "--prefix=/install", "."])
     )
-    return (
+
+    for dependency_path in setup_dependencies_to_mount:
+        in_container_dependency_path = f"/local_dependencies/{Path(dependency_path).name}"
+        builder = builder.with_mounted_directory(in_container_dependency_path, context.get_repo_dir(dependency_path))
+
+    builder = builder.with_exec(["pip", "install", "--prefix=/install", "."])
+
+    connector_container = (
         base.with_workdir("/airbyte/integration_code")
         .with_directory("/usr/local", builder.directory("/install"))
         .with_file("/usr/localtime", builder.file("/usr/share/zoneinfo/Etc/UTC"))
         .with_new_file("/etc/timezone", "Etc/UTC")
-        .with_exec(["apk", "--no-cache", "add", "bash"])
+        .with_exec(["apt-get", "install", "-y", "bash"])
         .with_file("main.py", context.get_connector_dir(include="main.py").file("main.py"))
         .with_directory(snake_case_name, context.get_connector_dir(include=snake_case_name).directory(snake_case_name))
         .with_env_variable("AIRBYTE_ENTRYPOINT", " ".join(entrypoint))
         .with_entrypoint(entrypoint)
         .with_label("io.airbyte.version", context.metadata["dockerImageTag"])
         .with_label("io.airbyte.name", context.metadata["dockerRepository"])
     )
+    return await finalize_build(context, connector_container)


 def with_crane(
Review comment: source-file-secure is now fully built with Dagger. As source-file needs to be mounted at build time, it was easier to accomplish this with Dagger.
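Putting the last two pieces together: each path returned by find_local_python_dependencies is mounted under /local_dependencies during the Dagger build, and because the builder sets DAGGER_BUILD=True, setup.py's local_dependency() points pip at exactly that mount. A small sketch of the path computation; the dependency list is an example value.

```python
# How the Dagger build and setup.py meet: example values, same path logic as the
# loop in with_airbyte_python_connector_full_dagger.
from pathlib import PurePosixPath

# Example output of find_local_python_dependencies for source-file-secure.
setup_dependencies_to_mount = ["airbyte-integrations/connectors/source-file"]

for dependency_path in setup_dependencies_to_mount:
    # The builder mounts the repo directory here...
    in_container_dependency_path = f"/local_dependencies/{PurePosixPath(dependency_path).name}"
    print(in_container_dependency_path)  # /local_dependencies/source-file
    # ...and, with DAGGER_BUILD=True, setup.py emits
    # "source-file @ file:///local_dependencies/source-file", so `pip install .`
    # resolves the requirement against this very mount.
```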