From 22f03e31a8b57beedfd3a3306aa459752aca7e81 Mon Sep 17 00:00:00 2001 From: Noel Jacob Date: Thu, 10 Nov 2022 17:37:42 +0000 Subject: [PATCH] =?UTF-8?q?=F0=9F=8E=89=20New=20Source:=20PyPI=20[low-code?= =?UTF-8?q?=20cdk]=20(#18632)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * PyPI (#11) * Init * Update acceptance-test-config.yml * Update * Update acceptance-test-config.yml * Add requested changes and docs * Update acceptance.py * Update acceptance-test-config.yml * Update setup.py * fix EOFL * Update README.md * Update README.md * changes to pass tests * Update source_definitions.yaml * Update source_specs.yaml Co-authored-by: Vincent Koc Co-authored-by: Vincent Koc --- .../resources/seed/source_definitions.yaml | 7 + .../src/main/resources/seed/source_specs.yaml | 28 ++ airbyte-integrations/builds.md | 1 + .../connectors/source-pypi/.dockerignore | 6 + .../connectors/source-pypi/Dockerfile | 38 ++ .../connectors/source-pypi/README.md | 79 ++++ .../connectors/source-pypi/__init__.py | 3 + .../source-pypi/acceptance-test-config.yml | 27 ++ .../source-pypi/acceptance-test-docker.sh | 16 + .../connectors/source-pypi/build.gradle | 9 + .../source-pypi/integration_tests/__init__.py | 3 + .../integration_tests/acceptance.py | 14 + .../integration_tests/configured_catalog.json | 37 ++ .../integration_tests/invalid_config.json | 4 + .../integration_tests/sample_config.json | 4 + .../connectors/source-pypi/main.py | 13 + .../connectors/source-pypi/requirements.txt | 2 + .../connectors/source-pypi/setup.py | 29 ++ .../source-pypi/source_pypi/__init__.py | 8 + .../source-pypi/source_pypi/pypi.yaml | 57 +++ .../source_pypi/schemas/project.json | 437 ++++++++++++++++++ .../source_pypi/schemas/release.json | 331 +++++++++++++ .../source_pypi/schemas/stats.json | 30 ++ .../source-pypi/source_pypi/source.py | 18 + .../source-pypi/source_pypi/spec.yaml | 25 + docs/integrations/sources/pypi.md | 31 ++ 26 files changed, 1257 
insertions(+) create mode 100644 airbyte-integrations/connectors/source-pypi/.dockerignore create mode 100644 airbyte-integrations/connectors/source-pypi/Dockerfile create mode 100644 airbyte-integrations/connectors/source-pypi/README.md create mode 100644 airbyte-integrations/connectors/source-pypi/__init__.py create mode 100644 airbyte-integrations/connectors/source-pypi/acceptance-test-config.yml create mode 100644 airbyte-integrations/connectors/source-pypi/acceptance-test-docker.sh create mode 100644 airbyte-integrations/connectors/source-pypi/build.gradle create mode 100644 airbyte-integrations/connectors/source-pypi/integration_tests/__init__.py create mode 100644 airbyte-integrations/connectors/source-pypi/integration_tests/acceptance.py create mode 100644 airbyte-integrations/connectors/source-pypi/integration_tests/configured_catalog.json create mode 100644 airbyte-integrations/connectors/source-pypi/integration_tests/invalid_config.json create mode 100644 airbyte-integrations/connectors/source-pypi/integration_tests/sample_config.json create mode 100644 airbyte-integrations/connectors/source-pypi/main.py create mode 100644 airbyte-integrations/connectors/source-pypi/requirements.txt create mode 100644 airbyte-integrations/connectors/source-pypi/setup.py create mode 100644 airbyte-integrations/connectors/source-pypi/source_pypi/__init__.py create mode 100644 airbyte-integrations/connectors/source-pypi/source_pypi/pypi.yaml create mode 100644 airbyte-integrations/connectors/source-pypi/source_pypi/schemas/project.json create mode 100644 airbyte-integrations/connectors/source-pypi/source_pypi/schemas/release.json create mode 100644 airbyte-integrations/connectors/source-pypi/source_pypi/schemas/stats.json create mode 100644 airbyte-integrations/connectors/source-pypi/source_pypi/source.py create mode 100644 airbyte-integrations/connectors/source-pypi/source_pypi/spec.yaml create mode 100644 docs/integrations/sources/pypi.md diff --git 
a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index a22535c856c6..61f6ea682373 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -1120,6 +1120,13 @@ documentationUrl: https://docs.airbyte.com/integrations/sources/public-apis sourceType: api releaseStage: alpha +- name: PyPI + sourceDefinitionId: 88ecd3a8-5f5b-11ed-9b6a-0242ac120002 + dockerRepository: airbyte/source-pypi + dockerImageTag: 0.1.0 + documentationUrl: https://docs.airbyte.com/integrations/sources/pypi + sourceType: api + releaseStage: alpha - name: Qualaroo sourceDefinitionId: b08e4776-d1de-4e80-ab5c-1e51dad934a2 dockerRepository: airbyte/source-qualaroo diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index c154b7e01fb5..33a3a0234135 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -10736,6 +10736,34 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-pypi:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/pypi" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Pypi Spec" + type: "object" + required: + - "project_name" + additionalProperties: true + properties: + project_name: + type: "string" + title: "PyPI Package" + description: "Name of the project/package. Can only be in lowercase with\ + \ hyphen. This is the name used using pip command for installing the package." + examples: + - "sampleproject" + version: + title: "Package Version" + type: "string" + description: "Version of the project/package. Use it to find a particular\ + \ release instead of all releases." 
+ examples: + - "1.2.0" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] - dockerImage: "airbyte/source-qualaroo:0.1.2" spec: documentationUrl: "https://docs.airbyte.com/integrations/sources/qualaroo" diff --git a/airbyte-integrations/builds.md b/airbyte-integrations/builds.md index fef7c5ee950f..6d9ed46fa9a7 100644 --- a/airbyte-integrations/builds.md +++ b/airbyte-integrations/builds.md @@ -98,6 +98,7 @@ | Posthog | [![source-posthog](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-posthog%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-posthog) | | PrestaShop | [![source-prestashop](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-prestashop%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-prestashop) | | Primetric | [![source-primetric](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-primetric%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-primetric) | +| PyPI | [![source-pypi](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-pypi%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-pypi) | | Public APIs | [![source-public-apis](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-public-apis%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-public-apis) | | CockroachDb | [![source-cockroachdb](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-cockroachdb%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-cockroachdb) | | Confluence | 
[![source-confluence](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-confluence%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-confluence) | diff --git a/airbyte-integrations/connectors/source-pypi/.dockerignore b/airbyte-integrations/connectors/source-pypi/.dockerignore new file mode 100644 index 000000000000..c0cf1a30c41d --- /dev/null +++ b/airbyte-integrations/connectors/source-pypi/.dockerignore @@ -0,0 +1,6 @@ +* +!Dockerfile +!main.py +!source_pypi +!setup.py +!secrets diff --git a/airbyte-integrations/connectors/source-pypi/Dockerfile b/airbyte-integrations/connectors/source-pypi/Dockerfile new file mode 100644 index 000000000000..a66fa8d7efdb --- /dev/null +++ b/airbyte-integrations/connectors/source-pypi/Dockerfile @@ -0,0 +1,38 @@ +FROM python:3.9.11-alpine3.15 as base + +# build and load all requirements +FROM base as builder +WORKDIR /airbyte/integration_code + +# upgrade pip to the latest version +RUN apk --no-cache upgrade \ + && pip install --upgrade pip \ + && apk --no-cache add tzdata build-base + + +COPY setup.py ./ +# install necessary packages to a temporary folder +RUN pip install --prefix=/install . + +# build a clean environment +FROM base +WORKDIR /airbyte/integration_code + +# copy all loaded and built libraries to a pure basic image +COPY --from=builder /install /usr/local +# add default timezone settings +COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime +RUN echo "Etc/UTC" > /etc/timezone + +# bash is installed for more convenient debugging. 
+RUN apk --no-cache add bash + +# copy payload code only +COPY main.py ./ +COPY source_pypi ./source_pypi + +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] + +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=airbyte/source-pypi diff --git a/airbyte-integrations/connectors/source-pypi/README.md b/airbyte-integrations/connectors/source-pypi/README.md new file mode 100644 index 000000000000..f3411d8bae3a --- /dev/null +++ b/airbyte-integrations/connectors/source-pypi/README.md @@ -0,0 +1,79 @@ +# Pypi Source + +This is the repository for the Pypi configuration based source connector. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.io/integrations/sources/pypi). + +## Local development + +#### Building via Gradle +You can also build the connector in Gradle. This is typically used in CI and not needed for your development workflow. + +To build using Gradle, from the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:source-pypi:build +``` + +#### Create credentials +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.io/integrations/sources/pypi) +to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `source_pypi/spec.yaml` file. +Note that any directory named `secrets` is gitignored across the entire Airbyte repo, so there is no danger of accidentally checking in sensitive information. +See `integration_tests/sample_config.json` for a sample config file. + +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `source pypi test creds` +and place them into `secrets/config.json`. + +### Locally running the connector docker image + +#### Build +First, make sure you build the latest Docker image: +``` +docker build . 
-t airbyte/source-pypi:dev +``` + +You can also build the connector image via Gradle: +``` +./gradlew :airbyte-integrations:connectors:source-pypi:airbyteDocker +``` +When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in +the Dockerfile. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/source-pypi:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-pypi:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-pypi:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/source-pypi:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` +## Testing + +#### Acceptance Tests +Customize `acceptance-test-config.yml` file to configure tests. See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) for more information. +If your connector requires to create or destroy resources for use during acceptance tests create fixtures for it and place them inside integration_tests/acceptance.py. + +To run your integration tests with docker + +### Using gradle to run tests +All commands should be run from airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:source-pypi:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:source-pypi:integrationTest +``` + +## Dependency Management +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. 
+We split dependencies between two groups, dependencies that are: +* required for your connector to work need to go to `MAIN_REQUIREMENTS` list. +* required for the testing need to go to `TEST_REQUIREMENTS` list + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing unit and integration tests. +1. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)). +1. Create a Pull Request. +1. Pat yourself on the back for being an awesome contributor. +1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. diff --git a/airbyte-integrations/connectors/source-pypi/__init__.py b/airbyte-integrations/connectors/source-pypi/__init__.py new file mode 100644 index 000000000000..1100c1c58cf5 --- /dev/null +++ b/airbyte-integrations/connectors/source-pypi/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+# diff --git a/airbyte-integrations/connectors/source-pypi/acceptance-test-config.yml b/airbyte-integrations/connectors/source-pypi/acceptance-test-config.yml new file mode 100644 index 000000000000..5aa563a20c36 --- /dev/null +++ b/airbyte-integrations/connectors/source-pypi/acceptance-test-config.yml @@ -0,0 +1,27 @@ +# See [Source Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/source-acceptance-tests-reference) +# for more information about how to configure these tests +connector_image: airbyte/source-pypi:dev +acceptance_tests: + spec: + tests: + - spec_path: "source_pypi/spec.yaml" + connection: + tests: + - config_path: "secrets/config.json" + status: "succeed" + - config_path: "integration_tests/invalid_config.json" + status: "failed" + discovery: + tests: + - config_path: "secrets/config.json" + basic_read: + tests: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + empty_streams: [] + incremental: + bypass_reason: "This connector does not implement incremental sync" + full_refresh: + tests: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-pypi/acceptance-test-docker.sh b/airbyte-integrations/connectors/source-pypi/acceptance-test-docker.sh new file mode 100644 index 000000000000..c51577d10690 --- /dev/null +++ b/airbyte-integrations/connectors/source-pypi/acceptance-test-docker.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env sh + +# Build latest connector image +docker build . 
-t $(cat acceptance-test-config.yml | grep "connector_image" | head -n 1 | cut -d: -f2-) + +# Pull latest acctest image +docker pull airbyte/source-acceptance-test:latest + +# Run +docker run --rm -it \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v /tmp:/tmp \ + -v $(pwd):/test_input \ + airbyte/source-acceptance-test \ + --acceptance-test-config /test_input + diff --git a/airbyte-integrations/connectors/source-pypi/build.gradle b/airbyte-integrations/connectors/source-pypi/build.gradle new file mode 100644 index 000000000000..08ad52762c4a --- /dev/null +++ b/airbyte-integrations/connectors/source-pypi/build.gradle @@ -0,0 +1,9 @@ +plugins { + id 'airbyte-python' + id 'airbyte-docker' + id 'airbyte-source-acceptance-test' +} + +airbytePython { + moduleDirectory 'source_pypi' +} diff --git a/airbyte-integrations/connectors/source-pypi/integration_tests/__init__.py b/airbyte-integrations/connectors/source-pypi/integration_tests/__init__.py new file mode 100644 index 000000000000..1100c1c58cf5 --- /dev/null +++ b/airbyte-integrations/connectors/source-pypi/integration_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-pypi/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-pypi/integration_tests/acceptance.py new file mode 100644 index 000000000000..950b53b59d41 --- /dev/null +++ b/airbyte-integrations/connectors/source-pypi/integration_tests/acceptance.py @@ -0,0 +1,14 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+# + + +import pytest + +pytest_plugins = ("source_acceptance_test.plugin",) + + +@pytest.fixture(scope="session", autouse=True) +def connector_setup(): + """This fixture is a placeholder for external resources that acceptance test might require.""" + yield diff --git a/airbyte-integrations/connectors/source-pypi/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-pypi/integration_tests/configured_catalog.json new file mode 100644 index 000000000000..63accfc2b79b --- /dev/null +++ b/airbyte-integrations/connectors/source-pypi/integration_tests/configured_catalog.json @@ -0,0 +1,37 @@ +{ + "streams" : [ + { + "stream" : { + "name" : "project", + "json_schema" : {}, + "supported_sync_modes" : [ + "full_refresh" + ] + }, + "sync_mode" : "full_refresh", + "destination_sync_mode" : "overwrite" + }, + { + "stream" : { + "name" : "release", + "json_schema" : {}, + "supported_sync_modes" : [ + "full_refresh" + ] + }, + "sync_mode" : "full_refresh", + "destination_sync_mode" : "overwrite" + }, + { + "stream" : { + "name" : "stats", + "json_schema" : {}, + "supported_sync_modes" : [ + "full_refresh" + ] + }, + "sync_mode" : "full_refresh", + "destination_sync_mode" : "overwrite" + } + ] +} \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-pypi/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-pypi/integration_tests/invalid_config.json new file mode 100644 index 000000000000..69bd31195137 --- /dev/null +++ b/airbyte-integrations/connectors/source-pypi/integration_tests/invalid_config.json @@ -0,0 +1,4 @@ +{ + "project_name": 22, + "version": false +} \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-pypi/integration_tests/sample_config.json b/airbyte-integrations/connectors/source-pypi/integration_tests/sample_config.json new file mode 100644 index 000000000000..4971ad947c22 --- /dev/null +++ 
b/airbyte-integrations/connectors/source-pypi/integration_tests/sample_config.json @@ -0,0 +1,4 @@ +{ + "project_name": "sampleproject", + "version": "1.2.0" +} \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-pypi/main.py b/airbyte-integrations/connectors/source-pypi/main.py new file mode 100644 index 000000000000..17000165c3bf --- /dev/null +++ b/airbyte-integrations/connectors/source-pypi/main.py @@ -0,0 +1,13 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. +# + + +import sys + +from airbyte_cdk.entrypoint import launch +from source_pypi import SourcePypi + +if __name__ == "__main__": + source = SourcePypi() + launch(source, sys.argv[1:]) diff --git a/airbyte-integrations/connectors/source-pypi/requirements.txt b/airbyte-integrations/connectors/source-pypi/requirements.txt new file mode 100644 index 000000000000..0411042aa091 --- /dev/null +++ b/airbyte-integrations/connectors/source-pypi/requirements.txt @@ -0,0 +1,2 @@ +-e ../../bases/source-acceptance-test +-e . diff --git a/airbyte-integrations/connectors/source-pypi/setup.py b/airbyte-integrations/connectors/source-pypi/setup.py new file mode 100644 index 000000000000..ec0aa103d81f --- /dev/null +++ b/airbyte-integrations/connectors/source-pypi/setup.py @@ -0,0 +1,29 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = [ + "airbyte-cdk~=0.2", +] + +TEST_REQUIREMENTS = [ + "pytest~=6.1", + "pytest-mock~=3.6.1", + "source-acceptance-test", +] + +setup( + name="source_pypi", + description="Source implementation for Pypi.", + author="Airbyte", + author_email="contact@airbyte.io", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json", "*.yaml", "schemas/*.json", "schemas/shared/*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, +) diff --git a/airbyte-integrations/connectors/source-pypi/source_pypi/__init__.py b/airbyte-integrations/connectors/source-pypi/source_pypi/__init__.py new file mode 100644 index 000000000000..0e2a2c8c5996 --- /dev/null +++ b/airbyte-integrations/connectors/source-pypi/source_pypi/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. +# + + +from .source import SourcePypi + +__all__ = ["SourcePypi"] diff --git a/airbyte-integrations/connectors/source-pypi/source_pypi/pypi.yaml b/airbyte-integrations/connectors/source-pypi/source_pypi/pypi.yaml new file mode 100644 index 000000000000..b6f64ce312a0 --- /dev/null +++ b/airbyte-integrations/connectors/source-pypi/source_pypi/pypi.yaml @@ -0,0 +1,57 @@ +version: "0.1.0" + +definitions: + schema_loader: + type: JsonSchema + file_path: "./source_pypi/schemas/{{ options['name'] }}.json" + selector: + extractor: + field_pointer: [] + requester: + url_base: "https://pypi.org" + http_method: "GET" + request_options_provider: + request_headers: + User-Agent: "Airbyte" + Accept: "application/json" + retriever: + record_selector: + $ref: "*ref(definitions.selector)" + paginator: + type: NoPagination + requester: + $ref: "*ref(definitions.requester)" + base_stream: + schema_loader: + $ref: "*ref(definitions.schema_loader)" + retriever: + $ref: "*ref(definitions.retriever)" + + project_stream: + $ref: "*ref(definitions.base_stream)" + $options: + name: "project" 
+ path: "/pypi/{{ config['project_name'] }}/json" + + release_stream: + $ref: "*ref(definitions.base_stream)" + $options: + name: "release" + path: "/pypi/{{ config['project_name'] }}/{{ config['version'] }}/json" + + stats_stream: + $ref: "*ref(definitions.base_stream)" + $options: + name: "stats" + path: "/stats" + +streams: + - "*ref(definitions.project_stream)" + - "*ref(definitions.release_stream)" + - "*ref(definitions.stats_stream)" + +check: + stream_names: + - "project" + - "release" + - "stats" diff --git a/airbyte-integrations/connectors/source-pypi/source_pypi/schemas/project.json b/airbyte-integrations/connectors/source-pypi/source_pypi/schemas/project.json new file mode 100644 index 000000000000..156ecdaf37ae --- /dev/null +++ b/airbyte-integrations/connectors/source-pypi/source_pypi/schemas/project.json @@ -0,0 +1,437 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Warehouse JSON API version 1.0", + "type": "object", + "required": [ + "info", + "last_serial", + "releases", + "urls" + ], + "properties": { + "info": { + "type": "object", + "description": "Generic information about a specific version of a project", + "required": [ + "author", + "author_email", + "license", + "name", + "project_url", + "version", + "yanked", + "yanked_reason" + ], + "properties": { + "author": { + "description": "The name of the company or individual who created the project", + "type": "string" + }, + "author_email": { + "description": "The author's email address", + "type": "string" + }, + "bugtrack_url": { + "description": "URL to find issues and bugs for the project", + "type": [ + "string", + "null" + ] + }, + "classifiers": { + "description": "Trove Classifier. 
Corresponds to https://packaging.python.org/specifications/core-metadata/#classifier-multiple-use", + "type": "array", + "items": { + "type": "string" + } + }, + "description": { + "description": "Corresponds to https://packaging.python.org/specifications/core-metadata/#description", + "type": "string" + }, + "description_content_type": { + "description": "Corresponds to https://packaging.python.org/specifications/core-metadata/#description-content-type", + "type": [ + "string", + "null" + ] + }, + "docs_url": { + "description": "URL to the project's documentation", + "type": [ + "string", + "null" + ] + }, + "download_url": { + "description": "[DEPRECATED]", + "type": [ + "string", + "null" + ] + }, + "downloads": { + "description": "[DEPRECATED]", + "type": "object" + }, + "home_page": { + "description": "URL to project home page", + "type": "string" + }, + "keywords": { + "description": "Keywords to use for project searching", + "type": "string" + }, + "license": { + "description": "Project's open source license", + "type": [ + "string", + "null" + ] + }, + "maintainer": { + "description": "Project maintainer name", + "type": [ + "string", + "null" + ] + }, + "maintainer_email": { + "description": "Project maintainer email address", + "type": [ + "string", + "null" + ] + }, + "name": { + "description": "Project's raw (non-normalized) name", + "type": "string" + }, + "package_url": { + "description": "URL to the project page", + "type": "string" + }, + "platform": { + "description": "[DEPRECATED]", + "type": ["null", "string"] + }, + "project_url": { + "description": "URL to the project page", + "type": "string" + }, + "project_urls": { + "description": "Additional URLs that are relevant to your project. 
Corresponds to https://packaging.python.org/specifications/core-metadata/#project-url-multiple-use", + "patternProperties": { + ".*": { + "type": ["null", "string"] + } + } + }, + "release_url": { + "description": "URL of the release page of the version of the project", + "type": "string" + }, + "requires_dist": { + "description": "Calculated project dependencies. Corresponds to https://packaging.python.org/specifications/core-metadata/#requires-dist-multiple-use", + "type": [ + "array", + "null" + ], + "items": { + "type": "string" + } + }, + "requires_python": { + "description": "Python runtime version required for project. Corresponds to https://packaging.python.org/specifications/core-metadata/#requires-python", + "type": [ + "string", + "null" + ] + }, + "summary": { + "description": "A one-line summary of what the distribution does. Corresponds to https://packaging.python.org/specifications/core-metadata/#summary", + "type": [ + "string", + "null" + ] + }, + "version": { + "description": "A string containing the distribution's version number in the format specified in PEP 440. Corresponds to https://packaging.python.org/specifications/core-metadata/#version", + "type": "string" + }, + "yanked": { + "description": "If the version has been yanked. 
As defined in PEP 592", + "type": "boolean" + }, + "yanked_reason": { + "description": "Reason for applying PEP 592 version yank", + "type": [ + "string", + "null" + ] + } + } + }, + "last_serial": { + "type": "integer", + "description": "Monotonically increasing integer sequence that changes every time the project is updated" + }, + "releases": { + "type": "object", + "patternProperties": { + ".*": { + "version_urls": { + "type": "array", + "description": "A list of release artifacts associated with a version", + "items": { + "release_file": { + "description": "A single downloadable and installable artifact", + "type": "object", + "required": [ + "digests", + "filename", + "packagetype", + "size", + "upload_time_iso_8601", + "url", + "yanked", + "yanked_reason" + ], + "properties": { + "comment_text": { + "description": "[DEPRECATED]", + "type": "string" + }, + "digests": { + "description": "The file checksums", + "type": "object", + "properties": { + "md5": { + "description": "The MD5 checksum of the release file", + "type": "string" + }, + "sha256": { + "description": "The SHA256 checksum of the release file", + "type": "string" + } + } + }, + "downloads": { + "description": "[DEPRECATED]", + "type": "integer" + }, + "filename": { + "description": "Full filename (including extension)", + "type": "string" + }, + "has_sig": { + "description": "Indicates whether a $(filename).asc GPG signature file was provided", + "type": "boolean" + }, + "md5_digest": { + "description": "[DEPRECATED]", + "type": "string" + }, + "packagetype": { + "description": "Release file type: 'sdist', 'bdist_wheel', etc", + "type": "string" + }, + "python_version": { + "description": "Can be 'source' or Python Tag as defined in https://www.python.org/dev/peps/pep-0425/#python-tag", + "type": "string" + }, + "requires_python": { + "description": "Python runtime version required for project. 
Corresponds to https://packaging.python.org/specifications/core-metadata/#requires-python", + "type": [ + "string", + "null" + ] + }, + "size": { + "description": "File size in bytes", + "type": "integer" + }, + "upload_time": { + "description": "strftime('%Y-%m-%dT%H:%M:%S') of when the release file was uploaded", + "type": "string" + }, + "upload_time_iso_8601": { + "description": "ISO 8601 timestamp of when the release file was uploaded", + "type": "string" + }, + "url": { + "description": "Downloadable URL of the release file", + "type": "string" + }, + "yanked": { + "description": "Is release file PEP 592 yanked", + "type": "boolean" + }, + "yanked_reason": { + "description": "PEP 592 reason for yanking release file", + "type": [ + "string", + "null" + ] + } + } + } + } + } + } + } + }, + "urls": { + "type": "array", + "description": "A list of release artifacts associated with a version", + "items": { + "release_file": { + "description": "A single downloadable and installable artifact", + "type": "object", + "required": [ + "digests", + "filename", + "packagetype", + "size", + "upload_time_iso_8601", + "url", + "yanked", + "yanked_reason" + ], + "properties": { + "comment_text": { + "description": "[DEPRECATED]", + "type": "string" + }, + "digests": { + "description": "The file checksums", + "type": "object", + "properties": { + "md5": { + "description": "The MD5 checksum of the release file", + "type": "string" + }, + "sha256": { + "description": "The SHA256 checksum of the release file", + "type": "string" + } + } + }, + "downloads": { + "description": "[DEPRECATED]", + "type": "integer" + }, + "filename": { + "description": "Full filename (including extension)", + "type": "string" + }, + "has_sig": { + "description": "Indicates whether a $(filename).asc GPG signature file was provided", + "type": "boolean" + }, + "md5_digest": { + "description": "[DEPRECATED]", + "type": "string" + }, + "packagetype": { + "description": "Release file type: 'sdist', 
'bdist_wheel', etc", + "type": "string" + }, + "python_version": { + "description": "Can be 'source' or Python Tag as defined in https://www.python.org/dev/peps/pep-0425/#python-tag", + "type": "string" + }, + "requires_python": { + "description": "Python runtime version required for project. Corresponds to https://packaging.python.org/specifications/core-metadata/#requires-python", + "type": [ + "string", + "null" + ] + }, + "size": { + "description": "File size in bytes", + "type": "integer" + }, + "upload_time": { + "description": "strftime('%Y-%m-%dT%H:%M:%S') of when the release file was uploaded", + "type": "string" + }, + "upload_time_iso_8601": { + "description": "ISO 8601 timestamp of when the release file was uploaded", + "type": "string" + }, + "url": { + "description": "Downloadable URL of the release file", + "type": "string" + }, + "yanked": { + "description": "Is release file PEP 592 yanked", + "type": "boolean" + }, + "yanked_reason": { + "description": "PEP 592 reason for yanking release file", + "type": [ + "string", + "null" + ] + } + } + } + } + }, + "vulnerabilities": { + "type": "array", + "description": "A mapping of version identifiers to vulnerability information", + "items": { + "type": "object", + "properties": { + "aliases": { + "type": [ + "array", + "null" + ], + "description": "A list of CVE vulns", + "items": { + "type": "string" + } + }, + "details": { + "type": [ + "string", + "null" + ], + "description": "A description of the vulnerability" + }, + "fixed_in": { + "type": [ + "array", + "null" + ], + "description": "A list of versions that are fixed", + "items": { + "type": "string" + } + }, + "id": { + "type": [ + "string", + "null" + ] + }, + "link": { + "type": [ + "string", + "null" + ] + }, + "source": { + "type": [ + "string", + "null" + ] + } + } + } + } + } +} \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-pypi/source_pypi/schemas/release.json 
b/airbyte-integrations/connectors/source-pypi/source_pypi/schemas/release.json new file mode 100644 index 000000000000..1ed0c48013b0 --- /dev/null +++ b/airbyte-integrations/connectors/source-pypi/source_pypi/schemas/release.json @@ -0,0 +1,331 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Warehouse JSON API version 1.0", + "type": "object", + "required": [ + "info", + "last_serial", + "urls" + ], + "properties": { + "info": { + "type": "object", + "description": "Generic information about a specific version of a project", + "required": [ + "author", + "author_email", + "license", + "name", + "project_url", + "version", + "yanked", + "yanked_reason" + ], + "properties": { + "author": { + "description": "The name of the company or individual who created the project", + "type": "string" + }, + "author_email": { + "description": "The author's email address", + "type": "string" + }, + "bugtrack_url": { + "description": "URL to find issues and bugs for the project", + "type": [ + "string", + "null" + ] + }, + "classifiers": { + "description": "Trove Classifier. 
Corresponds to https://packaging.python.org/specifications/core-metadata/#classifier-multiple-use", + "type": "array", + "items": { + "type": "string" + } + }, + "description": { + "description": "Corresponds to https://packaging.python.org/specifications/core-metadata/#description", + "type": "string" + }, + "description_content_type": { + "description": "Corresponds to https://packaging.python.org/specifications/core-metadata/#description-content-type", + "type": [ + "string", + "null" + ] + }, + "docs_url": { + "description": "URL to the project's documentation", + "type": [ + "string", + "null" + ] + }, + "download_url": { + "description": "[DEPRECATED]", + "type": [ + "string", + "null" + ] + }, + "downloads": { + "description": "[DEPRECATED]", + "type": "object" + }, + "home_page": { + "description": "URL to project home page", + "type": "string" + }, + "keywords": { + "description": "Keywords to use for project searching", + "type": "string" + }, + "license": { + "description": "Project's open source license", + "type": [ + "string", + "null" + ] + }, + "maintainer": { + "description": "Project maintainer name", + "type": [ + "string", + "null" + ] + }, + "maintainer_email": { + "description": "Project maintainer email address", + "type": [ + "string", + "null" + ] + }, + "name": { + "description": "Project's raw (non-normailzed name)", + "type": "string" + }, + "package_url": { + "description": "URL to the project page", + "type": "string" + }, + "platform": { + "description": "[DEPRECATED]", + "type": ["null", "string"] + }, + "project_url": { + "description": "URL to the project page", + "type": "string" + }, + "project_urls": { + "description": "Additional URLs that are relevant to your project. 
Corresponds to https://packaging.python.org/specifications/core-metadata/#project-url-multiple-use", + "patternProperties": { + ".*": { + "type": ["null", "string"] + } + } + }, + "release_url": { + "description": "URL of the release page of the version of the project", + "type": "string" + }, + "requires_dist": { + "description": "Calculated project dependencies. Corresponds to https://packaging.python.org/specifications/core-metadata/#requires-dist-multiple-use", + "type": [ + "array", + "null" + ], + "items": { + "type": "string" + } + }, + "requires_python": { + "description": "Python runtime version required for project. Corresponds to https://packaging.python.org/specifications/core-metadata/#requires-python", + "type": [ + "string", + "null" + ] + }, + "summary": { + "description": "A one-line summary of what the distribution does.. Corresponds to https://packaging.python.org/specifications/core-metadata/#summary", + "type": [ + "string", + "null" + ] + }, + "version": { + "description": "A string containing the distributions version number in the format specified in PEP 440. Corresponds to https://packaging.python.org/specifications/core-metadata/#version", + "type": "string" + }, + "yanked": { + "description": "If the version has been yanked. 
As defined in PEP 592", + "type": "boolean" + }, + "yanked_reason": { + "description": "Reason for applying PEP 592 version yank", + "type": [ + "string", + "null" + ] + } + } + }, + "last_serial": { + "type": "integer", + "description": "Monotonically increasing integer sequence that changes every time the project is updated" + }, + "urls": { + "type": "array", + "description": "A list of release artifacts associated with a version", + "items": { + "release_file": { + "description": "A single downloadable and installable artifact", + "type": "object", + "required": [ + "digests", + "filename", + "packagetype", + "size", + "upload_time_iso_8601", + "url", + "yanked", + "yanked_reason" + ], + "properties": { + "comment_text": { + "description": "[DEPRECATED]", + "type": "string" + }, + "digests": { + "description": "The file checksums", + "type": "object", + "properties": { + "md5": { + "description": "The MD5 checksum of the release file", + "type": "string" + }, + "sha256": { + "description": "The SHA256 checksum of the release file", + "type": "string" + } + } + }, + "downloads": { + "description": "[DEPRECATED]", + "type": "integer" + }, + "filename": { + "description": "Full filename (including extension)", + "type": "string" + }, + "has_sig": { + "description": "Indicates whether a $(filename).asc GPG signature file was provided", + "type": "boolean" + }, + "md5_digest": { + "description": "[DEPRECATED]", + "type": "string" + }, + "packagetype": { + "description": "Release file type: 'sdist', 'bdist_wheel', etc", + "type": "string" + }, + "python_version": { + "description": "Can be 'source' or Python Tag as defined in https://www.python.org/dev/peps/pep-0425/#python-tag", + "type": "string" + }, + "requires_python": { + "description": "Python runtime version required for project. 
Corresponds to https://packaging.python.org/specifications/core-metadata/#requires-python", + "type": [ + "string", + "null" + ] + }, + "size": { + "description": "File size in bytes", + "type": "integer" + }, + "upload_time": { + "description": "strftime('%Y-%m-%dT%H:%M:%S') of when the release file was uploaded", + "type": "string" + }, + "upload_time_iso_8601": { + "description": "ISO 8601 timestamp of when the release file was uploaded", + "type": "string" + }, + "url": { + "description": "Downloadable URL of the release file", + "type": "string" + }, + "yanked": { + "description": "Is release file PEP 592 yanked", + "type": "boolean" + }, + "yanked_reason": { + "description": "PEP 592 reason for yanking release file", + "type": [ + "string", + "null" + ] + } + } + } + } + }, + "vulnerabilities": { + "type": "array", + "description": "A mapping of version identifiers to vulnerability information", + "items": { + "type": "object", + "properties": { + "aliases": { + "type": [ + "array", + "null" + ], + "description": "A list of CVE vulns", + "items": { + "type": "string" + } + }, + "details": { + "type": [ + "string", + "null" + ], + "description": "A description of the vulnerability" + }, + "fixed_in": { + "type": [ + "array", + "null" + ], + "description": "A list of versions that are fixed", + "items": { + "type": "string" + } + }, + "id": { + "type": [ + "string", + "null" + ] + }, + "link": { + "type": [ + "string", + "null" + ] + }, + "source": { + "type": [ + "string", + "null" + ] + } + } + } + } + } +} \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-pypi/source_pypi/schemas/stats.json b/airbyte-integrations/connectors/source-pypi/source_pypi/schemas/stats.json new file mode 100644 index 000000000000..b764c04d10f6 --- /dev/null +++ b/airbyte-integrations/connectors/source-pypi/source_pypi/schemas/stats.json @@ -0,0 +1,30 @@ +{ + "$schema" : "http://json-schema.org/draft-07/schema#", + "title" : "PyPI status", + "type" : 
"object", + "required" : [ + "top_packages", + "total_packages_size" + ], + "properties" : { + "top_packages" : { + "type" : "object", + "patternProperties" : { + ".*" : { + "type" : "object", + "required" : [ + "size" + ], + "properties" : { + "size" : { + "type" : ["null", "integer"] + } + } + } + } + }, + "total_packages_size" : { + "type" : "integer" + } + } +} \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-pypi/source_pypi/source.py b/airbyte-integrations/connectors/source-pypi/source_pypi/source.py new file mode 100644 index 000000000000..754a3600be54 --- /dev/null +++ b/airbyte-integrations/connectors/source-pypi/source_pypi/source.py @@ -0,0 +1,18 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. +# + +from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource + +""" +This file provides the necessary constructs to interpret a provided declarative YAML configuration file into +source connector. + +WARNING: Do not modify this file. +""" + + +# Declarative Source +class SourcePypi(YamlDeclarativeSource): + def __init__(self): + super().__init__(**{"path_to_yaml": "pypi.yaml"}) diff --git a/airbyte-integrations/connectors/source-pypi/source_pypi/spec.yaml b/airbyte-integrations/connectors/source-pypi/source_pypi/spec.yaml new file mode 100644 index 000000000000..0559dec96114 --- /dev/null +++ b/airbyte-integrations/connectors/source-pypi/source_pypi/spec.yaml @@ -0,0 +1,25 @@ +documentationUrl: https://docs.airbyte.io/integrations/sources/pypi +connectionSpecification: + $schema: http://json-schema.org/draft-07/schema# + title: Pypi Spec + type: object + required: + - project_name + additionalProperties: true + properties: + project_name: + type: string + title: PyPI Package + description: >- + Name of the project/package. Can only be in lowercase with hyphen. + This is the name used using pip command for installing the package. 
+ examples: + - sampleproject + version: + title: Package Version + type: string + description: >- + Version of the project/package. + Use it to find a particular release instead of all releases. + examples: + - 1.2.0 \ No newline at end of file diff --git a/docs/integrations/sources/pypi.md b/docs/integrations/sources/pypi.md new file mode 100644 index 000000000000..10f096fc0a6e --- /dev/null +++ b/docs/integrations/sources/pypi.md @@ -0,0 +1,31 @@ +# PyPI + +This page guides you through the process of setting up the PyPI source connector. + +## Setup guide +### Get package name from PyPI +This is the name shown in the `pip install package_name` box. For example, `airbyte-cdk` is the package name for [airbyte-cdk](https://pypi.org/project/airbyte-cdk/). + +Optionally, provide a version. If no version is provided, the release stream, which contains data for a particular version, cannot be used. The project stream is the same as the release stream but contains data for all versions. + +## Supported streams and sync modes + +* [Project](https://warehouse.pypa.io/api-reference/json.html#project) +* [Release](https://warehouse.pypa.io/api-reference/json.html#release) +* [Stats](https://warehouse.pypa.io/api-reference/stats.html) + +### Performance considerations + +Due to the heavy caching and CDN use, there is currently no rate limiting of PyPI APIs at the edge. + +In addition, PyPI reserves the right to temporarily or permanently prohibit a consumer based on irresponsible activity. + +Try not to make a lot of requests (thousands) in a short amount of time (minutes). Generally PyPI can handle it, but it’s preferred to make requests in serial over a longer amount of time if possible. + +## Changelog + +| Version | Date | Pull Request | Subject | +|:--------|:-----------|:---------------------------------------------------------|:----------------| +| 0.1.0 | 2022-10-29 | [18632](https://github.com/airbytehq/airbyte/pull/18632) | Initial Release | + +