diff --git a/poetry.lock b/poetry.lock index 8d6ab69c..5a5b90fd 100644 --- a/poetry.lock +++ b/poetry.lock @@ -181,6 +181,17 @@ mccabe = ">=0.7.0,<0.8.0" pycodestyle = ">=2.9.0,<2.10.0" pyflakes = ">=2.5.0,<2.6.0" +[[package]] +name = "freezegun" +version = "1.2.2" +description = "Let your Python tests travel through time" +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +python-dateutil = ">=2.7" + [[package]] name = "fs" version = "2.4.16" @@ -616,6 +627,22 @@ redis = ["redis (>=3)"] security = ["itsdangerous (>=2.0)"] yaml = ["pyyaml (>=5.4)"] +[[package]] +name = "requests-mock" +version = "1.10.0" +description = "Mock out responses from the requests package" +category = "dev" +optional = false +python-versions = "*" + +[package.dependencies] +requests = ">=2.3,<3" +six = "*" + +[package.extras] +fixture = ["fixtures"] +test = ["fixtures", "mock", "purl", "pytest", "requests-futures", "sphinx", "testrepository (>=0.0.18)", "testtools"] + [[package]] name = "setuptools" version = "65.5.1" @@ -737,7 +764,7 @@ python-versions = ">=3.6" [[package]] name = "types-beautifulsoup4" -version = "4.11.6" +version = "4.11.6.1" description = "Typing stubs for beautifulsoup4" category = "dev" optional = false @@ -745,7 +772,7 @@ python-versions = "*" [[package]] name = "types-python-dateutil" -version = "2.8.19.2" +version = "2.8.19.3" description = "Typing stubs for python-dateutil" category = "main" optional = false @@ -753,7 +780,7 @@ python-versions = "*" [[package]] name = "types-requests" -version = "2.28.11.2" +version = "2.28.11.4" description = "Typing stubs for requests" category = "dev" optional = false @@ -764,7 +791,7 @@ types-urllib3 = "<1.27" [[package]] name = "types-simplejson" -version = "3.17.7.1" +version = "3.17.7.2" description = "Typing stubs for simplejson" category = "main" optional = false @@ -772,7 +799,7 @@ python-versions = "*" [[package]] name = "types-urllib3" -version = "1.26.25.1" +version = "1.26.25.3" 
description = "Typing stubs for urllib3" category = "dev" optional = false @@ -825,7 +852,7 @@ testing = ["flake8 (<5)", "func-timeout", "jaraco.functools", "jaraco.itertools" [metadata] lock-version = "1.1" python-versions = "<3.12,>=3.7.2" -content-hash = "4edb4de509d11e6e2c91dac2d3ab67f59b56d5ebc54be261af37e68fa80c4187" +content-hash = "fa5d8452093b2f91027c40a63e0a5aac7f77ca7c50fd29aed6f4b52726734f28" [metadata.files] appdirs = [ @@ -993,6 +1020,10 @@ flake8 = [ {file = "flake8-5.0.4-py2.py3-none-any.whl", hash = "sha256:7a1cf6b73744f5806ab95e526f6f0d8c01c66d7bbe349562d22dfca20610b248"}, {file = "flake8-5.0.4.tar.gz", hash = "sha256:6fbe320aad8d6b95cec8b8e47bc933004678dc63095be98528b7bdd2a9f510db"}, ] +freezegun = [ + {file = "freezegun-1.2.2-py3-none-any.whl", hash = "sha256:ea1b963b993cb9ea195adbd893a48d573fda951b0da64f60883d7e988b606c9f"}, + {file = "freezegun-1.2.2.tar.gz", hash = "sha256:cd22d1ba06941384410cd967d8a99d5ae2442f57dfafeff2fda5de8dc5c05446"}, +] fs = [ {file = "fs-2.4.16-py2.py3-none-any.whl", hash = "sha256:660064febbccda264ae0b6bace80a8d1be9e089e0a5eb2427b7d517f9a91545c"}, {file = "fs-2.4.16.tar.gz", hash = "sha256:ae97c7d51213f4b70b6a958292530289090de3a7e15841e108fbe144f069d313"}, @@ -1294,6 +1325,10 @@ requests-cache = [ {file = "requests_cache-0.9.7-py3-none-any.whl", hash = "sha256:3f57badcd8406ecda7f8eaa8145afd0b180c5ae4ff05165a2c4d40f3dc88a6e5"}, {file = "requests_cache-0.9.7.tar.gz", hash = "sha256:b7c26ea98143bac7058fad6e773d56c3442eabc0da9ea7480af5edfc134ff515"}, ] +requests-mock = [ + {file = "requests-mock-1.10.0.tar.gz", hash = "sha256:59c9c32419a9fb1ae83ec242d98e889c45bd7d7a65d48375cc243ec08441658b"}, + {file = "requests_mock-1.10.0-py2.py3-none-any.whl", hash = "sha256:2fdbb637ad17ee15c06f33d31169e71bf9fe2bdb7bc9da26185be0dd8d842699"}, +] setuptools = [ {file = "setuptools-65.5.1-py3-none-any.whl", hash = "sha256:d0b9a8433464d5800cbe05094acf5c6d52a91bfac9b52bcfc4d41382be5d5d31"}, {file = "setuptools-65.5.1.tar.gz", hash = 
"sha256:e197a19aa8ec9722928f2206f8de752def0e4c9fc6953527360d1c36d94ddb2f"}, @@ -1447,24 +1482,24 @@ typed-ast = [ {file = "typed_ast-1.5.4.tar.gz", hash = "sha256:39e21ceb7388e4bb37f4c679d72707ed46c2fbf2a5609b8b8ebc4b067d977df2"}, ] types-beautifulsoup4 = [ - {file = "types-beautifulsoup4-4.11.6.tar.gz", hash = "sha256:2670dd71995df464041e2941fa9bbb694795271e3dedd7262b4766649a1cbe82"}, - {file = "types_beautifulsoup4-4.11.6-py3-none-any.whl", hash = "sha256:ac9dd1383481201ea07f27c5a43e7b1ee71caf9c720b7ae951db15d60d126e80"}, + {file = "types-beautifulsoup4-4.11.6.1.tar.gz", hash = "sha256:d46be8f409ddccb6daaa9d118484185e70bcf552085c39c6d05b157cd1462e04"}, + {file = "types_beautifulsoup4-4.11.6.1-py3-none-any.whl", hash = "sha256:c1f803367a2b07ad4fdac40ddbea557010dc4ddd1ee92d801f317eb02e2e3c72"}, ] types-python-dateutil = [ - {file = "types-python-dateutil-2.8.19.2.tar.gz", hash = "sha256:e6e32ce18f37765b08c46622287bc8d8136dc0c562d9ad5b8fd158c59963d7a7"}, - {file = "types_python_dateutil-2.8.19.2-py3-none-any.whl", hash = "sha256:3f4dbe465e7e0c6581db11fd7a4855d1355b78712b3f292bd399cd332247e9c0"}, + {file = "types-python-dateutil-2.8.19.3.tar.gz", hash = "sha256:a313284df5ed3fd078303262edc0efde28998cd08e5061ef1ccc0bb5fef4d2da"}, + {file = "types_python_dateutil-2.8.19.3-py3-none-any.whl", hash = "sha256:ce6af1bdf0aca6b7dc8815a664f0e8b55da91ff7851102cf87c87178e7c8e7ec"}, ] types-requests = [ - {file = "types-requests-2.28.11.2.tar.gz", hash = "sha256:fdcd7bd148139fb8eef72cf4a41ac7273872cad9e6ada14b11ff5dfdeee60ed3"}, - {file = "types_requests-2.28.11.2-py3-none-any.whl", hash = "sha256:14941f8023a80b16441b3b46caffcbfce5265fd14555844d6029697824b5a2ef"}, + {file = "types-requests-2.28.11.4.tar.gz", hash = "sha256:d4f342b0df432262e9e326d17638eeae96a5881e78e7a6aae46d33870d73952e"}, + {file = "types_requests-2.28.11.4-py3-none-any.whl", hash = "sha256:bdb1f9811e53d0642c8347b09137363eb25e1a516819e190da187c29595a1df3"}, ] types-simplejson = [ - {file = 
"types-simplejson-3.17.7.1.tar.gz", hash = "sha256:cf490006a775972f15b9524f4838f8557d4609d68e4d4a214b33f0a87830cc19"}, - {file = "types_simplejson-3.17.7.1-py3-none-any.whl", hash = "sha256:5860a7d88e966f678408489bfb3c5fbf814350afc290292817e6af11c5ba200a"}, + {file = "types-simplejson-3.17.7.2.tar.gz", hash = "sha256:a4968032706c4460e7fbe6ba66c5744ae7c9b94afd7c0abead7d54ab3fa67127"}, + {file = "types_simplejson-3.17.7.2-py3-none-any.whl", hash = "sha256:ee1a49ed92b92a7adeaabb3413b71bad3623fb731791c5614db763ce9f195725"}, ] types-urllib3 = [ - {file = "types-urllib3-1.26.25.1.tar.gz", hash = "sha256:a948584944b2412c9a74b9cf64f6c48caf8652cb88b38361316f6d15d8a184cd"}, - {file = "types_urllib3-1.26.25.1-py3-none-any.whl", hash = "sha256:f6422596cc9ee5fdf68f9d547f541096a20c2dcfd587e37c804c9ea720bf5cb2"}, + {file = "types-urllib3-1.26.25.3.tar.gz", hash = "sha256:1807b87b8ee1ae0226813ba2c52330eff20fb2bf6359b1de24df08eb3090e442"}, + {file = "types_urllib3-1.26.25.3-py3-none-any.whl", hash = "sha256:a188c24fc61a99658c8c324c8dd7419f5b91a0d89df004e5f576869122c1db55"}, ] typing-extensions = [ {file = "typing_extensions-4.4.0-py3-none-any.whl", hash = "sha256:16fa4864408f655d35ec496218b85f79b3437c829e93320c7c9215ccfd92489e"}, diff --git a/pyproject.toml b/pyproject.toml index ceb17571..6d29bc42 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,6 +52,8 @@ types-requests = "^2.25.6" types-python-dateutil = "^2.8.9" requests-cache = "^0.9.1" isort = "^5.10.1" +freezegun = "^1.2.1" +requests-mock = "^1.9.3" [[tool.mypy.overrides]] module = [ diff --git a/tap_github/client.py b/tap_github/client.py index ec2ddba1..a7f6e72a 100644 --- a/tap_github/client.py +++ b/tap_github/client.py @@ -57,6 +57,32 @@ def http_headers(self) -> Dict[str, str]: headers["User-Agent"] = cast(str, self.config.get("user_agent", "tap-github")) return headers + def get_records(self, context: Optional[dict]) -> Iterable[Dict[str, Any]]: + """Return a generator of row-type dictionary objects. 
def get_records(self, context: Optional[dict]) -> Iterable[Dict[str, Any]]:
    """Yield the parent stream's records, then bookmark the signpost value.

    Rows come unchanged from ``super().get_records``. After that generator
    is exhausted, and only when ``use_fake_since_parameter`` is truthy, a
    synthetic one-key record holding the signpost value under the
    replication key is passed to ``_increment_stream_state``.

    Args:
        context: Stream partition or context dictionary.

    Yields:
        One item per (possibly processed) record in the API.
    """
    yield from super().get_records(context)

    # Important - update state for streams in descending order.
    # Everything below runs only once the parent generator is fully consumed.
    if not self.use_fake_since_parameter:
        return

    state = self.get_context_state(context)
    # Both markers must be present in the state before the bookmark is moved.
    if {"replication_key_signpost", "replication_key"}.issubset(state.keys()):
        signpost_record: Dict[str, Any] = {
            state["replication_key"]: state["replication_key_signpost"]
        }
        self._increment_stream_state(
            latest_record=signpost_record,
            context=context,
        )
@pytest.fixture
def mock_commits_response():
    """
    Provide a canned commits-endpoint payload: three commits, newest first.
    Used to test descending streams and bookmarks interaction.
    The payload is a heavily trimmed version of a real response, to save space.
    """

    def github_committer(date):
        # Every commit in the payload shares this committer identity;
        # only the date differs.
        return {"name": "GitHub", "email": "noreply@github.com", "date": date}

    newest_commit = {
        "sha": "1a984f582592fc5979d78bb0151707deefadde6f",
        "node_id": "C_kwDOFcLMsNoAKDFhOTg0ZjU4MjU5MmZjNTk3OWQ3OGJiMDE1MTcwN2RlZWZhZGRlNmY",
        "commit": {
            "author": {"date": "2022-07-01T13:47:56Z"},
            "committer": github_committer("2022-07-01T13:47:56Z"),
            "message": "Wait and retry on secondary limits (#153)",
            "tree": {"sha": "d3b0aa136fff77eaf84684dd25116f59fba2c2e6"},
            "verification": {"verified": True},
        },
        "author": {},
        "committer": {},
        "parents": [{"sha": "14ed2b3401a73ea25bfb5206acba836a0e7a8683"}],
    }

    # This one has no author date, no tree/verification, and a populated
    # top-level committer.
    middle_commit = {
        "sha": "14ed2b3401a73ea25bfb5206acba836a0e7a8683",
        "node_id": "C_kwDOFcLMsNoAKDE0ZWQyYjM0MDFhNzNlYTI1YmZiNTIwNmFjYmE4MzZhMGU3YTg2ODM",
        "commit": {
            "author": {},
            "committer": github_committer("2022-06-24T20:18:19Z"),
            "message": "Update sdk to fix bug in api costs hook (#151)",
        },
        "author": {},
        "committer": {
            "login": "web-flow",
            "id": 19864447,
            "node_id": "MDQ6VXNlcjE5ODY0NDQ3",
        },
        "parents": [{"sha": "16d6d7a91f520bb33e3590daf90fc9c699bc92a8"}],
    }

    oldest_commit = {
        "sha": "16d6d7a91f520bb33e3590daf90fc9c699bc92a8",
        "node_id": "C_kwDOFcLMsNoAKDE2ZDZkN2E5MWY1MjBiYjMzZTM1OTBkYWY5MGZjOWM2OTliYzkyYTg",
        "commit": {
            "author": {"date": "2022-06-24T19:28:46Z"},
            "committer": github_committer("2022-06-24T19:28:46Z"),
            "message": "Bypass since parameter in issue_comments to avoid server errors (#150)",
            "tree": {"sha": "5140a94f7436dbf230133c83410155b57b787739"},
            "verification": {"verified": True},
        },
        "author": {},
        "committer": {},
        "parents": [{"sha": "438a2346cf91612dad553d7cdd6b9ebd27a7e1d4"}],
    }

    return [newest_commit, middle_commit, oldest_commit]
@pytest.mark.repo_list(["MeltanoLabs/tap-github"])
def test_replication_key_for_desc_streams(
    repo_list_config: dict, mock_commits_response
):
    """Verify that the stream correctly saves bookmarks for streams
    that are ordered in descending order.

    The commits endpoint is mocked with a fixed three-record response and the
    clock is frozen just after the newest mocked commit. After a full sync the
    commits bookmark must equal the newest commit's timestamp.

    Args:
        repo_list_config: Tap configuration fixture for the single repo
            named in the ``repo_list`` marker.
        mock_commits_response: Fixed commits payload served by the mock.
    """
    # instantiate a tap for the commits stream, with 1 single repo
    tap1 = TapGitHub(config=repo_list_config)
    tap1.run_discovery()
    catalog = Catalog.from_dict(tap1.catalog_dict)
    cat_helpers.deselect_all_streams(catalog)
    cat_helpers.set_catalog_stream_selected(
        catalog=catalog,
        stream_name="commits",
        selected=True,
    )
    tap2 = TapGitHub(config=repo_list_config, catalog=catalog.to_dict())
    # set pagination to 3 records/page (to reduce fixture size)
    for stream in tap2.streams.values():
        stream.MAX_PER_PAGE = 3  # type: ignore

    # mock all calls to the commits endpoint (this test should just use 1)
    mocked_url = "https://api.github.com/repos/MeltanoLabs/tap-github/commits"

    # pretend that the date is 2022-07-01T14:00:00 (just after the latest expected
    # commit in the repo (in the mock)); non-mocked calls are forwarded to the
    # actual server
    with freeze_time("2022-07-01 14:00:00"), requests_mock.Mocker(
        real_http=True
    ) as m:
        m.get(mocked_url, json=mock_commits_response)
        # sync the stream, which will return the mocked response
        # that contains 3 records
        tap2.sync_all()

    # get the final state for the commits stream; a direct lookup fails loudly
    # if the stream is missing instead of comparing None against the expectation
    commits_state = tap2.streams["commits"].stream_state
    # the bookmark should be the timestamp of the latest commit
    assert commits_state == {
        "partitions": [
            {
                "context": {"org": "MeltanoLabs", "repo": "tap-github"},
                "replication_key": "commit_timestamp",
                "replication_key_value": "2022-07-01T13:47:56Z",
            }
        ]
    }