Skip to content

Commit

Permalink
Source Github: Use default behaviour, retry on 429 and all 5XX errors (
Browse files Browse the repository at this point in the history
…#17852)

Signed-off-by: Sergey Chvalyuk <[email protected]>
  • Loading branch information
grubberr authored Oct 12, 2022
1 parent feab43a commit 8e6fb79
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@
- name: GitHub
sourceDefinitionId: ef69ef6e-aa7f-4af1-a01d-ef775033524e
dockerRepository: airbyte/source-github
dockerImageTag: 0.3.5
dockerImageTag: 0.3.6
documentationUrl: https://docs.airbyte.com/integrations/sources/github
icon: github.svg
sourceType: api
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3502,7 +3502,7 @@
supportsNormalization: false
supportsDBT: false
supported_destination_sync_modes: []
- dockerImage: "airbyte/source-github:0.3.5"
- dockerImage: "airbyte/source-github:0.3.6"
spec:
documentationUrl: "https://docs.airbyte.com/integrations/sources/github"
connectionSpecification:
Expand Down
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-github/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ RUN pip install .
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

LABEL io.airbyte.version=0.3.5
LABEL io.airbyte.version=0.3.6
LABEL io.airbyte.name=airbyte/source-github
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import time
from abc import ABC, abstractmethod
from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Union
from typing import Any, Iterable, List, Mapping, MutableMapping, Optional
from urllib import parse

import pendulum
Expand Down Expand Up @@ -66,8 +66,9 @@ def check_graphql_rate_limited(self, response_json) -> bool:
return False

def should_retry(self, response: requests.Response) -> bool:
# We don't call `super()` here because we have custom error handling and GitHub API sometimes returns strange
# errors. So in `read_records()` we have custom error handling which don't require to call `super()` here.
if super().should_retry(response):
return True

retry_flag = (
# The GitHub GraphQL API has limitations
# https://docs.github.com/en/graphql/overview/resource-limitations
Expand All @@ -77,12 +78,7 @@ def should_retry(self, response: requests.Response) -> bool:
or response.headers.get("X-RateLimit-Remaining") == "0"
# Secondary rate limits
# https://docs.github.com/en/rest/overview/resources-in-the-rest-api#secondary-rate-limits
or response.headers.get("Retry-After")
or response.status_code
in (
requests.codes.SERVER_ERROR,
requests.codes.BAD_GATEWAY,
)
or "Retry-After" in response.headers
)
if retry_flag:
self.logger.info(
Expand All @@ -91,22 +87,20 @@ def should_retry(self, response: requests.Response) -> bool:

return retry_flag

def backoff_time(self, response: requests.Response) -> Union[int, float]:
def backoff_time(self, response: requests.Response) -> Optional[float]:
# This method is called if we run into the rate limit. GitHub limits requests to 5000 per hour and provides
# `X-RateLimit-Reset` header which contains time when this hour will be finished and limits will be reset so
# we again could have 5000 per another hour.

if response.status_code in (requests.codes.SERVER_ERROR, requests.codes.BAD_GATEWAY):
return None
min_backoff_time = 60.0

retry_after = int(response.headers.get("Retry-After", 0))
if retry_after:
return retry_after
retry_after = response.headers.get("Retry-After")
if retry_after is not None:
return max(float(retry_after), min_backoff_time)

reset_time = response.headers.get("X-RateLimit-Reset")
backoff_time = float(reset_time) - time.time() if reset_time else 60

return max(backoff_time, 60) # This is a guarantee that no negative value will be returned.
if reset_time:
return max(float(reset_time) - time.time(), min_backoff_time)

def get_error_display_message(self, exception: BaseException) -> Optional[str]:
if (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
PullRequestStats,
Releases,
Repositories,
RepositoryStats,
Reviews,
Stargazers,
Tags,
Expand Down Expand Up @@ -72,8 +73,12 @@ def test_internal_server_error_retry(time_mock):
[
(HTTPStatus.BAD_GATEWAY, {}, None),
(HTTPStatus.INTERNAL_SERVER_ERROR, {}, None),
(HTTPStatus.FORBIDDEN, {"Retry-After": 120}, 120),
(HTTPStatus.FORBIDDEN, {"X-RateLimit-Reset": 1655804724}, 300.0),
(HTTPStatus.SERVICE_UNAVAILABLE, {}, None),
(HTTPStatus.FORBIDDEN, {"Retry-After": "0"}, 60),
(HTTPStatus.FORBIDDEN, {"Retry-After": "30"}, 60),
(HTTPStatus.FORBIDDEN, {"Retry-After": "120"}, 120),
(HTTPStatus.FORBIDDEN, {"X-RateLimit-Reset": "1655804454"}, 60.0),
(HTTPStatus.FORBIDDEN, {"X-RateLimit-Reset": "1655804724"}, 300.0),
],
)
@patch("time.time", return_value=1655804424.0)
Expand All @@ -86,6 +91,28 @@ def test_backoff_time(time_mock, http_status, response_headers, expected_backoff
assert stream.backoff_time(response_mock) == expected_backoff_time


@pytest.mark.parametrize(
("http_status", "response_headers", "text"),
[
(HTTPStatus.OK, {"X-RateLimit-Resource": "graphql"}, '{"errors": [{"type": "RATE_LIMITED"}]}'),
(HTTPStatus.OK, {"X-RateLimit-Remaining": "0"}, ""),
(HTTPStatus.FORBIDDEN, {"Retry-After": "0"}, ""),
(HTTPStatus.FORBIDDEN, {"Retry-After": "60"}, ""),
(HTTPStatus.INTERNAL_SERVER_ERROR, {}, ""),
(HTTPStatus.BAD_GATEWAY, {}, ""),
(HTTPStatus.SERVICE_UNAVAILABLE, {}, ""),
],
)
def test_should_retry(http_status, response_headers, text):
stream = RepositoryStats(repositories=["test_repo"], page_size_for_large_streams=30)
response_mock = MagicMock()
response_mock.status_code = http_status
response_mock.headers = response_headers
response_mock.text = text
response_mock.json = lambda: json.loads(text)
assert stream.should_retry(response_mock)


@responses.activate
@patch("time.sleep")
def test_retry_after(time_mock):
Expand Down
1 change: 1 addition & 0 deletions docs/integrations/sources/github.md
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ The GitHub connector should not run into GitHub API limitations under normal usa

| Version | Date | Pull Request | Subject |
| :------ | :--------- | :---------------------------------------------------------------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| 0.3.6 | 2022-10-11 | [17852](https://github.com/airbytehq/airbyte/pull/17852) | Use default behaviour, retry on 429 and all 5XX errors |
| 0.3.5 | 2022-10-07 | [17715](https://github.com/airbytehq/airbyte/pull/17715) | Improve 502 handling for `comments` stream |
| 0.3.4 | 2022-10-04 | [17555](https://github.com/airbytehq/airbyte/pull/17555) | Skip repository if got HTTP 500 for WorkflowRuns stream |
| 0.3.3 | 2022-09-28 | [17287](https://github.com/airbytehq/airbyte/pull/17287) | Fix problem with "null" `cursor_field` for WorkflowJobs stream |
Expand Down

0 comments on commit 8e6fb79

Please sign in to comment.