Skip to content

Commit

Permalink
Source iterable: fix pendulum.parse memory leak (#19913)
Browse files Browse the repository at this point in the history
Signed-off-by: Sergey Chvalyuk <[email protected]>
  • Loading branch information
grubberr authored Dec 2, 2022
1 parent ef13c3e commit 3c6f979
Show file tree
Hide file tree
Showing 9 changed files with 46 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -753,7 +753,7 @@
- name: Iterable
sourceDefinitionId: 2e875208-0c0b-4ee4-9e92-1cb3156ea799
dockerRepository: airbyte/source-iterable
dockerImageTag: 0.1.21
dockerImageTag: 0.1.22
documentationUrl: https://docs.airbyte.com/integrations/sources/iterable
icon: iterable.svg
sourceType: api
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6499,7 +6499,7 @@
supportsNormalization: false
supportsDBT: false
supported_destination_sync_modes: []
- dockerImage: "airbyte/source-iterable:0.1.21"
- dockerImage: "airbyte/source-iterable:0.1.22"
spec:
documentationUrl: "https://docs.airbyte.com/integrations/sources/iterable"
connectionSpecification:
Expand Down
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-iterable/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ RUN pip install .
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

LABEL io.airbyte.version=0.1.21
LABEL io.airbyte.version=0.1.22
LABEL io.airbyte.name=airbyte/source-iterable
1 change: 1 addition & 0 deletions airbyte-integrations/connectors/source-iterable/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
MAIN_REQUIREMENTS = [
"airbyte-cdk",
"pendulum~=2.1.2",
"python-dateutil~=2.8.2",
"requests~=2.25",
]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from requests import codes
from requests.exceptions import ChunkedEncodingError
from source_iterable.slice_generators import AdjustableSliceGenerator, RangeSliceGenerator, StreamSlice
from source_iterable.utils import dateutil_parse

EVENT_ROWS_LIMIT = 200
CAMPAIGNS_PER_REQUEST = 20
Expand Down Expand Up @@ -137,7 +138,7 @@ def _field_to_datetime(value: Union[int, str]) -> pendulum.datetime:
if isinstance(value, int):
value = pendulum.from_timestamp(value / 1000.0)
elif isinstance(value, str):
value = pendulum.parse(value, strict=False)
value = dateutil_parse(value)
else:
raise ValueError(f"Unsupported type of datetime field {type(value)}")
return value
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
#

import dateutil.parser
import pendulum


def dateutil_parse(text):
    """
    Parse a date/time string into a pendulum datetime.

    Drop-in replacement for `pendulum.parse(text, strict=False)`, used to avoid a memory leak.
    More details https://github.com/airbytehq/airbyte/pull/19913

    The string is parsed with `dateutil.parser.parse` and the resulting fields are copied
    into a new pendulum datetime. If the parsed value carries no timezone, UTC is used.
    """
    # NOTE(review): dateutil presumably fills fields missing from `text` (e.g. a bare year)
    # from its own defaults rather than from January 1st -- confirm before relying on partial dates.
    parsed = dateutil.parser.parse(text)
    timezone = parsed.tzinfo if parsed.tzinfo else pendulum.tz.UTC
    return pendulum.datetime(
        year=parsed.year,
        month=parsed.month,
        day=parsed.day,
        hour=parsed.hour,
        minute=parsed.minute,
        second=parsed.second,
        microsecond=parsed.microsecond,
        tz=timezone,
    )
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
Templates,
Users,
)
from source_iterable.utils import dateutil_parse


@pytest.mark.parametrize(
Expand Down Expand Up @@ -80,15 +81,15 @@ def test_templates_parse_response():
rsps.add(
responses.GET,
"https://api.iterable.com/api/1/foobar",
json={"templates": [{"createdAt": "2022", "id": 1}]},
json={"templates": [{"createdAt": "2022-01-01", "id": 1}]},
status=200,
content_type="application/json",
)
resp = requests.get("https://api.iterable.com/api/1/foobar")

records = stream.parse_response(response=resp)

assert list(records) == [{"id": 1, "createdAt": pendulum.parse("2022", strict=False)}]
assert list(records) == [{"id": 1, "createdAt": dateutil_parse("2022-01-01")}]


def test_list_users_parse_response():
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
#

import pendulum
from source_iterable.utils import dateutil_parse


def test_dateutil_parse():
    """Check that `dateutil_parse` agrees with `pendulum.parse(..., strict=False)` on sample timestamps."""
    samples = (
        "2021-04-08 14:23:30 +00:00",  # space-separated date/time with a spaced offset
        "2021-04-14T16:51:23+00:00",  # ISO 8601 with "T" separator
        "2021-04-14T16:23:30.700000+00:00",  # ISO 8601 with fractional seconds
    )
    for sample in samples:
        assert pendulum.parse(sample, strict=False) == dateutil_parse(sample)
1 change: 1 addition & 0 deletions docs/integrations/sources/iterable.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ The Iterable source connector supports the following [sync modes](https://docs.a

| Version | Date | Pull Request | Subject |
|:--------|:-----------|:---------------------------------------------------------|:---------------------------------------------------------------------------|
| 0.1.22 | 2022-11-30 | [19913](https://github.com/airbytehq/airbyte/pull/19913) | Replace pendulum.parse -> dateutil.parser.parse to avoid memory leak |
| 0.1.21 | 2022-10-27 | [18537](https://github.com/airbytehq/airbyte/pull/18537) | Improve streams discovery |
| 0.1.20 | 2022-10-21 | [18292](https://github.com/airbytehq/airbyte/pull/18292) | Better processing of 401 and 429 errors |
| 0.1.19 | 2022-10-05 | [17602](https://github.com/airbytehq/airbyte/pull/17602) | Add check for stream permissions |
Expand Down

0 comments on commit 3c6f979

Please sign in to comment.