From eaa1eb6733bc0c32483f220839b8aecf5493c1e7 Mon Sep 17 00:00:00 2001 From: VishalP <20889199+Vi6hal@users.noreply.github.com> Date: Wed, 28 Sep 2022 09:57:18 +0530 Subject: [PATCH 01/39] added pre-commit --- tap_zendesk_chat/.pre-commit-config.yaml | 57 ++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 tap_zendesk_chat/.pre-commit-config.yaml diff --git a/tap_zendesk_chat/.pre-commit-config.yaml b/tap_zendesk_chat/.pre-commit-config.yaml new file mode 100644 index 0000000..644fe89 --- /dev/null +++ b/tap_zendesk_chat/.pre-commit-config.yaml @@ -0,0 +1,57 @@ +default_stages: [commit] +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.3.0 + hooks: + - id: check-merge-conflict + - id: check-docstring-first + - id: debug-statements + - id: trailing-whitespace + - id: check-toml + - id: end-of-file-fixer + - id: check-yaml + - id: sort-simple-yaml + - id: check-json + - id: pretty-format-json + args: ['--autofix','--no-sort-keys'] + + - repo: https://github.com/pycqa/isort + rev: 5.10.1 + hooks: + - id: isort + + - repo: https://github.com/psf/black + rev: 22.8.0 + hooks: + - id: black + + - repo: https://github.com/pycqa/flake8 + rev: 5.0.4 + hooks: + - id: flake8 + additional_dependencies: [ + 'flake8-print', + 'flake8-debugger', + ] + + - repo: https://github.com/PyCQA/bandit + rev: '1.7.4' + hooks: + - id: bandit + + - repo: https://github.com/asottile/pyupgrade + rev: v2.37.3 + hooks: + - id: pyupgrade + args: [--py37-plus] + + - repo: https://github.com/PyCQA/docformatter + rev: v1.5.0 + hooks: + - id: docformatter + args: [--in-place] + + - repo: https://github.com/codespell-project/codespell + rev: v2.2.1 + hooks: + - id: codespell \ No newline at end of file From 30bfd30b77edc44e16cef3b2dcd8ca67e854b31b Mon Sep 17 00:00:00 2001 From: VishalP <20889199+Vi6hal@users.noreply.github.com> Date: Thu, 29 Sep 2022 00:59:13 +0530 Subject: [PATCH 02/39] added pre-commit and updated gitignore --- .gitignore | 31 ++++++------------- ...mit-config.yaml => .pre-commit-config.yaml | 0 2 files changed, 10 insertions(+), 21 deletions(-) rename tap_zendesk_chat/.pre-commit-config.yaml => .pre-commit-config.yaml (100%) diff --git a/.gitignore b/.gitignore index 5386653..9e13d77 100644 --- a/.gitignore +++ b/.gitignore @@ -45,20 +45,7 @@ coverage.xml *,cover .hypothesis/ -# Translations -*.mo -*.pot -# Django stuff: -*.log -local_settings.py - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy # Sphinx documentation docs/_build/ @@ -72,8 +59,6 @@ target/ # pyenv .python-version -# celery beat schedule file -celerybeat-schedule # dotenv .env @@ -92,11 +77,15 @@ ENV/ ._* .DS_Store -# Custom stuff -env.sh +state.json +catalog.json config.json -.autoenv.zsh -rsa-key -tags -properties.json + +# VS Code files for those working on multiple tools +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +*.code-workspace \ No newline at end of file diff --git a/tap_zendesk_chat/.pre-commit-config.yaml b/.pre-commit-config.yaml similarity index 100% rename from tap_zendesk_chat/.pre-commit-config.yaml rename to .pre-commit-config.yaml From 44f6da2634d14544ca1f1a55bbd174daa24279b4 Mon Sep 17 00:00:00 2001 From: VishalP <20889199+Vi6hal@users.noreply.github.com> Date: Mon, 3 Oct 2022 11:08:57 +0530 Subject: [PATCH 03/39] updated file structure --- tap_zendesk_chat/__init__.py | 112 ++--------------------------------- tap_zendesk_chat/discover.py | 74 +++++++++++++++++++++++ tap_zendesk_chat/http.py | 5 +- tap_zendesk_chat/sync.py | 33 +++++++++++ tap_zendesk_chat/utils.py | 21 +++++++ 5 files changed, 136 insertions(+), 109 deletions(-) create mode 100644 tap_zendesk_chat/discover.py create mode 100644 tap_zendesk_chat/sync.py create mode 100644 tap_zendesk_chat/utils.py diff --git a/tap_zendesk_chat/__init__.py b/tap_zendesk_chat/__init__.py index 2169657..fe12b84 100644 --- a/tap_zendesk_chat/__init__.py +++ b/tap_zendesk_chat/__init__.py @@ -1,119 +1,17 @@ #!/usr/bin/env python3 import os import singer -from singer import metrics, utils, metadata -from singer.catalog import Catalog, CatalogEntry, Schema -from requests.exceptions import HTTPError -from . import streams as streams_ +from singer.utils import parse_args +from singer.catalog import Catalog from .context import Context -from .http import Client +from .discover import discover +from .sync import sync REQUIRED_CONFIG_KEYS = ["start_date", "access_token"] LOGGER = singer.get_logger() - -def get_abs_path(path): - return os.path.join(os.path.dirname(os.path.realpath(__file__)), path) - - -def load_schema(tap_stream_id): - path = "schemas/{}.json".format(tap_stream_id) - schema = utils.load_json(get_abs_path(path)) - dependencies = schema.pop("tap_schema_dependencies", []) - refs = {} - for sub_stream_id in dependencies: - refs[sub_stream_id] = load_schema(sub_stream_id) - if refs: - singer.resolve_schema_references(schema, refs) - return schema - - -def ensure_credentials_are_authorized(client): - # The request will throw an exception if the credentials are not authorized - client.request(streams_.DEPARTMENTS.tap_stream_id) - - -def is_account_endpoint_authorized(client): - # The account endpoint is restricted to zopim accounts, meaning integrated - # Zendesk accounts will get a 403 for this endpoint. - try: - client.request(streams_.ACCOUNT.tap_stream_id) - except HTTPError as e: - if e.response.status_code == 403: - LOGGER.info( - "Ignoring 403 from account endpoint - this must be an " - "integrated Zendesk account. This endpoint will be excluded " - "from discovery." - ) - return False - else: - raise - return True - - -def discover(config): - client = Client(config) - ensure_credentials_are_authorized(client) - include_account_stream = is_account_endpoint_authorized(client) - catalog = Catalog([]) - for stream in streams_.all_streams: - if (not include_account_stream - and stream.tap_stream_id == streams_.ACCOUNT.tap_stream_id): - continue - raw_schema = load_schema(stream.tap_stream_id) - mdata = build_metadata(raw_schema, stream) - schema = Schema.from_dict(raw_schema) - catalog.streams.append(CatalogEntry( - stream=stream.tap_stream_id, - tap_stream_id=stream.tap_stream_id, - key_properties=stream.pk_fields, - schema=schema, - metadata=metadata.to_list(mdata) - )) - return catalog - -def build_metadata(raw_schema, stream): - - mdata = metadata.new() - metadata.write(mdata, (), 'valid-replication-keys', list(stream.replication_key)) - metadata.write(mdata, (), 'table-key-properties', list(stream.pk_fields)) - for prop in raw_schema['properties'].keys(): - if prop in stream.replication_key or prop in stream.pk_fields: - metadata.write(mdata, ('properties', prop), 'inclusion', 'automatic') - else: - metadata.write(mdata, ('properties', prop), 'inclusion', 'available') - - return mdata - - -def output_schema(stream): - schema = load_schema(stream.tap_stream_id) - singer.write_schema(stream.tap_stream_id, schema, stream.pk_fields) - - -def is_selected(stream): - mdata = metadata.to_map(stream.metadata) - return metadata.get(mdata, (), 'selected') - -def sync(ctx): - currently_syncing = ctx.state.get("currently_syncing") - start_idx = streams_.all_stream_ids.index(currently_syncing) \ - if currently_syncing else 0 - stream_ids_to_sync = [cs.tap_stream_id for cs in ctx.catalog.streams - if is_selected(cs)] - streams = [s for s in streams_.all_streams[start_idx:] - if s.tap_stream_id in stream_ids_to_sync] - for stream in streams: - ctx.state["currently_syncing"] = stream.tap_stream_id - output_schema(stream) - ctx.write_state() - stream.sync(ctx) - ctx.state["currently_syncing"] = None - ctx.write_state() - - def main_impl(): - args = utils.parse_args(REQUIRED_CONFIG_KEYS) + args = parse_args(REQUIRED_CONFIG_KEYS) if args.discover: discover(args.config).dump() print() diff --git a/tap_zendesk_chat/discover.py b/tap_zendesk_chat/discover.py new file mode 100644 index 0000000..c3aa25a --- /dev/null +++ b/tap_zendesk_chat/discover.py @@ -0,0 +1,74 @@ +import singer +from singer import metadata +from singer.catalog import Catalog, CatalogEntry, Schema +from requests.exceptions import HTTPError +from . import streams as streams_ +from .http import Client +from .utils import load_schema + +LOGGER = singer.get_logger() + + + + +def ensure_credentials_are_authorized(client): + # The request will throw an exception if the credentials are not authorized + client.request(streams_.DEPARTMENTS.tap_stream_id) + + +def is_account_endpoint_authorized(client): + # The account endpoint is restricted to zopim accounts, meaning integrated + # Zendesk accounts will get a 403 for this endpoint. + try: + client.request(streams_.ACCOUNT.tap_stream_id) + except HTTPError as e: + if e.response.status_code == 403: + LOGGER.info( + "Ignoring 403 from account endpoint - this must be an " + "integrated Zendesk account. This endpoint will be excluded " + "from discovery." + ) + return False + else: + raise + return True + + +def discover(config): + client = Client(config) + ensure_credentials_are_authorized(client) + include_account_stream = is_account_endpoint_authorized(client) + catalog = Catalog([]) + for stream in streams_.all_streams: + if (not include_account_stream + and stream.tap_stream_id == streams_.ACCOUNT.tap_stream_id): + continue + raw_schema = load_schema(stream.tap_stream_id) + mdata = build_metadata(raw_schema, stream) + schema = Schema.from_dict(raw_schema) + catalog.streams.append(CatalogEntry( + stream=stream.tap_stream_id, + tap_stream_id=stream.tap_stream_id, + key_properties=stream.pk_fields, + schema=schema, + metadata=metadata.to_list(mdata) + )) + return catalog + +def build_metadata(raw_schema, stream): + + mdata = metadata.new() + metadata.write(mdata, (), 'valid-replication-keys', list(stream.replication_key)) + metadata.write(mdata, (), 'table-key-properties', list(stream.pk_fields)) + for prop in raw_schema['properties'].keys(): + if prop in stream.replication_key or prop in stream.pk_fields: + metadata.write(mdata, ('properties', prop), 'inclusion', 'automatic') + else: + metadata.write(mdata, ('properties', prop), 'inclusion', 'available') + + return mdata + + + + + diff --git a/tap_zendesk_chat/http.py b/tap_zendesk_chat/http.py index 12ef80a..ae53178 100644 --- a/tap_zendesk_chat/http.py +++ b/tap_zendesk_chat/http.py @@ -1,7 +1,7 @@ import requests -from singer import metrics +from singer import metrics, get_logger import backoff - +LOGGER = get_logger() BASE_URL = "https://www.zopim.com" @@ -28,6 +28,7 @@ def request(self, tap_stream_id, params=None, url=None, url_extra=""): headers = {"Authorization": "Bearer " + self.access_token} if self.user_agent: headers["User-Agent"] = self.user_agent + LOGGER.info("calling %s %s",url,params) request = requests.Request("GET", url, headers=headers, params=params) response = self.session.send(request.prepare()) timer.tags[metrics.Tag.http_status_code] = response.status_code diff --git a/tap_zendesk_chat/sync.py b/tap_zendesk_chat/sync.py new file mode 100644 index 0000000..f8558ff --- /dev/null +++ b/tap_zendesk_chat/sync.py @@ -0,0 +1,33 @@ +from .utils import load_schema +import singer +from singer import metadata +from . import streams as streams_ + + + + +def output_schema(stream): + schema = load_schema(stream.tap_stream_id) + singer.write_schema(stream.tap_stream_id, schema, stream.pk_fields) + + +def is_selected(stream): + mdata = metadata.to_map(stream.metadata) + return metadata.get(mdata, (), 'selected') + +def sync(ctx): + currently_syncing = ctx.state.get("currently_syncing") + start_idx = streams_.all_stream_ids.index(currently_syncing) \ + if currently_syncing else 0 + stream_ids_to_sync = [cs.tap_stream_id for cs in ctx.catalog.streams + if is_selected(cs)] + streams = [s for s in streams_.all_streams[start_idx:] + if s.tap_stream_id in stream_ids_to_sync] + for stream in streams: + ctx.state["currently_syncing"] = stream.tap_stream_id + output_schema(stream) + ctx.write_state() + stream.sync(ctx) + ctx.state["currently_syncing"] = None + ctx.write_state() + diff --git a/tap_zendesk_chat/utils.py b/tap_zendesk_chat/utils.py new file mode 100644 index 0000000..e3e6d82 --- /dev/null +++ b/tap_zendesk_chat/utils.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python3 +import os +import singer +from singer.utils import load_json + +def get_abs_path(path): + return os.path.join(os.path.dirname(os.path.realpath(__file__)), path) + + +def load_schema(tap_stream_id): + path = "schemas/{}.json".format(tap_stream_id) + schema = load_json(get_abs_path(path)) + dependencies = schema.pop("tap_schema_dependencies", []) + refs = {} + for sub_stream_id in dependencies: + refs[sub_stream_id] = load_schema(sub_stream_id) + if refs: + singer.resolve_schema_references(schema, refs) + return schema + + From 0c62c0827b4fae454e7879b132a0e8ecb26a9470 Mon Sep 17 00:00:00 2001 From: VishalP <20889199+Vi6hal@users.noreply.github.com> Date: Mon, 3 Oct 2022 11:14:28 +0530 Subject: [PATCH 04/39] updated init for tap --- tap_zendesk_chat/__init__.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/tap_zendesk_chat/__init__.py b/tap_zendesk_chat/__init__.py index fe12b84..ad48b87 100644 --- a/tap_zendesk_chat/__init__.py +++ b/tap_zendesk_chat/__init__.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 import os import singer -from singer.utils import parse_args +from singer.utils import parse_args,handle_top_exception from singer.catalog import Catalog from .context import Context from .discover import discover @@ -10,23 +10,17 @@ REQUIRED_CONFIG_KEYS = ["start_date", "access_token"] LOGGER = singer.get_logger() -def main_impl(): + +@handle_top_exception(LOGGER) +def main(): + """performs sync and discovery.""" args = parse_args(REQUIRED_CONFIG_KEYS) if args.discover: discover(args.config).dump() - print() else: - catalog = Catalog.from_dict(args.properties) \ - if args.properties else discover(args.config) - ctx = Context(args.config, args.state, catalog) + ctx = Context(args.config, args.state, args.catalog or discover(args.config)) sync(ctx) -def main(): - try: - main_impl() - except Exception as exc: - LOGGER.critical(exc) - raise exc if __name__ == "__main__": - main() + main() \ No newline at end of file From 8ecb4337f84cdda362cd1853092e5a5ccf4c0610 Mon Sep 17 00:00:00 2001 From: VishalP <20889199+Vi6hal@users.noreply.github.com> Date: Thu, 6 Oct 2022 16:33:02 +0530 Subject: [PATCH 05/39] fixed sync.py --- tap_zendesk_chat/__init__.py | 2 +- tap_zendesk_chat/discover.py | 1 - tap_zendesk_chat/streams.py | 1 + tap_zendesk_chat/sync.py | 56 ++++++++++++++++-------------------- 4 files changed, 26 insertions(+), 34 deletions(-) diff --git a/tap_zendesk_chat/__init__.py b/tap_zendesk_chat/__init__.py index ad48b87..bd97ef4 100644 --- a/tap_zendesk_chat/__init__.py +++ b/tap_zendesk_chat/__init__.py @@ -19,7 +19,7 @@ def main(): discover(args.config).dump() else: ctx = Context(args.config, args.state, args.catalog or discover(args.config)) - sync(ctx) + sync(ctx,args.catalog or discover(args.config),args.state) if __name__ == "__main__": diff --git a/tap_zendesk_chat/discover.py b/tap_zendesk_chat/discover.py index c3aa25a..04426d1 100644 --- a/tap_zendesk_chat/discover.py +++ b/tap_zendesk_chat/discover.py @@ -65,7 +65,6 @@ def build_metadata(raw_schema, stream): metadata.write(mdata, ('properties', prop), 'inclusion', 'automatic') else: metadata.write(mdata, ('properties', prop), 'inclusion', 'available') - return mdata diff --git a/tap_zendesk_chat/streams.py b/tap_zendesk_chat/streams.py index 1c19241..02431fa 100644 --- a/tap_zendesk_chat/streams.py +++ b/tap_zendesk_chat/streams.py @@ -204,3 +204,4 @@ def sync(self, ctx): ACCOUNT, ] all_stream_ids = [s.tap_stream_id for s in all_streams] +STREAMS = {s.tap_stream_id:s for s in all_streams} \ No newline at end of file diff --git a/tap_zendesk_chat/sync.py b/tap_zendesk_chat/sync.py index f8558ff..85a6944 100644 --- a/tap_zendesk_chat/sync.py +++ b/tap_zendesk_chat/sync.py @@ -1,33 +1,25 @@ -from .utils import load_schema import singer -from singer import metadata -from . import streams as streams_ - - - - -def output_schema(stream): - schema = load_schema(stream.tap_stream_id) - singer.write_schema(stream.tap_stream_id, schema, stream.pk_fields) - - -def is_selected(stream): - mdata = metadata.to_map(stream.metadata) - return metadata.get(mdata, (), 'selected') - -def sync(ctx): - currently_syncing = ctx.state.get("currently_syncing") - start_idx = streams_.all_stream_ids.index(currently_syncing) \ - if currently_syncing else 0 - stream_ids_to_sync = [cs.tap_stream_id for cs in ctx.catalog.streams - if is_selected(cs)] - streams = [s for s in streams_.all_streams[start_idx:] - if s.tap_stream_id in stream_ids_to_sync] - for stream in streams: - ctx.state["currently_syncing"] = stream.tap_stream_id - output_schema(stream) - ctx.write_state() - stream.sync(ctx) - ctx.state["currently_syncing"] = None - ctx.write_state() - +from . import streams +LOGGER = singer.get_logger() + + +def sync(ctx, catalog: singer.Catalog, state): + """performs sync for selected streams.""" + with singer.Transformer() as transformer: + for stream in catalog.get_selected_streams(state): + tap_stream_id = stream.tap_stream_id + stream_schema = stream.schema.to_dict() + stream_metadata = singer.metadata.to_map(stream.metadata) + stream_obj = streams.STREAMS[tap_stream_id] + LOGGER.info("Starting sync for stream: %s", tap_stream_id) + ctx.state = singer.set_currently_syncing(ctx.state, tap_stream_id) + singer.write_state(state) + singer.write_schema(tap_stream_id, stream_schema, stream_obj.pk_fields, stream.replication_key) + stream_obj.sync(ctx) + state = stream_obj.sync( + state=state, schema=stream_schema, stream_metadata=stream_metadata, transformer=transformer + ) + singer.write_state(ctx.state) + + ctx.state = singer.set_currently_syncing(ctx.state, None) + singer.write_state(ctx.state) \ No newline at end of file From bae0e69a604e66384a0d6714e76ea43e3d554eea Mon Sep 17 00:00:00 2001 From: VishalP <20889199+Vi6hal@users.noreply.github.com> Date: Thu, 6 Oct 2022 23:49:43 +0530 Subject: [PATCH 06/39] fixed discovery --- tap_zendesk_chat/discover.py | 80 +++++++++++++++++------------------- 1 file changed, 38 insertions(+), 42 deletions(-) diff --git a/tap_zendesk_chat/discover.py b/tap_zendesk_chat/discover.py index 04426d1..85b07d8 100644 --- a/tap_zendesk_chat/discover.py +++ b/tap_zendesk_chat/discover.py @@ -3,6 +3,8 @@ from singer.catalog import Catalog, CatalogEntry, Schema from requests.exceptions import HTTPError from . import streams as streams_ +from .streams import STREAMS + from .http import Client from .utils import load_schema @@ -11,51 +13,24 @@ -def ensure_credentials_are_authorized(client): - # The request will throw an exception if the credentials are not authorized - client.request(streams_.DEPARTMENTS.tap_stream_id) - - -def is_account_endpoint_authorized(client): +def account_not_authorized(client): # The account endpoint is restricted to zopim accounts, meaning integrated # Zendesk accounts will get a 403 for this endpoint. try: - client.request(streams_.ACCOUNT.tap_stream_id) - except HTTPError as e: - if e.response.status_code == 403: + client.request(STREAMS["account"].tap_stream_id) + except HTTPError as err: + if err.response.status_code == 403: LOGGER.info( - "Ignoring 403 from account endpoint - this must be an " - "integrated Zendesk account. This endpoint will be excluded " - "from discovery." + "Ignoring 403 from account endpoint - this must be an \ + integrated Zendesk account. This endpoint will be excluded \ + from discovery" ) - return False - else: - raise - return True + return True + raise + return False -def discover(config): - client = Client(config) - ensure_credentials_are_authorized(client) - include_account_stream = is_account_endpoint_authorized(client) - catalog = Catalog([]) - for stream in streams_.all_streams: - if (not include_account_stream - and stream.tap_stream_id == streams_.ACCOUNT.tap_stream_id): - continue - raw_schema = load_schema(stream.tap_stream_id) - mdata = build_metadata(raw_schema, stream) - schema = Schema.from_dict(raw_schema) - catalog.streams.append(CatalogEntry( - stream=stream.tap_stream_id, - tap_stream_id=stream.tap_stream_id, - key_properties=stream.pk_fields, - schema=schema, - metadata=metadata.to_list(mdata) - )) - return catalog - -def build_metadata(raw_schema, stream): +def build_metadata(raw_schema :dict, stream): mdata = metadata.new() metadata.write(mdata, (), 'valid-replication-keys', list(stream.replication_key)) @@ -65,9 +40,30 @@ def build_metadata(raw_schema, stream): metadata.write(mdata, ('properties', prop), 'inclusion', 'automatic') else: metadata.write(mdata, ('properties', prop), 'inclusion', 'available') - return mdata - - - + return metadata.to_list(mdata) + + +def discover(config :dict) -> Catalog: + """ + discover function for tap-zendesk-chat + """ + if config: + client = Client(config) + # perform auth + client.request(STREAMS["departments"].tap_stream_id) + if account_not_authorized(client): + STREAMS.pop("account") + streams = [] + for stream_name, stream in STREAMS.items(): + schema = load_schema(stream.tap_stream_id) + streams.append( + { + "stream": stream_name, + "tap_stream_id": stream.tap_stream_id, + "schema": schema, + "metadata": build_metadata(schema,stream), + } + ) + return Catalog.from_dict({"streams": streams}) From f2e979925a66f3e415f9f2e3468240b5c7c69fe8 Mon Sep 17 00:00:00 2001 From: VishalP <20889199+Vi6hal@users.noreply.github.com> Date: Mon, 10 Oct 2022 17:45:41 +0530 Subject: [PATCH 07/39] fixed streams --- tap_zendesk_chat/streams.py | 80 +++++++++++++++---------------------- tap_zendesk_chat/sync.py | 33 +++++++-------- 2 files changed, 50 insertions(+), 63 deletions(-) diff --git a/tap_zendesk_chat/streams.py b/tap_zendesk_chat/streams.py index 02431fa..f88d0f5 100644 --- a/tap_zendesk_chat/streams.py +++ b/tap_zendesk_chat/streams.py @@ -2,6 +2,7 @@ from pendulum import parse as dt_parse import singer from singer import metrics, Transformer, metadata +from typing import Dict LOGGER = singer.get_logger() @@ -43,36 +44,28 @@ def write_page(self, page): class Everything(Stream): - def sync(self, ctx): - with Transformer() as transformer: - schema = ctx.catalog.get_stream(self.tap_stream_id).schema.to_dict() - m_data = metadata.to_map(ctx.catalog.get_stream(self.tap_stream_id).metadata) - response = ctx.client.request(self.tap_stream_id) - page = [transformer.transform(rec, schema, metadata=m_data) for rec in response] - self.write_page(page) + def sync(self, ctx, schema: Dict, stream_metadata: Dict, transformer: Transformer): + response = ctx.client.request(self.tap_stream_id) + page = [transformer.transform(rec, schema, metadata=stream_metadata) for rec in response] + self.write_page(page) class Agents(Stream): - def sync(self, ctx): + def sync(self, ctx, schema: Dict, stream_metadata: Dict, transformer: Transformer): since_id_offset = [self.tap_stream_id, "offset", "id"] since_id = ctx.bookmark(since_id_offset) or 0 - schema = ctx.catalog.get_stream(self.tap_stream_id).schema.to_dict() - m_data = metadata.to_map(ctx.catalog.get_stream(self.tap_stream_id).metadata) - - with Transformer() as transformer: - while True: - params = { - "since_id": since_id, - "limit": ctx.config.get("agents_page_limit", 100), - } - page = ctx.client.request(self.tap_stream_id, params) - if not page: - break - page = [transformer.transform(rec, schema, metadata=m_data) for rec in page] - self.write_page(page) - since_id = page[-1]["id"] + 1 - ctx.set_bookmark(since_id_offset, since_id) - ctx.write_state() + while True: + params = { + "since_id": since_id, + "limit": ctx.config.get("agents_page_limit", 100), + } + page = ctx.client.request(self.tap_stream_id, params) + if not page: + break + self.write_page([transformer.transform(rec, schema, metadata=stream_metadata) for rec in page]) + since_id = page[-1]["id"] + 1 + ctx.set_bookmark(since_id_offset, since_id) + ctx.write_state() ctx.set_bookmark(since_id_offset, None) ctx.write_state() @@ -95,7 +88,7 @@ def _search(self, ctx, chat_type, ts_field, return ctx.client.request( self.tap_stream_id, params=params, url_extra="/search") - def _pull(self, ctx, chat_type, ts_field, *, full_sync): + def _pull(self, ctx, chat_type, ts_field, *, full_sync, schema: Dict, stream_metadata: Dict, transformer: Transformer): """Pulls and writes pages of data for the given chat_type, where chat_type can be either "chat" or "offline_msg". @@ -114,8 +107,7 @@ def _pull(self, ctx, chat_type, ts_field, *, full_sync): start_time = ctx.update_start_date_bookmark(ts_bookmark_key) next_url = ctx.bookmark(url_offset_key) max_bookmark = start_time - schema = ctx.catalog.get_stream(self.tap_stream_id).schema.to_dict() - m_data = metadata.to_map(ctx.catalog.get_stream(self.tap_stream_id).metadata) + interval_days = 14 interval_days_str = ctx.config.get("chat_search_interval_days") if interval_days_str is not None: @@ -136,7 +128,7 @@ def _pull(self, ctx, chat_type, ts_field, *, full_sync): chat_ids = [r["id"] for r in search_resp["results"]] chats = self._bulk_chats(ctx, chat_ids) if chats: - chats = [transformer.transform(rec, schema, metadata=m_data) for rec in chats] + chats = [transformer.transform(rec, schema, metadata=stream_metadata) for rec in chats] self.write_page(chats) max_bookmark = max(max_bookmark, *[c[ts_field] for c in chats]) if not next_url: @@ -159,36 +151,30 @@ def _should_run_full_sync(self, ctx): return True return False - def sync(self, ctx): + def sync(self, ctx, schema: Dict, stream_metadata: Dict, transformer: Transformer): full_sync = self._should_run_full_sync(ctx) - self._pull(ctx, "chat", "end_timestamp", full_sync=full_sync) - self._pull(ctx, "offline_msg", "timestamp", full_sync=full_sync) + self._pull(ctx, "chat", "end_timestamp", full_sync=full_sync,schema=schema,stream_metadata=stream_metadata,transformer=transformer) + self._pull(ctx, "offline_msg", "timestamp", full_sync=full_sync,schema=schema,stream_metadata=stream_metadata,transformer=transformer) if full_sync: ctx.state["chats_last_full_sync"] = ctx.now.isoformat() ctx.write_state() class Bans(Stream): - def sync(self, ctx): - with Transformer() as transformer: - schema = ctx.catalog.get_stream(self.tap_stream_id).schema.to_dict() - m_data = metadata.to_map(ctx.catalog.get_stream(self.tap_stream_id).metadata) - response = ctx.client.request(self.tap_stream_id) - page = response["visitor"] + response["ip_address"] - page = [transformer.transform(rec, schema, metadata=m_data) for rec in page] - self.write_page(page) + def sync(self, ctx, schema: Dict, stream_metadata: Dict, transformer: Transformer): + response = ctx.client.request(self.tap_stream_id) + page = response["visitor"] + response["ip_address"] + page = [transformer.transform(rec, schema, metadata=stream_metadata) for rec in page] + self.write_page(page) class Account(Stream): - def sync(self, ctx): + def sync(self, ctx, schema: Dict, stream_metadata: Dict, transformer: Transformer): # The account endpoint returns a single item, so we have to wrap it in # a list to write a "page" - with Transformer() as transformer: - schema = ctx.catalog.get_stream(self.tap_stream_id).schema.to_dict() - m_data = metadata.to_map(ctx.catalog.get_stream(self.tap_stream_id).metadata) - response = ctx.client.request(self.tap_stream_id) - page = transformer.transform(response, schema, metadata=m_data) - self.write_page([page]) + response = ctx.client.request(self.tap_stream_id) + page = transformer.transform(response, schema, metadata=stream_metadata) + self.write_page([page]) DEPARTMENTS = Everything("departments", ["id"]) diff --git a/tap_zendesk_chat/sync.py b/tap_zendesk_chat/sync.py index 85a6944..241defe 100644 --- a/tap_zendesk_chat/sync.py +++ b/tap_zendesk_chat/sync.py @@ -1,25 +1,26 @@ -import singer +from singer import Transformer,metadata,Catalog,write_state,write_schema,set_currently_syncing,get_logger from . import streams -LOGGER = singer.get_logger() +from .http import Client +LOGGER = get_logger() - -def sync(ctx, catalog: singer.Catalog, state): +def sync(ctx,catalog: Catalog, state): """performs sync for selected streams.""" - with singer.Transformer() as transformer: + with Transformer() as transformer: for stream in catalog.get_selected_streams(state): tap_stream_id = stream.tap_stream_id stream_schema = stream.schema.to_dict() - stream_metadata = singer.metadata.to_map(stream.metadata) + stream_metadata = metadata.to_map(stream.metadata) stream_obj = streams.STREAMS[tap_stream_id] LOGGER.info("Starting sync for stream: %s", tap_stream_id) - ctx.state = singer.set_currently_syncing(ctx.state, tap_stream_id) - singer.write_state(state) - singer.write_schema(tap_stream_id, stream_schema, stream_obj.pk_fields, stream.replication_key) - stream_obj.sync(ctx) - state = stream_obj.sync( - state=state, schema=stream_schema, stream_metadata=stream_metadata, transformer=transformer - ) - singer.write_state(ctx.state) + state = set_currently_syncing(state, tap_stream_id) + write_state(state) + write_schema(tap_stream_id, stream_schema, stream_obj.pk_fields, stream.replication_key) + try: + state = stream_obj.sync(ctx,schema=stream_schema, stream_metadata=stream_metadata, transformer=transformer) + except Exception as err: + LOGGER.info("%s",err) + stream_obj.sync(ctx) + write_state(ctx.state) - ctx.state = singer.set_currently_syncing(ctx.state, None) - singer.write_state(ctx.state) \ No newline at end of file + ctx.state = set_currently_syncing(ctx.state, None) + write_state(ctx.state) \ No newline at end of file From b8019df60f00cad352844f03aedc30edf6e65699 Mon Sep 17 00:00:00 2001 From: VishalP <20889199+Vi6hal@users.noreply.github.com> Date: Tue, 11 Oct 2022 16:54:55 +0530 Subject: [PATCH 08/39] removed pendulum dependancy --- tap_zendesk_chat/__init__.py | 2 +- tap_zendesk_chat/context.py | 3 ++- tap_zendesk_chat/http.py | 5 +---- tap_zendesk_chat/streams.py | 8 ++++---- tap_zendesk_chat/sync.py | 12 ++++++------ 5 files changed, 14 insertions(+), 16 deletions(-) diff --git a/tap_zendesk_chat/__init__.py b/tap_zendesk_chat/__init__.py index bd97ef4..088b0cb 100644 --- a/tap_zendesk_chat/__init__.py +++ b/tap_zendesk_chat/__init__.py @@ -19,7 +19,7 @@ def main(): discover(args.config).dump() else: ctx = Context(args.config, args.state, args.catalog or discover(args.config)) - sync(ctx,args.catalog or discover(args.config),args.state) + sync(ctx,args.catalog or discover(args.config)) if __name__ == "__main__": diff --git a/tap_zendesk_chat/context.py b/tap_zendesk_chat/context.py index a665750..68ed973 100644 --- a/tap_zendesk_chat/context.py +++ b/tap_zendesk_chat/context.py @@ -1,4 +1,5 @@ from datetime import datetime +from singer.utils import now import singer from .http import Client @@ -8,7 +9,7 @@ def __init__(self, config, state, catalog): self.state = state self.catalog = catalog self.client = Client(config) - self.now = datetime.utcnow() + self.now = now() @property def bookmarks(self): diff --git a/tap_zendesk_chat/http.py b/tap_zendesk_chat/http.py index ae53178..ce13586 100644 --- a/tap_zendesk_chat/http.py +++ b/tap_zendesk_chat/http.py @@ -16,10 +16,7 @@ def __init__(self, config): self.user_agent = config.get("user_agent") self.session = requests.Session() - @backoff.on_exception(backoff.expo, - RateLimitException, - max_tries=10, - factor=2) + @backoff.on_exception(backoff.expo,RateLimitException,max_tries=10,factor=2) def request(self, tap_stream_id, params=None, url=None, url_extra=""): if not params: params={} diff --git a/tap_zendesk_chat/streams.py b/tap_zendesk_chat/streams.py index f88d0f5..80ba2cb 100644 --- a/tap_zendesk_chat/streams.py +++ b/tap_zendesk_chat/streams.py @@ -1,7 +1,7 @@ from datetime import datetime, timedelta -from pendulum import parse as dt_parse import singer -from singer import metrics, Transformer, metadata +from singer.utils import strptime_to_utc +from singer import metrics, Transformer from typing import Dict LOGGER = singer.get_logger() @@ -9,7 +9,7 @@ def break_into_intervals(days, start_time: str, now: datetime): delta = timedelta(days=days) - start_dt = dt_parse(start_time) + start_dt = strptime_to_utc(start_time) while start_dt < now: end_dt = min(start_dt + delta, now) yield start_dt, end_dt @@ -143,7 +143,7 @@ def _should_run_full_sync(self, ctx): if not last_sync: LOGGER.info("Running full sync of chats: no last sync time") return True - next_sync = dt_parse(last_sync) + timedelta(days=int(sync_days)) + next_sync = strptime_to_utc(last_sync) + timedelta(days=int(sync_days)) if next_sync <= ctx.now: LOGGER.info("Running full sync of chats: " "last sync was %s, configured to run every %s days", diff --git a/tap_zendesk_chat/sync.py b/tap_zendesk_chat/sync.py index 241defe..3ce105f 100644 --- a/tap_zendesk_chat/sync.py +++ b/tap_zendesk_chat/sync.py @@ -3,24 +3,24 @@ from .http import Client LOGGER = get_logger() -def sync(ctx,catalog: Catalog, state): +def sync(ctx,catalog: Catalog): """performs sync for selected streams.""" with Transformer() as transformer: - for stream in catalog.get_selected_streams(state): + for stream in catalog.get_selected_streams(ctx.state): tap_stream_id = stream.tap_stream_id stream_schema = stream.schema.to_dict() stream_metadata = metadata.to_map(stream.metadata) stream_obj = streams.STREAMS[tap_stream_id] LOGGER.info("Starting sync for stream: %s", tap_stream_id) - state = set_currently_syncing(state, tap_stream_id) - write_state(state) + ctx.state = set_currently_syncing(ctx.state, tap_stream_id) + ctx.write_state() write_schema(tap_stream_id, stream_schema, stream_obj.pk_fields, stream.replication_key) try: - state = stream_obj.sync(ctx,schema=stream_schema, stream_metadata=stream_metadata, transformer=transformer) + stream_obj.sync(ctx,schema=stream_schema, stream_metadata=stream_metadata, transformer=transformer) except Exception as err: LOGGER.info("%s",err) stream_obj.sync(ctx) - write_state(ctx.state) + ctx.write_state() ctx.state = set_currently_syncing(ctx.state, None) write_state(ctx.state) \ No newline at end of file From 7529d9ad0b12bf622439db1b843793832b81f10c Mon Sep 17 00:00:00 2001 From: VishalP <20889199+Vi6hal@users.noreply.github.com> Date: Wed, 12 Oct 2022 23:30:51 +0530 Subject: [PATCH 09/39] added pagination support to bans stream --- tap_zendesk_chat/streams.py | 23 +++++++++++++++++++---- tap_zendesk_chat/sync.py | 7 +------ 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/tap_zendesk_chat/streams.py b/tap_zendesk_chat/streams.py index 80ba2cb..9c94888 100644 --- a/tap_zendesk_chat/streams.py +++ b/tap_zendesk_chat/streams.py @@ -162,11 +162,26 @@ def sync(self, ctx, schema: Dict, stream_metadata: Dict, transformer: Transforme class Bans(Stream): def sync(self, ctx, schema: Dict, stream_metadata: Dict, transformer: Transformer): - response = ctx.client.request(self.tap_stream_id) - page = response["visitor"] + response["ip_address"] - page = [transformer.transform(rec, schema, metadata=stream_metadata) for rec in page] - self.write_page(page) + since_id_offset = [self.tap_stream_id, "offset", "id"] + since_id = ctx.bookmark(since_id_offset) or 0 + while True: + params = { + "since_id": since_id, + "limit": ctx.config.get("agents_page_limit", 100), + # TODO: Add Additional advanced property in connection_properties + } + response = ctx.client.request(self.tap_stream_id, params) + page = response.get("visitor",[]) + response.get("ip_address",[]) + if not page: + break + page = response["visitor"] + response["ip_address"] + self.write_page([transformer.transform(rec, schema, metadata=stream_metadata) for rec in page]) + since_id = page[-1]["id"] + 1 + ctx.set_bookmark(since_id_offset, since_id) + ctx.write_state() + ctx.set_bookmark(since_id_offset, None) + ctx.write_state() class Account(Stream): def sync(self, ctx, schema: Dict, stream_metadata: Dict, transformer: Transformer): diff --git a/tap_zendesk_chat/sync.py b/tap_zendesk_chat/sync.py index 3ce105f..90bf25a 100644 --- a/tap_zendesk_chat/sync.py +++ b/tap_zendesk_chat/sync.py @@ -1,6 +1,5 @@ from singer import Transformer,metadata,Catalog,write_state,write_schema,set_currently_syncing,get_logger from . import streams -from .http import Client LOGGER = get_logger() def sync(ctx,catalog: Catalog): @@ -15,11 +14,7 @@ def sync(ctx,catalog: Catalog): ctx.state = set_currently_syncing(ctx.state, tap_stream_id) ctx.write_state() write_schema(tap_stream_id, stream_schema, stream_obj.pk_fields, stream.replication_key) - try: - stream_obj.sync(ctx,schema=stream_schema, stream_metadata=stream_metadata, transformer=transformer) - except Exception as err: - LOGGER.info("%s",err) - stream_obj.sync(ctx) + stream_obj.sync(ctx,schema=stream_schema, stream_metadata=stream_metadata, transformer=transformer) ctx.write_state() ctx.state = set_currently_syncing(ctx.state, None) From 76ba0f7194673d020bbc40af2f1c6381a8e66688 Mon Sep 17 00:00:00 2001 From: VishalP <20889199+Vi6hal@users.noreply.github.com> Date: Thu, 13 Oct 2022 13:25:12 +0530 Subject: [PATCH 10/39] pylint and formatting issues --- .github/pull_request_template.md | 8 +-- .gitignore | 2 +- .pre-commit-config.yaml | 2 +- LICENSE | 1 - pyproject.toml | 21 ++++++++ setup.cfg | 8 +++ setup.py | 51 ++++++++----------- tap_zendesk_chat/__init__.py | 8 +-- tap_zendesk_chat/discover.py | 31 +++++------- tap_zendesk_chat/http.py | 11 ++-- tap_zendesk_chat/schemas/chats.json | 8 +-- tap_zendesk_chat/streams.py | 78 ++++++++++++++++------------- tap_zendesk_chat/sync.py | 19 +++++-- tap_zendesk_chat/utils.py | 14 +++++- 14 files changed, 154 insertions(+), 108 deletions(-) create mode 100644 pyproject.toml diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 6e46b00..58b80a3 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -2,10 +2,10 @@ (write a short description or paste a link to JIRA) # Manual QA steps - - - + - + # Risks - - - + - + # Rollback steps - revert this branch diff --git a/.gitignore b/.gitignore index 9e13d77..6a1a7e9 100644 --- a/.gitignore +++ b/.gitignore @@ -88,4 +88,4 @@ config.json !.vscode/tasks.json !.vscode/launch.json !.vscode/extensions.json -*.code-workspace \ No newline at end of file +*.code-workspace diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 644fe89..6717558 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -54,4 +54,4 @@ repos: - repo: https://github.com/codespell-project/codespell rev: v2.2.1 hooks: - - id: codespell \ No newline at end of file + - id: codespell diff --git a/LICENSE b/LICENSE index 753d647..4ec8c3f 100644 --- a/LICENSE +++ b/LICENSE @@ -617,4 +617,3 @@ Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS - diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..500b64e --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,21 @@ +[tool.black] +line-length = 120 +target-version = ['py37',] +include = '\.pyi?$' + +[flake8] +profile = "black" +max-line-length = 120 +exclude = "build,.git,.tox,./tests/.env,tests" +ignore = "W504,W601,D203" + +[tool.pylint] +max-line-length = 120 +disable = ["R0801",] + +[tool.isort] +profile = "black" +multi_line_output = 3 + +[tool.bandit] +exclude_dirs = ["tests",".env"] diff --git a/setup.cfg b/setup.cfg index b88034e..fec1fe5 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,2 +1,10 @@ [metadata] description-file = README.md + + +[flake8] +ignore = W504,W601,D203 +profile = black +max-line-length = 120 +exclude = .git,__pycache__,docs/source/conf.py,old,build,dist,.git,.tox,./tests/.env,tests +max-complexity = 10 diff --git a/setup.py b/setup.py index f20f3de..bd6b0e2 100755 --- a/setup.py +++ b/setup.py @@ -1,33 +1,24 @@ #!/usr/bin/env python -from setuptools import setup, find_packages +from setuptools import find_packages, setup -setup(name="tap-zendesk-chat", - version="0.3.2", - description="Singer.io tap for extracting data from the Zendesk Chat API", - author="Stitch", - url="http://singer.io", - classifiers=["Programming Language :: Python :: 3 :: Only"], - py_modules=["tap_zendesk_chat"], - install_requires=[ - "python-dateutil==2.6.0", # because of singer-python issue - "pendulum==1.2.0", # because of singer-python issue - "singer-python==5.12.1", - "requests==2.20.0", - ], - extras_require={ - 'dev': [ - 'pylint==2.7.4', - 'ipdb', - 'nose' - ] - }, - entry_points=""" - [console_scripts] - tap-zendesk-chat=tap_zendesk_chat:main - """, - packages=["tap_zendesk_chat"], - package_data = { - "schemas": ["tap_zendesk_chat/schemas/*.json"] - }, - include_package_data=True, +setup( + name="tap-zendesk-chat", + version="0.3.2", + description="Singer.io tap for extracting data from the Zendesk Chat API", + author="Stitch", + url="https://singer.io", + classifiers=["Programming Language :: Python :: 3 :: Only"], + py_modules=["tap_zendesk_chat"], + install_requires=[ + "singer-python==5.12.1", + "requests==2.20.0", + ], + extras_require={"dev": ["pylint", "ipdb", "nose"]}, + entry_points=""" + [console_scripts] + tap-zendesk-chat=tap_zendesk_chat:main + """, + packages=find_packages(exclude=["tests"]), + package_data={"schemas": ["tap_zendesk_chat/schemas/*.json"]}, + include_package_data=True, ) diff --git a/tap_zendesk_chat/__init__.py b/tap_zendesk_chat/__init__.py index 088b0cb..aea14c1 100644 --- a/tap_zendesk_chat/__init__.py +++ b/tap_zendesk_chat/__init__.py @@ -1,8 +1,10 @@ #!/usr/bin/env python3 import os + import singer -from singer.utils import parse_args,handle_top_exception from singer.catalog import Catalog +from singer.utils import handle_top_exception, parse_args + from .context import Context from .discover import discover from .sync import sync @@ -19,8 +21,8 @@ def main(): discover(args.config).dump() else: ctx = Context(args.config, args.state, args.catalog or discover(args.config)) - sync(ctx,args.catalog or discover(args.config)) + sync(ctx, args.catalog or discover(args.config)) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/tap_zendesk_chat/discover.py b/tap_zendesk_chat/discover.py index 85b07d8..59fa58a 100644 --- a/tap_zendesk_chat/discover.py +++ b/tap_zendesk_chat/discover.py @@ -1,18 +1,16 @@ import singer +from requests.exceptions import HTTPError from singer import metadata from singer.catalog import Catalog, CatalogEntry, Schema -from requests.exceptions import HTTPError -from . import streams as streams_ -from .streams import STREAMS +from . import streams as streams_ from .http import Client +from .streams import STREAMS from .utils import load_schema LOGGER = singer.get_logger() - - def account_not_authorized(client): # The account endpoint is restricted to zopim accounts, meaning integrated # Zendesk accounts will get a 403 for this endpoint. @@ -30,23 +28,20 @@ def account_not_authorized(client): return False -def build_metadata(raw_schema :dict, stream): - +def build_metadata(raw_schema: dict, stream): mdata = metadata.new() - metadata.write(mdata, (), 'valid-replication-keys', list(stream.replication_key)) - metadata.write(mdata, (), 'table-key-properties', list(stream.pk_fields)) - for prop in raw_schema['properties'].keys(): + metadata.write(mdata, (), "valid-replication-keys", list(stream.replication_key)) + metadata.write(mdata, (), "table-key-properties", list(stream.pk_fields)) + for prop in raw_schema["properties"].keys(): if prop in stream.replication_key or prop in stream.pk_fields: - metadata.write(mdata, ('properties', prop), 'inclusion', 'automatic') + metadata.write(mdata, ("properties", prop), "inclusion", "automatic") else: - metadata.write(mdata, ('properties', prop), 'inclusion', 'available') + metadata.write(mdata, ("properties", prop), "inclusion", "available") return metadata.to_list(mdata) -def discover(config :dict) -> Catalog: - """ - discover function for tap-zendesk-chat - """ +def discover(config: dict) -> Catalog: + """discover function for tap-zendesk-chat.""" if config: client = Client(config) # perform auth @@ -61,9 +56,7 @@ def discover(config :dict) -> Catalog: "stream": stream_name, "tap_stream_id": stream.tap_stream_id, "schema": schema, - "metadata": build_metadata(schema,stream), + "metadata": build_metadata(schema, stream), } ) return Catalog.from_dict({"streams": streams}) - - diff --git a/tap_zendesk_chat/http.py b/tap_zendesk_chat/http.py index ce13586..2412ac2 100644 --- a/tap_zendesk_chat/http.py +++ b/tap_zendesk_chat/http.py @@ -1,6 +1,7 @@ -import requests -from singer import metrics, get_logger import backoff +import requests +from singer import get_logger, metrics + LOGGER = get_logger() BASE_URL = "https://www.zopim.com" @@ -16,16 +17,16 @@ def __init__(self, config): self.user_agent = config.get("user_agent") self.session = requests.Session() - @backoff.on_exception(backoff.expo,RateLimitException,max_tries=10,factor=2) + @backoff.on_exception(backoff.expo, RateLimitException, max_tries=10, factor=2) def request(self, tap_stream_id, params=None, url=None, url_extra=""): if not params: - params={} + params = {} with metrics.http_request_timer(tap_stream_id) as timer: url = url or BASE_URL + "/api/v2/" + tap_stream_id + url_extra headers = {"Authorization": "Bearer " + self.access_token} if self.user_agent: headers["User-Agent"] = self.user_agent - LOGGER.info("calling %s %s",url,params) + LOGGER.info("calling %s %s", url, params) request = requests.Request("GET", url, headers=headers, params=params) response = self.session.send(request.prepare()) timer.tags[metrics.Tag.http_status_code] = response.status_code diff --git a/tap_zendesk_chat/schemas/chats.json b/tap_zendesk_chat/schemas/chats.json index a2c932a..c7829f1 100644 --- a/tap_zendesk_chat/schemas/chats.json +++ b/tap_zendesk_chat/schemas/chats.json @@ -103,10 +103,10 @@ "$ref": "chat_response_time" }, "session": { - "type": [ - "null", - "object" - ] + "type": [ + "null", + "object" + ] }, "history": { "items": { diff --git a/tap_zendesk_chat/streams.py b/tap_zendesk_chat/streams.py index 9c94888..9a2375b 100644 --- a/tap_zendesk_chat/streams.py +++ b/tap_zendesk_chat/streams.py @@ -1,19 +1,13 @@ from datetime import datetime, timedelta -import singer -from singer.utils import strptime_to_utc -from singer import metrics, Transformer from typing import Dict -LOGGER = singer.get_logger() +import singer +from singer import Transformer, metrics +from singer.utils import strptime_to_utc +from .utils import break_into_intervals -def break_into_intervals(days, start_time: str, now: datetime): - delta = timedelta(days=days) - start_dt = strptime_to_utc(start_time) - while start_dt < now: - end_dt = min(start_dt + delta, now) - yield start_dt, end_dt - start_dt = end_dt +LOGGER = singer.get_logger() class Stream: @@ -22,9 +16,11 @@ class Stream: Important class properties: :var tap_stream_id: - :var pk_fields: A list of primary key fields""" + :var pk_fields: A list of primary key fields + """ replication_key = set() + def __init__(self, tap_stream_id, pk_fields): self.tap_stream_id = tap_stream_id self.pk_fields = pk_fields @@ -71,7 +67,8 @@ def sync(self, ctx, schema: Dict, stream_metadata: Dict, transformer: Transforme class Chats(Stream): - replication_key = {'timestamp', 'end_timestamp'} + replication_key = {"timestamp", "end_timestamp"} + def _bulk_chats(self, ctx, chat_ids): if not chat_ids: return [] @@ -79,16 +76,13 @@ def _bulk_chats(self, ctx, chat_ids): body = ctx.client.request(self.tap_stream_id, params=params) return list(body["docs"].values()) - def _search(self, ctx, chat_type, ts_field, - start_dt: datetime, end_dt: datetime): - params = { - "q": "type:{} AND {}:[{} TO {}]" - .format(chat_type, ts_field, start_dt.isoformat(), end_dt.isoformat()) - } - return ctx.client.request( - self.tap_stream_id, params=params, url_extra="/search") + def _search(self, ctx, chat_type, ts_field, start_dt: datetime, end_dt: datetime): + params = {"q": f"type:{chat_type} AND {ts_field}:[{start_dt.isoformat()} TO {end_dt.isoformat()}]"} + return ctx.client.request(self.tap_stream_id, params=params, url_extra="/search") - def _pull(self, ctx, chat_type, ts_field, *, full_sync, schema: Dict, stream_metadata: Dict, transformer: Transformer): + def _pull( + self, ctx, chat_type, ts_field, *, full_sync, schema: Dict, stream_metadata: Dict, transformer: Transformer + ): """Pulls and writes pages of data for the given chat_type, where chat_type can be either "chat" or "offline_msg". @@ -145,16 +139,34 @@ def _should_run_full_sync(self, ctx): return True next_sync = strptime_to_utc(last_sync) + timedelta(days=int(sync_days)) if next_sync <= ctx.now: - LOGGER.info("Running full sync of chats: " - "last sync was %s, configured to run every %s days", - last_sync, sync_days) + LOGGER.info( + "Running full sync of chats: " "last sync was %s, configured to run every %s days", + last_sync, + sync_days, + ) return True return False def sync(self, ctx, schema: Dict, stream_metadata: Dict, transformer: Transformer): full_sync = self._should_run_full_sync(ctx) - self._pull(ctx, "chat", "end_timestamp", full_sync=full_sync,schema=schema,stream_metadata=stream_metadata,transformer=transformer) - self._pull(ctx, "offline_msg", "timestamp", full_sync=full_sync,schema=schema,stream_metadata=stream_metadata,transformer=transformer) + self._pull( + ctx, + "chat", + "end_timestamp", + full_sync=full_sync, + schema=schema, + stream_metadata=stream_metadata, + transformer=transformer, + ) + self._pull( + ctx, + "offline_msg", + "timestamp", + full_sync=full_sync, + schema=schema, + stream_metadata=stream_metadata, + transformer=transformer, + ) if full_sync: ctx.state["chats_last_full_sync"] = ctx.now.isoformat() ctx.write_state() @@ -172,7 +184,7 @@ def sync(self, ctx, schema: Dict, stream_metadata: Dict, transformer: Transforme # TODO: Add Additional advanced property in connection_properties } response = ctx.client.request(self.tap_stream_id, params) - page = response.get("visitor",[]) + response.get("ip_address",[]) + page = response.get("visitor", []) + response.get("ip_address", []) if not page: break page = response["visitor"] + response["ip_address"] @@ -183,6 +195,7 @@ def sync(self, ctx, schema: Dict, stream_metadata: Dict, transformer: Transforme ctx.set_bookmark(since_id_offset, None) ctx.write_state() + class Account(Stream): def sync(self, ctx, schema: Dict, stream_metadata: Dict, transformer: Transformer): # The account endpoint returns a single item, so we have to wrap it in @@ -192,17 +205,14 @@ def sync(self, ctx, schema: Dict, stream_metadata: Dict, transformer: Transforme self.write_page([page]) -DEPARTMENTS = Everything("departments", ["id"]) -ACCOUNT = Account("account", ["account_key"]) all_streams = [ Agents("agents", ["id"]), Chats("chats", ["id"]), Everything("shortcuts", ["name"]), Everything("triggers", ["id"]), Bans("bans", ["id"]), - DEPARTMENTS, + Everything("departments", ["id"]), Everything("goals", ["id"]), - ACCOUNT, + Account("account", ["account_key"]), ] -all_stream_ids = [s.tap_stream_id for s in all_streams] -STREAMS = {s.tap_stream_id:s for s in all_streams} \ No newline at end of file +STREAMS = {s.tap_stream_id: s for s in all_streams} diff --git a/tap_zendesk_chat/sync.py b/tap_zendesk_chat/sync.py index 90bf25a..c21f75d 100644 --- a/tap_zendesk_chat/sync.py +++ b/tap_zendesk_chat/sync.py @@ -1,8 +1,19 @@ -from singer import Transformer,metadata,Catalog,write_state,write_schema,set_currently_syncing,get_logger +from singer import ( + Catalog, + Transformer, + get_logger, + metadata, + set_currently_syncing, + write_schema, + write_state, +) + from . import streams + LOGGER = get_logger() -def sync(ctx,catalog: Catalog): + +def sync(ctx, catalog: Catalog): """performs sync for selected streams.""" with Transformer() as transformer: for stream in catalog.get_selected_streams(ctx.state): @@ -14,8 +25,8 @@ def sync(ctx,catalog: Catalog): ctx.state = set_currently_syncing(ctx.state, tap_stream_id) ctx.write_state() write_schema(tap_stream_id, stream_schema, stream_obj.pk_fields, stream.replication_key) - stream_obj.sync(ctx,schema=stream_schema, stream_metadata=stream_metadata, transformer=transformer) + stream_obj.sync(ctx, schema=stream_schema, stream_metadata=stream_metadata, transformer=transformer) ctx.write_state() ctx.state = set_currently_syncing(ctx.state, None) - write_state(ctx.state) \ No newline at end of file + write_state(ctx.state) diff --git a/tap_zendesk_chat/utils.py b/tap_zendesk_chat/utils.py index e3e6d82..a79b51f 100644 --- a/tap_zendesk_chat/utils.py +++ b/tap_zendesk_chat/utils.py @@ -1,14 +1,17 @@ #!/usr/bin/env python3 import os +from datetime import datetime, timedelta + import singer -from singer.utils import load_json +from singer.utils import load_json, strptime_to_utc + def get_abs_path(path): return os.path.join(os.path.dirname(os.path.realpath(__file__)), path) def load_schema(tap_stream_id): - path = "schemas/{}.json".format(tap_stream_id) + path = f"schemas/{tap_stream_id}.json" schema = load_json(get_abs_path(path)) dependencies = schema.pop("tap_schema_dependencies", []) refs = {} @@ -19,3 +22,10 @@ def load_schema(tap_stream_id): return schema +def break_into_intervals(days, start_time: str, now: datetime): + delta = timedelta(days=days) + start_dt = strptime_to_utc(start_time) + while start_dt < now: + end_dt = min(start_dt + delta, now) + yield start_dt, end_dt + start_dt = end_dt From 3bb576624d665792b91504cb0130ab018b0e7c79 Mon Sep 17 00:00:00 2001 From: shantanu73 Date: Thu, 13 Oct 2022 14:26:31 +0000 Subject: [PATCH 11/39] Changes: 1) modified discovery unit tests. --- tests/unittests/test_auth_discovery.py | 39 +++++++++++++++++--------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/tests/unittests/test_auth_discovery.py b/tests/unittests/test_auth_discovery.py index 2ca1c10..7cea506 100644 --- a/tests/unittests/test_auth_discovery.py +++ b/tests/unittests/test_auth_discovery.py @@ -33,7 +33,7 @@ def mock_200_account_endpoint_exception(*args, **kwargs): return MockResponse({}, 200) -class TestBasicAuthInDiscoverMode(unittest.TestCase): +class TestDiscoverMode(unittest.TestCase): def test_basic_auth_no_access_401(self): ''' @@ -49,26 +49,37 @@ def test_basic_auth_no_access_401(self): self.assertIn(expected_error_message, str(e.exception)) @mock.patch('tap_zendesk_chat.utils', return_value=Args()) - @mock.patch('tap_zendesk_chat.discover') - def test_discovery_calls_on_200_access(self, mock_discover, mock_utils): + @mock.patch('singer.catalog.Catalog.from_dict', return_value={"key": "value"}) + def test_discovery_no_config(self, mock_utils, mock_catalog): """ - tests if discovery method is getting called after mocking required_config_keys + tests discovery method when config is None. """ - tap_zendesk_chat.main_impl() - self.assertEqual(mock_discover.call_count, 1) + expected = {"key": "value"} + self.assertEqual(tap_zendesk_chat.discover(None), expected) + + @mock.patch('tap_zendesk_chat.utils', return_value=Args()) + @mock.patch('singer.catalog.Catalog.from_dict', return_value={"key": "value"}) + @mock.patch('tap_zendesk_chat.http.Client') + @mock.patch('tap_zendesk_chat.http.Client.request') + def test_discovery(self, mock_utils, mock_catalog, mock_client, mock_request): + """ + tests discovery method. + """ + expected = {"key": "value"} + self.assertEqual(tap_zendesk_chat.discover(Args().config), expected) class TestAccountEndpointAuthorized(unittest.TestCase): - @mock.patch("requests.Session.send") - def test_is_account_endpoint_verified(self, mock_send): + def test_is_account_not_authorized_404(self): """ - verify if is_account_endpoint_authorized fn returns True boolean on 200 status code + tests if account_not_authorized method in discover raises http 404 """ - args = Args() - client = Client(args.config) - mock_send.return_value = mock_200_account_endpoint_exception() - resp = tap_zendesk_chat.is_account_endpoint_authorized(client) - self.assertEqual(resp, True) + client = Client(Args().config) + with self.assertRaises(HTTPError) as e: + client.request("xxxxxxx") + + expected_error_message = "404 Client Error: Not Found for url:" + self.assertIn(expected_error_message, str(e.exception)) From 9abfd9dadcd6beaed91d573447ff39e34015ff7d Mon Sep 17 00:00:00 2001 From: shantanu73 Date: Tue, 18 Oct 2022 02:28:19 +0000 Subject: [PATCH 12/39] Changes: 1) Fixed unittests as per new code refactoring changes. --- tap_zendesk_chat/discover.py | 3 +- tap_zendesk_chat/streams.py | 40 +++++++++---------- tests/unittests/test_auth_discovery.py | 3 +- tests/unittests/test_streams.py | 14 ------- .../{test_metadata.py => test_utils.py} | 30 +++++++------- 5 files changed, 36 insertions(+), 54 deletions(-) delete mode 100644 tests/unittests/test_streams.py rename tests/unittests/{test_metadata.py => test_utils.py} (71%) diff --git a/tap_zendesk_chat/discover.py b/tap_zendesk_chat/discover.py index 59fa58a..8805d3f 100644 --- a/tap_zendesk_chat/discover.py +++ b/tap_zendesk_chat/discover.py @@ -1,9 +1,8 @@ import singer from requests.exceptions import HTTPError from singer import metadata -from singer.catalog import Catalog, CatalogEntry, Schema +from singer.catalog import Catalog -from . import streams as streams_ from .http import Client from .streams import STREAMS from .utils import load_schema diff --git a/tap_zendesk_chat/streams.py b/tap_zendesk_chat/streams.py index 9a2375b..872ab60 100644 --- a/tap_zendesk_chat/streams.py +++ b/tap_zendesk_chat/streams.py @@ -109,26 +109,26 @@ def _pull( LOGGER.info("Using chat_search_interval_days: %s", interval_days) intervals = break_into_intervals(interval_days, start_time, ctx.now) - with Transformer() as transformer: - for start_dt, end_dt in intervals: - while True: - if next_url: - search_resp = ctx.client.request(self.tap_stream_id, url=next_url) - else: - search_resp = self._search(ctx, chat_type, ts_field, start_dt, end_dt) - next_url = search_resp["next_url"] - ctx.set_bookmark(url_offset_key, next_url) - ctx.write_state() - chat_ids = [r["id"] for r in search_resp["results"]] - chats = self._bulk_chats(ctx, chat_ids) - if chats: - chats = [transformer.transform(rec, schema, metadata=stream_metadata) for rec in chats] - self.write_page(chats) - max_bookmark = max(max_bookmark, *[c[ts_field] for c in chats]) - if not next_url: - break - ctx.set_bookmark(ts_bookmark_key, max_bookmark) + + for start_dt, end_dt in intervals: + while True: + if next_url: + search_resp = ctx.client.request(self.tap_stream_id, url=next_url) + else: + search_resp = self._search(ctx, chat_type, ts_field, start_dt, end_dt) + next_url = search_resp["next_url"] + ctx.set_bookmark(url_offset_key, next_url) ctx.write_state() + chat_ids = [r["id"] for r in search_resp["results"]] + chats = self._bulk_chats(ctx, chat_ids) + if chats: + chats = [transformer.transform(rec, schema, metadata=stream_metadata) for rec in chats] + self.write_page(chats) + max_bookmark = max(max_bookmark, *[c[ts_field] for c in chats]) + if not next_url: + break + ctx.set_bookmark(ts_bookmark_key, max_bookmark) + ctx.write_state() def _should_run_full_sync(self, ctx): sync_days = ctx.config.get("chats_full_sync_days") @@ -140,7 +140,7 @@ def _should_run_full_sync(self, ctx): next_sync = strptime_to_utc(last_sync) + timedelta(days=int(sync_days)) if next_sync <= ctx.now: LOGGER.info( - "Running full sync of chats: " "last sync was %s, configured to run every %s days", + "Running full sync of chats: last sync was %s, configured to run every %s days", last_sync, sync_days, ) diff --git a/tests/unittests/test_auth_discovery.py b/tests/unittests/test_auth_discovery.py index 7cea506..041fcd2 100644 --- a/tests/unittests/test_auth_discovery.py +++ b/tests/unittests/test_auth_discovery.py @@ -59,9 +59,8 @@ def test_discovery_no_config(self, mock_utils, mock_catalog): @mock.patch('tap_zendesk_chat.utils', return_value=Args()) @mock.patch('singer.catalog.Catalog.from_dict', return_value={"key": "value"}) - @mock.patch('tap_zendesk_chat.http.Client') @mock.patch('tap_zendesk_chat.http.Client.request') - def test_discovery(self, mock_utils, mock_catalog, mock_client, mock_request): + def test_discovery(self, mock_utils, mock_catalog, mock_request): """ tests discovery method. """ diff --git a/tests/unittests/test_streams.py b/tests/unittests/test_streams.py deleted file mode 100644 index c21c9c6..0000000 --- a/tests/unittests/test_streams.py +++ /dev/null @@ -1,14 +0,0 @@ -import pendulum -from tap_zendesk_chat.streams import break_into_intervals - - -def test_intervals(): - days = 30 - now = pendulum.parse("2018-02-14T10:30:20") - broken = break_into_intervals(days, "2018-01-02T18:14:33", now) - as_strs = [(x.isoformat(), y.isoformat()) for x, y in broken] - assert as_strs == [ - ("2018-01-02T18:14:33+00:00", "2018-02-01T18:14:33+00:00"), - ("2018-02-01T18:14:33+00:00", "2018-02-14T10:30:20+00:00"), - ] - diff --git a/tests/unittests/test_metadata.py b/tests/unittests/test_utils.py similarity index 71% rename from tests/unittests/test_metadata.py rename to tests/unittests/test_utils.py index 1da89c2..895f029 100644 --- a/tests/unittests/test_metadata.py +++ b/tests/unittests/test_utils.py @@ -1,5 +1,6 @@ -import tap_zendesk_chat +from tap_zendesk_chat import utils import unittest +import pendulum class BaseMetadata: @@ -49,27 +50,24 @@ class TestMetadataFunctions(unittest.TestCase): POSITIVE_TEST_STREAMS = [Account, Departments] NEGATIVE_TEST_STREAM = [Bans] - def test_is_selected(self): - """ - tests is_selected fn in tap_zendesk_chat/__init__.py file - checks if selected field is set as true in metadata - """ - for stream in self.POSITIVE_TEST_STREAMS: - self.assertEqual(True, tap_zendesk_chat.is_selected(stream)) - - for stream in self.NEGATIVE_TEST_STREAM: - self.assertEqual(False, tap_zendesk_chat.is_selected(stream)) - def test_load_schema(self): """ tests load_schema fn in tap_zendesk_chat/__init__.py file checks if length of properties attr equals with size of properties in loaded schema using load_schema fn """ for stream in self.POSITIVE_TEST_STREAMS: - self.assertEquals(len(stream.properties), len(tap_zendesk_chat.load_schema(stream.stream)['properties'])) + self.assertEquals(len(stream.properties), len(utils.load_schema(stream.stream)['properties'])) for stream in self.NEGATIVE_TEST_STREAM: - self.assertNotEqual(len(stream.properties), len(tap_zendesk_chat.load_schema(stream.stream)['properties'])) - - + self.assertNotEqual(len(stream.properties), len(utils.load_schema(stream.stream)['properties'])) + + def test_intervals(self): + days = 30 + now = pendulum.parse("2018-02-14T10:30:20") + broken = utils.break_into_intervals(days, "2018-01-02T18:14:33", now) + as_strs = [(x.isoformat(), y.isoformat()) for x, y in broken] + assert as_strs == [ + ("2018-01-02T18:14:33+00:00", "2018-02-01T18:14:33+00:00"), + ("2018-02-01T18:14:33+00:00", "2018-02-14T10:30:20+00:00"), + ] From 5b7114404c43537391e33aa8369583b991784576 Mon Sep 17 00:00:00 2001 From: shantanu73 Date: Tue, 18 Oct 2022 02:56:52 +0000 Subject: [PATCH 13/39] removed dependancy of unittest on pendulum library --- tests/unittests/test_utils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/unittests/test_utils.py b/tests/unittests/test_utils.py index 895f029..ecef954 100644 --- a/tests/unittests/test_utils.py +++ b/tests/unittests/test_utils.py @@ -1,7 +1,5 @@ from tap_zendesk_chat import utils import unittest -import pendulum - class BaseMetadata: """ @@ -63,7 +61,7 @@ def test_load_schema(self): def test_intervals(self): days = 30 - now = pendulum.parse("2018-02-14T10:30:20") + now = utils.strptime_to_utc("2018-02-14T10:30:20") broken = utils.break_into_intervals(days, "2018-01-02T18:14:33", now) as_strs = [(x.isoformat(), y.isoformat()) for x, y in broken] assert as_strs == [ From ab003100bad640f00ebdc016ba5a37be3266a86d Mon Sep 17 00:00:00 2001 From: VishalP <20889199+Vi6hal@users.noreply.github.com> Date: Tue, 18 Oct 2022 10:30:25 +0530 Subject: [PATCH 14/39] added typehints and docstrings --- config.json.sample | 0 tap_zendesk_chat/context.py | 29 +++++++++++++++++++++-------- tap_zendesk_chat/discover.py | 3 +-- tap_zendesk_chat/streams.py | 2 +- 4 files changed, 23 insertions(+), 11 deletions(-) create mode 100644 config.json.sample diff --git a/config.json.sample b/config.json.sample new file mode 100644 index 0000000..e69de29 diff --git a/tap_zendesk_chat/context.py b/tap_zendesk_chat/context.py index 68ed973..90dd880 100644 --- a/tap_zendesk_chat/context.py +++ b/tap_zendesk_chat/context.py @@ -1,10 +1,17 @@ from datetime import datetime +from typing import Dict, List + +from singer import Catalog,write_state from singer.utils import now -import singer + from .http import Client + class Context: - def __init__(self, config, state, catalog): + """ + Wrapper Class Around state bookmarking + """ + def __init__(self, config :Dict, state :Dict, catalog :Catalog): self.config = config self.state = state self.catalog = catalog @@ -13,16 +20,22 @@ def __init__(self, config, state, catalog): @property def bookmarks(self): + """ + Provides read-only access to bookmarks, creates one if does not exist + """ if "bookmarks" not in self.state: self.state["bookmarks"] = {} return self.state["bookmarks"] - def bookmark(self, path): + def bookmark(self, path :List): + """ + checks the state[file] for a nested path of bookmarks and returns value + """ bookmark = self.bookmarks - for p in path: - if p not in bookmark: - bookmark[p] = {} - bookmark = bookmark[p] + for key in path: + if key not in bookmark: + bookmark[key] = {} + bookmark = bookmark[key] return bookmark def set_bookmark(self, path, val): @@ -38,4 +51,4 @@ def update_start_date_bookmark(self, path): return val def write_state(self): - singer.write_state(self.state) + write_state(self.state) diff --git a/tap_zendesk_chat/discover.py b/tap_zendesk_chat/discover.py index 59fa58a..81b6825 100644 --- a/tap_zendesk_chat/discover.py +++ b/tap_zendesk_chat/discover.py @@ -44,8 +44,7 @@ def discover(config: dict) -> Catalog: """discover function for tap-zendesk-chat.""" if config: client = Client(config) - # perform auth - client.request(STREAMS["departments"].tap_stream_id) + client.request(STREAMS["chats"].tap_stream_id) if account_not_authorized(client): STREAMS.pop("account") streams = [] diff --git a/tap_zendesk_chat/streams.py b/tap_zendesk_chat/streams.py index 9a2375b..e7c0508 100644 --- a/tap_zendesk_chat/streams.py +++ b/tap_zendesk_chat/streams.py @@ -130,7 +130,7 @@ def _pull( ctx.set_bookmark(ts_bookmark_key, max_bookmark) ctx.write_state() - def _should_run_full_sync(self, ctx): + def _should_run_full_sync(self, ctx) -> bool: sync_days = ctx.config.get("chats_full_sync_days") if sync_days: last_sync = ctx.state.get("chats_last_full_sync") From cd1822b4da3afd4c87a5302fc2730052e67a68a1 Mon Sep 17 00:00:00 2001 From: shantanu73 Date: Tue, 18 Oct 2022 05:30:58 +0000 Subject: [PATCH 15/39] Changes: 1) Removed unused import from init file. 2) Fixed bookmarks test. --- tap_zendesk_chat/__init__.py | 2 -- tests/test_bookmarks.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/tap_zendesk_chat/__init__.py b/tap_zendesk_chat/__init__.py index aea14c1..1dd9b03 100644 --- a/tap_zendesk_chat/__init__.py +++ b/tap_zendesk_chat/__init__.py @@ -1,8 +1,6 @@ #!/usr/bin/env python3 import os - import singer -from singer.catalog import Catalog from singer.utils import handle_top_exception, parse_args from .context import Context diff --git a/tests/test_bookmarks.py b/tests/test_bookmarks.py index 6a31258..5287853 100644 --- a/tests/test_bookmarks.py +++ b/tests/test_bookmarks.py @@ -179,7 +179,7 @@ def test_run(self): # Verify the number of records in the second sync is the same as the first self.assertEqual(second_sync_count, first_sync_count) - if stream == 'agents': + if stream in ('agents', 'bans'): self.assertEqual(first_bookmark_key_value, second_bookmark_key_value, {'offset': {'id': None}}) else: # Verify the syncs do not set a bookmark for full table streams From c0f6db5a2f3d90d887a90532e2d6c8ee8de82a3e Mon Sep 17 00:00:00 2001 From: Vi6hal <20889199+Vi6hal@users.noreply.github.com> Date: Thu, 3 Nov 2022 08:16:04 +0000 Subject: [PATCH 16/39] added config param for bans stream page size --- tap_zendesk_chat/__init__.py | 1 + tap_zendesk_chat/context.py | 21 +++++++++------------ tap_zendesk_chat/streams.py | 10 +++++----- 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/tap_zendesk_chat/__init__.py b/tap_zendesk_chat/__init__.py index 1dd9b03..45eb5f7 100644 --- a/tap_zendesk_chat/__init__.py +++ b/tap_zendesk_chat/__init__.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 import os + import singer from singer.utils import handle_top_exception, parse_args diff --git a/tap_zendesk_chat/context.py b/tap_zendesk_chat/context.py index 90dd880..381f07b 100644 --- a/tap_zendesk_chat/context.py +++ b/tap_zendesk_chat/context.py @@ -1,17 +1,16 @@ from datetime import datetime from typing import Dict, List -from singer import Catalog,write_state +from singer import Catalog, write_state from singer.utils import now from .http import Client class Context: - """ - Wrapper Class Around state bookmarking - """ - def __init__(self, config :Dict, state :Dict, catalog :Catalog): + """Wrapper Class Around state bookmarking.""" + + def __init__(self, config: Dict, state: Dict, catalog: Catalog): self.config = config self.state = state self.catalog = catalog @@ -20,17 +19,15 @@ def __init__(self, config :Dict, state :Dict, catalog :Catalog): @property def bookmarks(self): - """ - Provides read-only access to bookmarks, creates one if does not exist - """ + """Provides read-only access to bookmarks, creates one if does not + exist.""" if "bookmarks" not in self.state: self.state["bookmarks"] = {} return self.state["bookmarks"] - def bookmark(self, path :List): - """ - checks the state[file] for a nested path of bookmarks and returns value - """ + def bookmark(self, path: List): + """checks the state[file] for a nested path of bookmarks and returns + value.""" bookmark = self.bookmarks for key in path: if key not in bookmark: diff --git a/tap_zendesk_chat/streams.py b/tap_zendesk_chat/streams.py index 6234d35..60f20c2 100644 --- a/tap_zendesk_chat/streams.py +++ b/tap_zendesk_chat/streams.py @@ -180,7 +180,7 @@ def sync(self, ctx, schema: Dict, stream_metadata: Dict, transformer: Transforme params = { "since_id": since_id, - "limit": ctx.config.get("agents_page_limit", 100), + "limit": ctx.config.get("bans_page_limit", 100), # TODO: Add Additional advanced property in connection_properties } response = ctx.client.request(self.tap_stream_id, params) @@ -206,13 +206,13 @@ def sync(self, ctx, schema: Dict, stream_metadata: Dict, transformer: Transforme all_streams = [ + Account("account", ["account_key"]), Agents("agents", ["id"]), - Chats("chats", ["id"]), - Everything("shortcuts", ["name"]), - Everything("triggers", ["id"]), Bans("bans", ["id"]), + Chats("chats", ["id"]), Everything("departments", ["id"]), Everything("goals", ["id"]), - Account("account", ["account_key"]), + Everything("shortcuts", ["name"]), + Everything("triggers", ["id"]), ] STREAMS = {s.tap_stream_id: s for s in all_streams} From 3edd8652fd7aefc9caa11d8b6adb385743307fbf Mon Sep 17 00:00:00 2001 From: Vi6hal <20889199+Vi6hal@users.noreply.github.com> Date: Thu, 3 Nov 2022 08:27:41 +0000 Subject: [PATCH 17/39] added warning for 400 exception --- tap_zendesk_chat/http.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tap_zendesk_chat/http.py b/tap_zendesk_chat/http.py index 2412ac2..a769e32 100644 --- a/tap_zendesk_chat/http.py +++ b/tap_zendesk_chat/http.py @@ -32,5 +32,8 @@ def request(self, tap_stream_id, params=None, url=None, url_extra=""): timer.tags[metrics.Tag.http_status_code] = response.status_code if response.status_code in [429, 502]: raise RateLimitException() + elif response.status_code == 400: + LOGGER.warning("The amount of data present for in %s stream is huge,\ + The api has a pagination limit of 251 pages, please reduce the search window for this stream") response.raise_for_status() return response.json() From 2c2c5cb5e072b4abad84b02a9385ee7293c3bf77 Mon Sep 17 00:00:00 2001 From: Vi6hal <20889199+Vi6hal@users.noreply.github.com> Date: Thu, 3 Nov 2022 09:22:13 +0000 Subject: [PATCH 18/39] fixed schema issues --- tap_zendesk_chat/schemas/bans.json | 7 +++++++ tap_zendesk_chat/schemas/shortcuts.json | 9 +++++++++ 2 files changed, 16 insertions(+) diff --git a/tap_zendesk_chat/schemas/bans.json b/tap_zendesk_chat/schemas/bans.json index ed1541e..1538c2e 100644 --- a/tap_zendesk_chat/schemas/bans.json +++ b/tap_zendesk_chat/schemas/bans.json @@ -38,6 +38,13 @@ "null", "string" ] + }, + "created_at": { + "type": [ + "null", + "string" + ], + "format": "date-time" } } } diff --git a/tap_zendesk_chat/schemas/shortcuts.json b/tap_zendesk_chat/schemas/shortcuts.json index a9228ed..6068203 100644 --- a/tap_zendesk_chat/schemas/shortcuts.json +++ b/tap_zendesk_chat/schemas/shortcuts.json @@ -48,6 +48,15 @@ "items": { "type": "integer" } + }, + "agents": { + "type": [ + "null", + "array" + ], + "items": { + "type": "integer" + } } }, "type": [ From 9a22a35e947acb1e4f3e0b6dcb4bbe784ffe3645 Mon Sep 17 00:00:00 2001 From: VishalP <20889199+Vi6hal@users.noreply.github.com> Date: Tue, 8 Nov 2022 14:31:31 +0530 Subject: [PATCH 19/39] added replication method to catalog --- tap_zendesk_chat/discover.py | 1 + tap_zendesk_chat/streams.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/tap_zendesk_chat/discover.py b/tap_zendesk_chat/discover.py index c31c7c4..10c87bc 100644 --- a/tap_zendesk_chat/discover.py +++ b/tap_zendesk_chat/discover.py @@ -31,6 +31,7 @@ def build_metadata(raw_schema: dict, stream): mdata = metadata.new() metadata.write(mdata, (), "valid-replication-keys", list(stream.replication_key)) metadata.write(mdata, (), "table-key-properties", list(stream.pk_fields)) + metadata.write(mdata, (), "forced-replication-method", stream.forced_replication_method) for prop in raw_schema["properties"].keys(): if prop in stream.replication_key or prop in stream.pk_fields: metadata.write(mdata, ("properties", prop), "inclusion", "automatic") diff --git a/tap_zendesk_chat/streams.py b/tap_zendesk_chat/streams.py index 60f20c2..864ae61 100644 --- a/tap_zendesk_chat/streams.py +++ b/tap_zendesk_chat/streams.py @@ -20,6 +20,7 @@ class Stream: """ replication_key = set() + forced_replication_method = "FULL_TABLE" def __init__(self, tap_stream_id, pk_fields): self.tap_stream_id = tap_stream_id @@ -68,6 +69,7 @@ def sync(self, ctx, schema: Dict, stream_metadata: Dict, transformer: Transforme class Chats(Stream): replication_key = {"timestamp", "end_timestamp"} + forced_replication_method = "INCREMENTAL" def _bulk_chats(self, ctx, chat_ids): if not chat_ids: From 24afc7eaf9fd13083116136c7d22dda247600f1f Mon Sep 17 00:00:00 2001 From: Vi6hal <20889199+Vi6hal@users.noreply.github.com> Date: Wed, 9 Nov 2022 07:26:34 +0000 Subject: [PATCH 20/39] fixed automatic fields issue --- tap_zendesk_chat/discover.py | 2 +- tap_zendesk_chat/streams.py | 19 ++++++++++--------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/tap_zendesk_chat/discover.py b/tap_zendesk_chat/discover.py index 10c87bc..84115ec 100644 --- a/tap_zendesk_chat/discover.py +++ b/tap_zendesk_chat/discover.py @@ -33,7 +33,7 @@ def build_metadata(raw_schema: dict, stream): metadata.write(mdata, (), "table-key-properties", list(stream.pk_fields)) metadata.write(mdata, (), "forced-replication-method", stream.forced_replication_method) for prop in raw_schema["properties"].keys(): - if prop in stream.replication_key or prop in stream.pk_fields: + if (prop in stream.replication_key) or (prop in stream.pk_fields) or (prop in stream.auto_fields): metadata.write(mdata, ("properties", prop), "inclusion", "automatic") else: metadata.write(mdata, ("properties", prop), "inclusion", "available") diff --git a/tap_zendesk_chat/streams.py b/tap_zendesk_chat/streams.py index 864ae61..db7c2bb 100644 --- a/tap_zendesk_chat/streams.py +++ b/tap_zendesk_chat/streams.py @@ -22,9 +22,10 @@ class Stream: replication_key = set() forced_replication_method = "FULL_TABLE" - def __init__(self, tap_stream_id, pk_fields): + def __init__(self, tap_stream_id, pk_fields,auto_fields=None): self.tap_stream_id = tap_stream_id self.pk_fields = pk_fields + self.auto_fields = auto_fields def metrics(self, page): with metrics.record_counter(self.tap_stream_id) as counter: @@ -208,13 +209,13 @@ def sync(self, ctx, schema: Dict, stream_metadata: Dict, transformer: Transforme all_streams = [ - Account("account", ["account_key"]), - Agents("agents", ["id"]), - Bans("bans", ["id"]), - Chats("chats", ["id"]), - Everything("departments", ["id"]), - Everything("goals", ["id"]), - Everything("shortcuts", ["name"]), - Everything("triggers", ["id"]), + Account("account", ["account_key"],[]), + Agents("agents", ["id"],[]), + Bans("bans", ["id"],[]), + Chats("chats", ["id"],["type"]), + Everything("departments", ["id"],[]), + Everything("goals", ["id"],[]), + Everything("shortcuts", ["name"],[]), + Everything("triggers", ["id"],[]), ] STREAMS = {s.tap_stream_id: s for s in all_streams} From d0b5e4ba0c394816e9d148473e9969c7483fc3b8 Mon Sep 17 00:00:00 2001 From: Vi6hal <20889199+Vi6hal@users.noreply.github.com> Date: Thu, 10 Nov 2022 02:29:31 +0000 Subject: [PATCH 21/39] fixed discovery changes --- tap_zendesk_chat/discover.py | 2 +- tap_zendesk_chat/streams.py | 38 +++++++++++++++--------------------- 2 files changed, 17 insertions(+), 23 deletions(-) diff --git a/tap_zendesk_chat/discover.py b/tap_zendesk_chat/discover.py index 84115ec..f9c6423 100644 --- a/tap_zendesk_chat/discover.py +++ b/tap_zendesk_chat/discover.py @@ -33,7 +33,7 @@ def build_metadata(raw_schema: dict, stream): metadata.write(mdata, (), "table-key-properties", list(stream.pk_fields)) metadata.write(mdata, (), "forced-replication-method", stream.forced_replication_method) for prop in raw_schema["properties"].keys(): - if (prop in stream.replication_key) or (prop in stream.pk_fields) or (prop in stream.auto_fields): + if (prop in stream.replication_key) or (prop in stream.pk_fields): metadata.write(mdata, ("properties", prop), "inclusion", "automatic") else: metadata.write(mdata, ("properties", prop), "inclusion", "available") diff --git a/tap_zendesk_chat/streams.py b/tap_zendesk_chat/streams.py index db7c2bb..6f36738 100644 --- a/tap_zendesk_chat/streams.py +++ b/tap_zendesk_chat/streams.py @@ -22,10 +22,9 @@ class Stream: replication_key = set() forced_replication_method = "FULL_TABLE" - def __init__(self, tap_stream_id, pk_fields,auto_fields=None): + def __init__(self, tap_stream_id, pk_fields): self.tap_stream_id = tap_stream_id self.pk_fields = pk_fields - self.auto_fields = auto_fields def metrics(self, page): with metrics.record_counter(self.tap_stream_id) as counter: @@ -84,7 +83,7 @@ def _search(self, ctx, chat_type, ts_field, start_dt: datetime, end_dt: datetime return ctx.client.request(self.tap_stream_id, params=params, url_extra="/search") def _pull( - self, ctx, chat_type, ts_field, *, full_sync, schema: Dict, stream_metadata: Dict, transformer: Transformer + self, ctx, chat_type, ts_field, full_sync, schema: Dict, stream_metadata: Dict, transformer: Transformer ): """Pulls and writes pages of data for the given chat_type, where chat_type can be either "chat" or "offline_msg". @@ -105,10 +104,8 @@ def _pull( next_url = ctx.bookmark(url_offset_key) max_bookmark = start_time - interval_days = 14 - interval_days_str = ctx.config.get("chat_search_interval_days") - if interval_days_str is not None: - interval_days = int(interval_days_str) + + interval_days = int(ctx.config.get("chat_search_interval_days","14")) LOGGER.info("Using chat_search_interval_days: %s", interval_days) intervals = break_into_intervals(interval_days, start_time, ctx.now) @@ -122,8 +119,7 @@ def _pull( next_url = search_resp["next_url"] ctx.set_bookmark(url_offset_key, next_url) ctx.write_state() - chat_ids = [r["id"] for r in search_resp["results"]] - chats = self._bulk_chats(ctx, chat_ids) + chats = self._bulk_chats(ctx, [r["id"] for r in search_resp["results"]]) if chats: chats = [transformer.transform(rec, schema, metadata=stream_metadata) for rec in chats] self.write_page(chats) @@ -179,12 +175,11 @@ class Bans(Stream): def sync(self, ctx, schema: Dict, stream_metadata: Dict, transformer: Transformer): since_id_offset = [self.tap_stream_id, "offset", "id"] since_id = ctx.bookmark(since_id_offset) or 0 - while True: + while True: params = { "since_id": since_id, "limit": ctx.config.get("bans_page_limit", 100), - # TODO: Add Additional advanced property in connection_properties } response = ctx.client.request(self.tap_stream_id, params) page = response.get("visitor", []) + response.get("ip_address", []) @@ -207,15 +202,14 @@ def sync(self, ctx, schema: Dict, stream_metadata: Dict, transformer: Transforme page = transformer.transform(response, schema, metadata=stream_metadata) self.write_page([page]) +STREAMS = { + "account":Account("account", ["account_key"]), + "agents":Agents("agents", ["id"]), + "bans":Bans("bans", ["id"]), + "chats":Chats("chats", ["id"]), + "departments":Everything("departments", ["id"]), + "goals":Everything("goals", ["id"]), + "shortcuts":Everything("shortcuts", ["name"]), + "triggers":Everything("triggers", ["id"]), +} -all_streams = [ - Account("account", ["account_key"],[]), - Agents("agents", ["id"],[]), - Bans("bans", ["id"],[]), - Chats("chats", ["id"],["type"]), - Everything("departments", ["id"],[]), - Everything("goals", ["id"],[]), - Everything("shortcuts", ["name"],[]), - Everything("triggers", ["id"],[]), -] -STREAMS = {s.tap_stream_id: s for s in all_streams} From c435e42065a59f99fbeb0b66ec99e88529c8613a Mon Sep 17 00:00:00 2001 From: Vi6hal <20889199+Vi6hal@users.noreply.github.com> Date: Thu, 10 Nov 2022 12:46:03 +0000 Subject: [PATCH 22/39] fixed pylint issue --- config.json.sample | 4 ++++ tap_zendesk_chat/__init__.py | 2 -- tap_zendesk_chat/http.py | 7 +++--- tap_zendesk_chat/streams.py | 41 +++++++++++++++--------------------- 4 files changed, 25 insertions(+), 29 deletions(-) diff --git a/config.json.sample b/config.json.sample index e69de29..d565082 100644 --- a/config.json.sample +++ b/config.json.sample @@ -0,0 +1,4 @@ +{ + "access_token":"", + "start_date":"12/01/2010" +} diff --git a/tap_zendesk_chat/__init__.py b/tap_zendesk_chat/__init__.py index 45eb5f7..b67a560 100644 --- a/tap_zendesk_chat/__init__.py +++ b/tap_zendesk_chat/__init__.py @@ -1,6 +1,4 @@ #!/usr/bin/env python3 -import os - import singer from singer.utils import handle_top_exception, parse_args diff --git a/tap_zendesk_chat/http.py b/tap_zendesk_chat/http.py index a769e32..59838b9 100644 --- a/tap_zendesk_chat/http.py +++ b/tap_zendesk_chat/http.py @@ -12,7 +12,6 @@ class RateLimitException(Exception): class Client: def __init__(self, config): - # self.session = requests.Session() self.access_token = config["access_token"] self.user_agent = config.get("user_agent") self.session = requests.Session() @@ -33,7 +32,9 @@ def request(self, tap_stream_id, params=None, url=None, url_extra=""): if response.status_code in [429, 502]: raise RateLimitException() elif response.status_code == 400: - LOGGER.warning("The amount of data present for in %s stream is huge,\ - The api has a pagination limit of 251 pages, please reduce the search window for this stream") + LOGGER.warning( + "The amount of data present for in %s stream is huge,\ + The api has a pagination limit of 251 pages, please reduce the search window for this stream" + ) response.raise_for_status() return response.json() diff --git a/tap_zendesk_chat/streams.py b/tap_zendesk_chat/streams.py index 6f36738..fbe5a8a 100644 --- a/tap_zendesk_chat/streams.py +++ b/tap_zendesk_chat/streams.py @@ -1,5 +1,5 @@ from datetime import datetime, timedelta -from typing import Dict +from typing import Dict, List import singer from singer import Transformer, metrics @@ -22,18 +22,16 @@ class Stream: replication_key = set() forced_replication_method = "FULL_TABLE" - def __init__(self, tap_stream_id, pk_fields): + def __init__(self, tap_stream_id: str, pk_fields: List): self.tap_stream_id = tap_stream_id self.pk_fields = pk_fields def metrics(self, page): + "updates the metrics counter for the current stream" with metrics.record_counter(self.tap_stream_id) as counter: counter.increment(len(page)) - def format_response(self, response): - return [response] if isinstance(response, list) else response - - def write_page(self, page): + def write_page(self, page: List): """Formats a list of records in place and outputs the data to stdout.""" singer.write_records(self.tap_stream_id, page) @@ -71,7 +69,7 @@ class Chats(Stream): replication_key = {"timestamp", "end_timestamp"} forced_replication_method = "INCREMENTAL" - def _bulk_chats(self, ctx, chat_ids): + def _bulk_chats(self, ctx, chat_ids: List): if not chat_ids: return [] params = {"ids": ",".join(chat_ids)} @@ -82,9 +80,7 @@ def _search(self, ctx, chat_type, ts_field, start_dt: datetime, end_dt: datetime params = {"q": f"type:{chat_type} AND {ts_field}:[{start_dt.isoformat()} TO {end_dt.isoformat()}]"} return ctx.client.request(self.tap_stream_id, params=params, url_extra="/search") - def _pull( - self, ctx, chat_type, ts_field, full_sync, schema: Dict, stream_metadata: Dict, transformer: Transformer - ): + def _pull(self, ctx, chat_type, ts_field, full_sync, schema: Dict, stream_metadata: Dict, transformer: Transformer): """Pulls and writes pages of data for the given chat_type, where chat_type can be either "chat" or "offline_msg". @@ -104,13 +100,10 @@ def _pull( next_url = ctx.bookmark(url_offset_key) max_bookmark = start_time - - interval_days = int(ctx.config.get("chat_search_interval_days","14")) + interval_days = int(ctx.config.get("chat_search_interval_days", "14")) LOGGER.info("Using chat_search_interval_days: %s", interval_days) - intervals = break_into_intervals(interval_days, start_time, ctx.now) - - for start_dt, end_dt in intervals: + for start_dt, end_dt in break_into_intervals(interval_days, start_time, ctx.now): while True: if next_url: search_resp = ctx.client.request(self.tap_stream_id, url=next_url) @@ -202,14 +195,14 @@ def sync(self, ctx, schema: Dict, stream_metadata: Dict, transformer: Transforme page = transformer.transform(response, schema, metadata=stream_metadata) self.write_page([page]) + STREAMS = { - "account":Account("account", ["account_key"]), - "agents":Agents("agents", ["id"]), - "bans":Bans("bans", ["id"]), - "chats":Chats("chats", ["id"]), - "departments":Everything("departments", ["id"]), - "goals":Everything("goals", ["id"]), - "shortcuts":Everything("shortcuts", ["name"]), - "triggers":Everything("triggers", ["id"]), + "account": Account("account", ["account_key"]), + "agents": Agents("agents", ["id"]), + "bans": Bans("bans", ["id"]), + "chats": Chats("chats", ["id"]), + "departments": Everything("departments", ["id"]), + "goals": Everything("goals", ["id"]), + "shortcuts": Everything("shortcuts", ["name"]), + "triggers": Everything("triggers", ["id"]), } - From 8e2e05cae5bea25e7bc848651176bc9bd0754e28 Mon Sep 17 00:00:00 2001 From: VishalP <20889199+Vi6hal@users.noreply.github.com> Date: Fri, 11 Nov 2022 18:00:23 +0530 Subject: [PATCH 23/39] created classes for each stream --- tap_zendesk_chat/discover.py | 6 +- tap_zendesk_chat/streams.py | 128 ++++++++++++++++++++++------------- tap_zendesk_chat/sync.py | 4 +- 3 files changed, 85 insertions(+), 53 deletions(-) diff --git a/tap_zendesk_chat/discover.py b/tap_zendesk_chat/discover.py index f9c6423..0f0ea8d 100644 --- a/tap_zendesk_chat/discover.py +++ b/tap_zendesk_chat/discover.py @@ -29,11 +29,11 @@ def account_not_authorized(client): def build_metadata(raw_schema: dict, stream): mdata = metadata.new() - metadata.write(mdata, (), "valid-replication-keys", list(stream.replication_key)) - metadata.write(mdata, (), "table-key-properties", list(stream.pk_fields)) + metadata.write(mdata, (), "valid-replication-keys", list(stream.valid_replication_keys)) + metadata.write(mdata, (), "table-key-properties", list(stream.key_properties)) metadata.write(mdata, (), "forced-replication-method", stream.forced_replication_method) for prop in raw_schema["properties"].keys(): - if (prop in stream.replication_key) or (prop in stream.pk_fields): + if (prop in stream.valid_replication_keys) or (prop in stream.key_properties): metadata.write(mdata, ("properties", prop), "inclusion", "automatic") else: metadata.write(mdata, ("properties", prop), "inclusion", "available") diff --git a/tap_zendesk_chat/streams.py b/tap_zendesk_chat/streams.py index fbe5a8a..940d4b8 100644 --- a/tap_zendesk_chat/streams.py +++ b/tap_zendesk_chat/streams.py @@ -10,7 +10,7 @@ LOGGER = singer.get_logger() -class Stream: +class BaseStream: """Information about and functions for syncing streams. Important class properties: @@ -19,12 +19,8 @@ class Stream: :var pk_fields: A list of primary key fields """ - replication_key = set() - forced_replication_method = "FULL_TABLE" - - def __init__(self, tap_stream_id: str, pk_fields: List): - self.tap_stream_id = tap_stream_id - self.pk_fields = pk_fields + valid_replication_keys = set() + tap_stream_id = None def metrics(self, page): "updates the metrics counter for the current stream" @@ -37,15 +33,30 @@ def write_page(self, page: List): singer.write_records(self.tap_stream_id, page) self.metrics(page) - -class Everything(Stream): def sync(self, ctx, schema: Dict, stream_metadata: Dict, transformer: Transformer): response = ctx.client.request(self.tap_stream_id) page = [transformer.transform(rec, schema, metadata=stream_metadata) for rec in response] self.write_page(page) -class Agents(Stream): +class Account(BaseStream): + + tap_stream_id = "account" + key_properties = ["account_key"] + forced_replication_method = "FULL_TABLE" + + def sync(self, ctx, schema: Dict, stream_metadata: Dict, transformer: Transformer): + response = ctx.client.request(self.tap_stream_id) + page = transformer.transform(response, schema, metadata=stream_metadata) + self.write_page([page]) + + +class Agents(BaseStream): + + tap_stream_id = "agents" + key_properties = ["id"] + forced_replication_method = "FULL_TABLE" + def sync(self, ctx, schema: Dict, stream_metadata: Dict, transformer: Transformer): since_id_offset = [self.tap_stream_id, "offset", "id"] since_id = ctx.bookmark(since_id_offset) or 0 @@ -62,12 +73,41 @@ def sync(self, ctx, schema: Dict, stream_metadata: Dict, transformer: Transforme ctx.set_bookmark(since_id_offset, since_id) ctx.write_state() ctx.set_bookmark(since_id_offset, None) - ctx.write_state() -class Chats(Stream): - replication_key = {"timestamp", "end_timestamp"} +class Bans(BaseStream): + + tap_stream_id = "bans" + key_properties = ["id"] + forced_replication_method = "FULL_TABLE" + + def sync(self, ctx, schema: Dict, stream_metadata: Dict, transformer: Transformer): + since_id_offset = [self.tap_stream_id, "offset", "id"] + since_id = ctx.bookmark(since_id_offset) or 0 + + while True: + params = { + "since_id": since_id, + "limit": ctx.config.get("bans_page_limit", 100), + } + response = ctx.client.request(self.tap_stream_id, params) + page = response.get("visitor", []) + response.get("ip_address", []) + if not page: + break + page = response["visitor"] + response["ip_address"] + self.write_page([transformer.transform(rec, schema, metadata=stream_metadata) for rec in page]) + since_id = page[-1]["id"] + 1 + ctx.set_bookmark(since_id_offset, since_id) + ctx.write_state() + ctx.set_bookmark(since_id_offset, None) + + +class Chats(BaseStream): + + tap_stream_id = "chats" + key_properties = ["id"] forced_replication_method = "INCREMENTAL" + valid_replication_keys = {"timestamp", "end_timestamp"} def _bulk_chats(self, ctx, chat_ids: List): if not chat_ids: @@ -164,45 +204,37 @@ def sync(self, ctx, schema: Dict, stream_metadata: Dict, transformer: Transforme ctx.write_state() -class Bans(Stream): - def sync(self, ctx, schema: Dict, stream_metadata: Dict, transformer: Transformer): - since_id_offset = [self.tap_stream_id, "offset", "id"] - since_id = ctx.bookmark(since_id_offset) or 0 +class Departments(BaseStream): + tap_stream_id = "departments" + key_properties = ["id"] + forced_replication_method = "FULL_TABLE" - while True: - params = { - "since_id": since_id, - "limit": ctx.config.get("bans_page_limit", 100), - } - response = ctx.client.request(self.tap_stream_id, params) - page = response.get("visitor", []) + response.get("ip_address", []) - if not page: - break - page = response["visitor"] + response["ip_address"] - self.write_page([transformer.transform(rec, schema, metadata=stream_metadata) for rec in page]) - since_id = page[-1]["id"] + 1 - ctx.set_bookmark(since_id_offset, since_id) - ctx.write_state() - ctx.set_bookmark(since_id_offset, None) - ctx.write_state() +class Goals(BaseStream): + tap_stream_id = "goals" + key_properties = ["id"] + forced_replication_method = "FULL_TABLE" -class Account(Stream): - def sync(self, ctx, schema: Dict, stream_metadata: Dict, transformer: Transformer): - # The account endpoint returns a single item, so we have to wrap it in - # a list to write a "page" - response = ctx.client.request(self.tap_stream_id) - page = transformer.transform(response, schema, metadata=stream_metadata) - self.write_page([page]) + +class Shortcuts(BaseStream): + tap_stream_id = "shortcuts" + key_properties = ["name"] + forced_replication_method = "FULL_TABLE" + + +class Triggers(BaseStream): + tap_stream_id = "triggers" + key_properties = ["id"] + forced_replication_method = "FULL_TABLE" STREAMS = { - "account": Account("account", ["account_key"]), - "agents": Agents("agents", ["id"]), - "bans": Bans("bans", ["id"]), - "chats": Chats("chats", ["id"]), - "departments": Everything("departments", ["id"]), - "goals": Everything("goals", ["id"]), - "shortcuts": Everything("shortcuts", ["name"]), - "triggers": Everything("triggers", ["id"]), + Account.tap_stream_id: Account, + Agents.tap_stream_id: Agents, + Bans.tap_stream_id: Bans, + Chats.tap_stream_id: Chats, + Departments.tap_stream_id: Departments, + Goals.tap_stream_id: Goals, + Shortcuts.tap_stream_id: Shortcuts, + Triggers.tap_stream_id: Triggers, } diff --git a/tap_zendesk_chat/sync.py b/tap_zendesk_chat/sync.py index c21f75d..4135036 100644 --- a/tap_zendesk_chat/sync.py +++ b/tap_zendesk_chat/sync.py @@ -8,7 +8,7 @@ write_state, ) -from . import streams +from .streams import STREAMS LOGGER = get_logger() @@ -20,7 +20,7 @@ def sync(ctx, catalog: Catalog): tap_stream_id = stream.tap_stream_id stream_schema = stream.schema.to_dict() stream_metadata = metadata.to_map(stream.metadata) - stream_obj = streams.STREAMS[tap_stream_id] + stream_obj = STREAMS[tap_stream_id]() LOGGER.info("Starting sync for stream: %s", tap_stream_id) ctx.state = set_currently_syncing(ctx.state, tap_stream_id) ctx.write_state() From 1796c45bad144331c0e590543e7bfb2bbe9d9117 Mon Sep 17 00:00:00 2001 From: VishalP <20889199+Vi6hal@users.noreply.github.com> Date: Fri, 11 Nov 2022 18:29:56 +0530 Subject: [PATCH 24/39] minor enhancements --- tap_zendesk_chat/__init__.py | 2 +- tap_zendesk_chat/http.py | 17 ++++++++--------- tap_zendesk_chat/sync.py | 5 ++--- tap_zendesk_chat/utils.py | 15 +++------------ 4 files changed, 14 insertions(+), 25 deletions(-) diff --git a/tap_zendesk_chat/__init__.py b/tap_zendesk_chat/__init__.py index b67a560..38122a7 100644 --- a/tap_zendesk_chat/__init__.py +++ b/tap_zendesk_chat/__init__.py @@ -18,7 +18,7 @@ def main(): discover(args.config).dump() else: ctx = Context(args.config, args.state, args.catalog or discover(args.config)) - sync(ctx, args.catalog or discover(args.config)) + sync(ctx) if __name__ == "__main__": diff --git a/tap_zendesk_chat/http.py b/tap_zendesk_chat/http.py index 59838b9..9261768 100644 --- a/tap_zendesk_chat/http.py +++ b/tap_zendesk_chat/http.py @@ -13,22 +13,21 @@ class RateLimitException(Exception): class Client: def __init__(self, config): self.access_token = config["access_token"] - self.user_agent = config.get("user_agent") + self.user_agent = config.get("user_agent","tap-zendesk-chat") + self.headers = {} + self.headers["Authorization"] = f"Bearer {self.access_token}" + self.headers["User-Agent"] = self.user_agent self.session = requests.Session() @backoff.on_exception(backoff.expo, RateLimitException, max_tries=10, factor=2) def request(self, tap_stream_id, params=None, url=None, url_extra=""): - if not params: - params = {} with metrics.http_request_timer(tap_stream_id) as timer: - url = url or BASE_URL + "/api/v2/" + tap_stream_id + url_extra - headers = {"Authorization": "Bearer " + self.access_token} - if self.user_agent: - headers["User-Agent"] = self.user_agent + + url = f"{url or BASE_URL}/api/v2/{tap_stream_id}{url_extra}" LOGGER.info("calling %s %s", url, params) - request = requests.Request("GET", url, headers=headers, params=params) - response = self.session.send(request.prepare()) + response = self.session.get(url,headers=self.headers,params=params) timer.tags[metrics.Tag.http_status_code] = response.status_code + if response.status_code in [429, 502]: raise RateLimitException() elif response.status_code == 400: diff --git a/tap_zendesk_chat/sync.py b/tap_zendesk_chat/sync.py index 4135036..abae92f 100644 --- a/tap_zendesk_chat/sync.py +++ b/tap_zendesk_chat/sync.py @@ -1,5 +1,4 @@ from singer import ( - Catalog, Transformer, get_logger, metadata, @@ -13,10 +12,10 @@ LOGGER = get_logger() -def sync(ctx, catalog: Catalog): +def sync(ctx): """performs sync for selected streams.""" with Transformer() as transformer: - for stream in catalog.get_selected_streams(ctx.state): + for stream in ctx.catalog.get_selected_streams(ctx.state): tap_stream_id = stream.tap_stream_id stream_schema = stream.schema.to_dict() stream_metadata = metadata.to_map(stream.metadata) diff --git a/tap_zendesk_chat/utils.py b/tap_zendesk_chat/utils.py index a79b51f..af22419 100644 --- a/tap_zendesk_chat/utils.py +++ b/tap_zendesk_chat/utils.py @@ -1,22 +1,13 @@ #!/usr/bin/env python3 -import os from datetime import datetime, timedelta - +from pathlib import Path import singer from singer.utils import load_json, strptime_to_utc - -def get_abs_path(path): - return os.path.join(os.path.dirname(os.path.realpath(__file__)), path) - - def load_schema(tap_stream_id): - path = f"schemas/{tap_stream_id}.json" - schema = load_json(get_abs_path(path)) + schema = load_json(Path(__file__).parent.resolve()/f"schemas/{tap_stream_id}.json") dependencies = schema.pop("tap_schema_dependencies", []) - refs = {} - for sub_stream_id in dependencies: - refs[sub_stream_id] = load_schema(sub_stream_id) + refs = {ref:load_schema(ref) for ref in dependencies} if refs: singer.resolve_schema_references(schema, refs) return schema From 39661eb41296845779da0f64d27f48062d4a9bdf Mon Sep 17 00:00:00 2001 From: VishalP <20889199+Vi6hal@users.noreply.github.com> Date: Mon, 14 Nov 2022 10:58:29 +0530 Subject: [PATCH 25/39] fixed review comments --- tap_zendesk_chat/discover.py | 22 ++++++---------------- tap_zendesk_chat/http.py | 2 +- tap_zendesk_chat/streams.py | 8 +++----- tap_zendesk_chat/sync.py | 2 +- 4 files changed, 11 insertions(+), 23 deletions(-) diff --git a/tap_zendesk_chat/discover.py b/tap_zendesk_chat/discover.py index 0f0ea8d..becacef 100644 --- a/tap_zendesk_chat/discover.py +++ b/tap_zendesk_chat/discover.py @@ -2,7 +2,6 @@ from requests.exceptions import HTTPError from singer import metadata from singer.catalog import Catalog - from .http import Client from .streams import STREAMS from .utils import load_schema @@ -26,20 +25,6 @@ def account_not_authorized(client): raise return False - -def build_metadata(raw_schema: dict, stream): - mdata = metadata.new() - metadata.write(mdata, (), "valid-replication-keys", list(stream.valid_replication_keys)) - metadata.write(mdata, (), "table-key-properties", list(stream.key_properties)) - metadata.write(mdata, (), "forced-replication-method", stream.forced_replication_method) - for prop in raw_schema["properties"].keys(): - if (prop in stream.valid_replication_keys) or (prop in stream.key_properties): - metadata.write(mdata, ("properties", prop), "inclusion", "automatic") - else: - metadata.write(mdata, ("properties", prop), "inclusion", "available") - return metadata.to_list(mdata) - - def discover(config: dict) -> Catalog: """discover function for tap-zendesk-chat.""" if config: @@ -55,7 +40,12 @@ def discover(config: dict) -> Catalog: "stream": stream_name, "tap_stream_id": stream.tap_stream_id, "schema": schema, - "metadata": build_metadata(schema, stream), + "metadata": metadata.get_standard_metadata( + schema,stream_name, + list(stream.key_properties), + list(stream.valid_replication_keys), + stream.forced_replication_method + ) } ) return Catalog.from_dict({"streams": streams}) diff --git a/tap_zendesk_chat/http.py b/tap_zendesk_chat/http.py index 9261768..de2189d 100644 --- a/tap_zendesk_chat/http.py +++ b/tap_zendesk_chat/http.py @@ -23,7 +23,7 @@ def __init__(self, config): def request(self, tap_stream_id, params=None, url=None, url_extra=""): with metrics.http_request_timer(tap_stream_id) as timer: - url = f"{url or BASE_URL}/api/v2/{tap_stream_id}{url_extra}" + url = url or f"{BASE_URL}/api/v2/{tap_stream_id}{url_extra}" LOGGER.info("calling %s %s", url, params) response = self.session.get(url,headers=self.headers,params=params) timer.tags[metrics.Tag.http_status_code] = response.status_code diff --git a/tap_zendesk_chat/streams.py b/tap_zendesk_chat/streams.py index 940d4b8..a8b8275 100644 --- a/tap_zendesk_chat/streams.py +++ b/tap_zendesk_chat/streams.py @@ -116,10 +116,6 @@ def _bulk_chats(self, ctx, chat_ids: List): body = ctx.client.request(self.tap_stream_id, params=params) return list(body["docs"].values()) - def _search(self, ctx, chat_type, ts_field, start_dt: datetime, end_dt: datetime): - params = {"q": f"type:{chat_type} AND {ts_field}:[{start_dt.isoformat()} TO {end_dt.isoformat()}]"} - return ctx.client.request(self.tap_stream_id, params=params, url_extra="/search") - def _pull(self, ctx, chat_type, ts_field, full_sync, schema: Dict, stream_metadata: Dict, transformer: Transformer): """Pulls and writes pages of data for the given chat_type, where chat_type can be either "chat" or "offline_msg". @@ -148,7 +144,9 @@ def _pull(self, ctx, chat_type, ts_field, full_sync, schema: Dict, stream_metada if next_url: search_resp = ctx.client.request(self.tap_stream_id, url=next_url) else: - search_resp = self._search(ctx, chat_type, ts_field, start_dt, end_dt) + params = {"q": f"type:{chat_type} AND {ts_field}:[{start_dt.isoformat()} TO {end_dt.isoformat()}]"} + search_resp = ctx.client.request(self.tap_stream_id, params=params, url_extra="/search") + next_url = search_resp["next_url"] ctx.set_bookmark(url_offset_key, next_url) ctx.write_state() diff --git a/tap_zendesk_chat/sync.py b/tap_zendesk_chat/sync.py index abae92f..a351612 100644 --- a/tap_zendesk_chat/sync.py +++ b/tap_zendesk_chat/sync.py @@ -23,7 +23,7 @@ def sync(ctx): LOGGER.info("Starting sync for stream: %s", tap_stream_id) ctx.state = set_currently_syncing(ctx.state, tap_stream_id) ctx.write_state() - write_schema(tap_stream_id, stream_schema, stream_obj.pk_fields, stream.replication_key) + write_schema(tap_stream_id, stream_schema, stream_obj.key_properties, stream.replication_key) stream_obj.sync(ctx, schema=stream_schema, stream_metadata=stream_metadata, transformer=transformer) ctx.write_state() From 80982d5225afa6272926f78cbc87651d144de80d Mon Sep 17 00:00:00 2001 From: VishalP <20889199+Vi6hal@users.noreply.github.com> Date: Mon, 14 Nov 2022 14:31:31 +0530 Subject: [PATCH 26/39] fixed discovery changes --- tap_zendesk_chat/discover.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/tap_zendesk_chat/discover.py b/tap_zendesk_chat/discover.py index becacef..8989da4 100644 --- a/tap_zendesk_chat/discover.py +++ b/tap_zendesk_chat/discover.py @@ -1,6 +1,6 @@ import singer from requests.exceptions import HTTPError -from singer import metadata +from singer.metadata import write,to_list,to_map,get_standard_metadata from singer.catalog import Catalog from .http import Client from .streams import STREAMS @@ -25,6 +25,26 @@ def account_not_authorized(client): raise return False + +def get_metadata(schema: dict, stream): + """ + tweaked inbuilt singer method to also mark the replication keys as automatic fields + """ + stream_metadata = get_standard_metadata( + **{ + "schema": schema, + "key_properties": list(stream.key_properties), + "valid_replication_keys": list(stream.valid_replication_keys), + "replication_method": stream.forced_replication_method, + } + ) + stream_metadata = to_map(stream_metadata) + if stream.valid_replication_keys is not None: + for key in stream.valid_replication_keys: + stream_metadata = write(stream_metadata, ("properties", key), "inclusion", "automatic") + stream_metadata = to_list(stream_metadata) + return stream_metadata + def discover(config: dict) -> Catalog: """discover function for tap-zendesk-chat.""" if config: @@ -40,12 +60,7 @@ def discover(config: dict) -> Catalog: "stream": stream_name, "tap_stream_id": stream.tap_stream_id, "schema": schema, - "metadata": metadata.get_standard_metadata( - schema,stream_name, - list(stream.key_properties), - list(stream.valid_replication_keys), - stream.forced_replication_method - ) + "metadata": get_metadata(schema, stream), } ) return Catalog.from_dict({"streams": streams}) From ffa3806ce1e82fe5e502d2c1c204d9162fffb12c Mon Sep 17 00:00:00 2001 From: VishalP <20889199+Vi6hal@users.noreply.github.com> Date: Mon, 14 Nov 2022 14:37:34 +0530 Subject: [PATCH 27/39] removed duplicate function --- tap_zendesk_chat/context.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tap_zendesk_chat/context.py b/tap_zendesk_chat/context.py index 381f07b..da06526 100644 --- a/tap_zendesk_chat/context.py +++ b/tap_zendesk_chat/context.py @@ -1,6 +1,6 @@ from datetime import datetime from typing import Dict, List - +from singer.bookmarks import ensure_bookmark_path from singer import Catalog, write_state from singer.utils import now @@ -29,11 +29,8 @@ def bookmark(self, path: List): """checks the state[file] for a nested path of bookmarks and returns value.""" bookmark = self.bookmarks - for key in path: - if key not in bookmark: - bookmark[key] = {} - bookmark = bookmark[key] - return bookmark + return ensure_bookmark_path(bookmark,path) + def set_bookmark(self, path, val): if isinstance(val, datetime): From 0adfeec192ecbfdef7847f9e3edd539c641706d2 Mon Sep 17 00:00:00 2001 From: Vishal Date: Mon, 14 Nov 2022 16:30:06 +0530 Subject: [PATCH 28/39] Tdl 18828 add integration tests (#47) * add automation fields * fixed import * fixed test * added test for all_fields and automatic fields * added base for interuptible sync * formatted tests and added interuptible sync * added pagination test * fix bookmark test * fixed bookmark test * fixed issue with 400 * fixed integration test messages * added exception to chats stream * fixed automatic field for chats stream * fixed formatting changes * fixed missing key issue * updated automatic fields test * fixed auto field condition * fixed pylint issue --- README.md | 4 +- tests/base.py | 302 ++++++++++++------------ tests/test_all_fields.py | 93 ++++++++ tests/test_automatic_fields.py | 73 ++++++ tests/test_bookmarks.py | 168 +++++++------ tests/test_discovery.py | 128 +++++----- tests/test_interupted_sync.py | 100 ++++++++ tests/test_pagination.py | 87 +++++++ tests/test_start_date.py | 82 +++---- tests/unittests/test_auth_discovery.py | 39 ++- tests/unittests/test_context.py | 40 ++-- tests/unittests/test_http_exceptions.py | 32 +-- tests/unittests/test_utils.py | 75 +++--- 13 files changed, 774 insertions(+), 449 deletions(-) create mode 100644 tests/test_all_fields.py create mode 100644 tests/test_automatic_fields.py create mode 100644 tests/test_interupted_sync.py create mode 100644 tests/test_pagination.py diff --git a/README.md b/README.md index 6a9e278..573666c 100644 --- a/README.md +++ b/README.md @@ -37,8 +37,8 @@ authorization request), log into your Zendesk Chat / Zopim account, go to Settings -> Account -> API -> Add API Client Once you create the API Client you will receive a client ID and client secret. -Use these in conjunction with your chose method of performing the OAuth 2 -reqeust to obtain an access token to your (or a third-party) Zendesk Chat / +Use these in conjunction with your choice method of performing the OAuth 2 +request to obtain an access token to your (or a third-party) Zendesk Chat / Zopim account. 3. Create the Config File diff --git a/tests/base.py b/tests/base.py index af864be..974d5be 100644 --- a/tests/base.py +++ b/tests/base.py @@ -1,24 +1,16 @@ -""" -Setup expectations for test sub classes -Run discovery for as a prerequisite for most tests -""" -import unittest -import os +"""Setup expectations for test sub classes Run discovery for as a prerequisite +for most tests.""" import json -import decimal +import os +import unittest from datetime import datetime as dt from datetime import timezone as tz -from singer import utils +from typing import Dict, Set from tap_tester import connections, menagerie, runner class BaseTapTest(unittest.TestCase): - """ - Setup expectations for test sub classes - Run discovery for as a prerequisite for most tests - """ - REPLICATION_KEYS = "valid-replication-keys" PRIMARY_KEYS = "table-key-properties" REPLICATION_METHOD = "forced-replication-method" @@ -28,20 +20,18 @@ class BaseTapTest(unittest.TestCase): @staticmethod def tap_name(): - """The name of the tap""" + """The name of the tap.""" return "tap-zendesk-chat" @staticmethod def get_type(): - """the expected url route ending""" + """the expected url route ending.""" return "platform.zendesk-chat" def get_properties(self, original: bool = True): """Configuration properties required for the tap.""" - return_value = { - 'start_date': dt.strftime(dt.today(), self.START_DATE_FORMAT) - } + return_value = {"start_date": dt.strftime(dt.today(), self.START_DATE_FORMAT)} if original: return return_value @@ -55,40 +45,25 @@ def get_properties(self, original: bool = True): @staticmethod def get_credentials(): - """Authentication information for the test account""" - return { - 'access_token': os.getenv('TAP_ZENDESK_CHAT_ACCESS_TOKEN') - } + """Authentication information for the test account.""" + return {"access_token": os.getenv("TAP_ZENDESK_CHAT_ACCESS_TOKEN")} def expected_metadata(self): - """The expected streams and metadata about the streams""" + """The expected streams and metadata about the streams.""" default = { self.PRIMARY_KEYS: {"id"}, self.REPLICATION_METHOD: self.FULL, } - shortcuts_rep_key = { - self.PRIMARY_KEYS: {"name"}, - self.REPLICATION_METHOD: self.FULL - } + shortcuts_rep_key = {self.PRIMARY_KEYS: {"name"}, self.REPLICATION_METHOD: self.FULL} - - account_rep_key = { - self.PRIMARY_KEYS: {"account_key"}, - self.REPLICATION_METHOD: self.FULL - } + account_rep_key = {self.PRIMARY_KEYS: {"account_key"}, self.REPLICATION_METHOD: self.FULL} chats_rep_key = { self.PRIMARY_KEYS: {"id"}, - self.REPLICATION_KEYS: {'timestamp', 'end_timestamp'}, - self.REPLICATION_METHOD: self.INCREMENTAL - } - - agents_rep_key = { - self.PRIMARY_KEYS: {"id"}, - self.REPLICATION_METHOD: self.FULL, - self.REPLICATION_KEYS: {'id'} + self.REPLICATION_KEYS: {"timestamp", "end_timestamp"}, + self.REPLICATION_METHOD: self.INCREMENTAL, } return { @@ -103,51 +78,55 @@ def expected_metadata(self): } def expected_streams(self): - """A set of expected stream names""" + """A set of expected stream names.""" return set(self.expected_metadata().keys()) def expected_primary_keys(self): - """ - return a dictionary with key of table name - and value as a set of primary key fields - """ - return {table: properties.get(self.PRIMARY_KEYS, set()) - for table, properties in self.expected_metadata().items()} + """return a dictionary with key of table name and value as a set of + primary key fields.""" + return { + table: properties.get(self.PRIMARY_KEYS, set()) for table, properties in self.expected_metadata().items() + } def expected_replication_keys(self): - """ - return a dictionary with key of table name - and value as a set of replication key fields - """ - return {table: properties.get(self.REPLICATION_KEYS, set()) - for table, properties in self.expected_metadata().items()} + """return a dictionary with key of table name and value as a set of + replication key fields.""" + return { + table: properties.get(self.REPLICATION_KEYS, set()) + for table, properties in self.expected_metadata().items() + } def expected_automatic_fields(self): - return {table: self.expected_primary_keys().get(table) | self.expected_replication_keys().get(table) - for table in self.expected_metadata()} + return { + table: self.expected_primary_keys().get(table) | self.expected_replication_keys().get(table) + for table in self.expected_metadata() + } def expected_replication_method(self): - """return a dictionary with key of table name nd value of replication method""" - return {table: properties.get(self.REPLICATION_METHOD, None) - for table, properties - in self.expected_metadata().items()} + """return a dictionary with key of table name and value of replication + method.""" + return { + table: properties.get(self.REPLICATION_METHOD, None) + for table, properties in self.expected_metadata().items() + } def setUp(self): - """Verify that you have set the prerequisites to run the tap (creds, etc.)""" - env_keys = {'TAP_ZENDESK_CHAT_ACCESS_TOKEN'} + """Verify that you have set the prerequisites to run the tap (creds, + etc.)""" + env_keys = {"TAP_ZENDESK_CHAT_ACCESS_TOKEN"} missing_envs = [x for x in env_keys if os.getenv(x) is None] if missing_envs: - raise Exception("Set environment variables: {}".format(missing_envs)) + raise Exception(f"Set environment variables: {missing_envs}") ######################### # Helper Methods # ######################### def run_sync(self, conn_id): - """ - Run a sync job and make sure it exited properly. - Return a dictionary with keys of streams synced - and values of records synced for each stream + """Run a sync job and make sure it exited properly. + + Return a dictionary with keys of streams synced and values of + records synced for each stream """ # Run a sync job using orchestrator sync_job_name = runner.run_sync_mode(self, conn_id) @@ -158,14 +137,14 @@ def run_sync(self, conn_id): # Verify actual rows were synced sync_record_count = runner.examine_target_output_file( - self, conn_id, self.expected_streams(), self.expected_primary_keys()) + self, conn_id, self.expected_streams(), self.expected_primary_keys() + ) return sync_record_count @staticmethod def local_to_utc(date: dt): - """Convert a datetime with timezone information to utc""" - utc = dt(date.year, date.month, date.day, date.hour, date.minute, - date.second, date.microsecond, tz.utc) + """Convert a datetime with timezone information to utc.""" + utc = dt(date.year, date.month, date.day, date.hour, date.minute, date.second, date.microsecond, tz.utc) if date.tzinfo and hasattr(date.tzinfo, "_offset"): utc += date.tzinfo._offset @@ -173,37 +152,40 @@ def local_to_utc(date: dt): return utc def max_bookmarks_by_stream(self, sync_records): - """ - Return the maximum value for the replication key for the events stream - which is the bookmark expected value for updated records. + """Return the maximum value for the replication key for the events + stream which is the bookmark expected value for updated records. - Comparisons are based on the class of the bookmark value. Dates will be - string compared which works for ISO date-time strings. + Comparisons are based on the class of the bookmark value. Dates + will be string compared which works for ISO date-time strings. """ max_bookmarks = {} chats_offline = [] chats = [] for stream, batch in sync_records.items(): - upsert_messages = [m for m in batch.get('messages') if m['action'] == 'upsert'] + upsert_messages = [m for m in batch.get("messages") if m["action"] == "upsert"] if stream == "chats": for msg in upsert_messages: - if msg['data']['type'] == 'chat': + if msg["data"]["type"] == "chat": chats.append(msg) - elif msg['data']['type'] == 'offline_msg': + elif msg["data"]["type"] == "offline_msg": chats_offline.append(msg) else: - raise RuntimeError("Got unexpected chat type: " + msg['data']['type']) + raise RuntimeError("Got unexpected chat type: " + msg["data"]["type"]) chats_bookmark_key = "end_timestamp" chats_offline_bookmark_key = "timestamp" bk_values_chats = [message["data"].get(chats_bookmark_key) for message in chats] bk_values_chats_offline = [message["data"].get(chats_offline_bookmark_key) for message in chats_offline] - max_bookmarks['chats.chat'] = {chats_bookmark_key : max(bk_values_chats, default=None)} - max_bookmarks['chats.offline_msg'] = {chats_offline_bookmark_key : max(bk_values_chats_offline, default=None)} + max_bookmarks["chats.chat"] = {chats_bookmark_key: max(bk_values_chats, default=None)} + max_bookmarks["chats.offline_msg"] = { + chats_offline_bookmark_key: max(bk_values_chats_offline, default=None) + } else: stream_bookmark_key = self.expected_replication_keys().get(stream) or set() with self.subTest(stream=stream): - assert not stream_bookmark_key or len(stream_bookmark_key) == 1 # There shouldn't be a compound replication key + assert ( + not stream_bookmark_key or len(stream_bookmark_key) == 1 + ) # There shouldn't be a compound replication key if not stream_bookmark_key: continue stream_bookmark_key = stream_bookmark_key.pop() @@ -221,34 +203,35 @@ def max_bookmarks_by_stream(self, sync_records): max_bookmarks[stream][stream_bookmark_key] = bk_value return max_bookmarks - def min_bookmarks_by_stream(self, sync_records): - """ - Return the minimum value for the replication key for each stream - """ + """Return the minimum value for the replication key for each stream.""" min_bookmarks = {} chats = [] chats_offline = [] for stream, batch in sync_records.items(): - upsert_messages = [m for m in batch.get('messages') if m['action'] == 'upsert'] + upsert_messages = [m for m in batch.get("messages") if m["action"] == "upsert"] if stream == "chats": for msg in upsert_messages: - if msg['data']['type'] == 'chat': + if msg["data"]["type"] == "chat": chats.append(msg) - elif msg['data']['type'] == 'offline_msg': + elif msg["data"]["type"] == "offline_msg": chats_offline.append(msg) else: - raise RuntimeError("Got unexpected chat type: " + msg['data']['type']) + raise RuntimeError("Got unexpected chat type: " + msg["data"]["type"]) chats_bookmark_key = "end_timestamp" chats_offline_bookmark_key = "timestamp" bk_values_chats = [message["data"].get(chats_bookmark_key) for message in chats] bk_values_chats_offline = [message["data"].get(chats_offline_bookmark_key) for message in chats_offline] - min_bookmarks['chats.chat'] = {chats_bookmark_key : min(bk_values_chats, default=None)} - min_bookmarks['chats.offline_msg'] = {chats_offline_bookmark_key : min(bk_values_chats_offline, default=None)} + min_bookmarks["chats.chat"] = {chats_bookmark_key: min(bk_values_chats, default=None)} + min_bookmarks["chats.offline_msg"] = { + chats_offline_bookmark_key: min(bk_values_chats_offline, default=None) + } else: stream_bookmark_key = self.expected_replication_keys().get(stream) or set() with self.subTest(stream=stream): - assert not stream_bookmark_key or len(stream_bookmark_key) == 1 # There shouldn't be a compound replication key + assert ( + not stream_bookmark_key or len(stream_bookmark_key) == 1 + ) # There shouldn't be a compound replication key if not stream_bookmark_key: continue stream_bookmark_key = stream_bookmark_key.pop() @@ -264,36 +247,32 @@ def min_bookmarks_by_stream(self, sync_records): if bk_value < min_bookmarks[stream][stream_bookmark_key]: min_bookmarks[stream][stream_bookmark_key] = bk_value - print(min_bookmarks) return min_bookmarks - def select_all_streams_and_fields(self, conn_id, catalogs, select_all_fields: bool = True, exclude_streams=None): - """Select all streams and all fields within streams""" + """Select all streams and all fields within streams.""" for catalog in catalogs: - if exclude_streams and catalog.get('stream_name') in exclude_streams: + if exclude_streams and catalog.get("stream_name") in exclude_streams: continue - schema = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + schema = menagerie.get_annotated_schema(conn_id, catalog["stream_id"]) non_selected_properties = [] if not select_all_fields: # get a list of all properties so that none are selected - non_selected_properties = schema.get('annotated-schema', {}).get( - 'properties', {}) + non_selected_properties = schema.get("annotated-schema", {}).get("properties", {}) # remove properties that are automatic - for prop in self.expected_automatic_fields().get(catalog['stream_name'], []): + for prop in self.expected_automatic_fields().get(catalog["stream_name"], []): if prop in non_selected_properties: del non_selected_properties[prop] non_selected_properties = non_selected_properties.keys() additional_md = [] connections.select_catalog_and_fields_via_metadata( - conn_id, catalog, schema, additional_md=additional_md, - non_selected_fields=non_selected_properties + conn_id, catalog, schema, additional_md=additional_md, non_selected_fields=non_selected_properties ) def create_connection(self, original_properties: bool = True, original_credentials: bool = True): - """Create a new connection with the test name""" + """Create a new connection with the test name.""" # Create the connection conn_id = connections.ensure_connection(self, original_properties, original_credentials) @@ -309,18 +288,18 @@ def create_connection(self, original_properties: bool = True, original_credentia def get_selected_fields_from_metadata(metadata): selected_fields = set() for field in metadata: - is_field_metadata = len(field['breadcrumb']) > 1 - inclusion_automatic_or_selected = (field['metadata']['inclusion'] == 'automatic' - or field['metadata']['selected'] is True) - if is_field_metadata and inclusion_automatic_or_selected: - selected_fields.add(field['breadcrumb'][1]) + is_field_metadata = len(field["breadcrumb"]) > 1 + if is_field_metadata: + inclusion_automatic_or_selected = ( + field["metadata"]["inclusion"] == "automatic" or field["metadata"]["selected"] is True + ) + if inclusion_automatic_or_selected: + selected_fields.add(field["breadcrumb"][1]) return selected_fields - def run_and_verify_check_mode(self, conn_id): - """ - Run the tap in check mode and verify it succeeds. - This should be ran prior to field selection and initial sync. + """Run the tap in check mode and verify it succeeds. This should be ran + prior to field selection and initial sync. Return the connection id and found catalogs from menagerie. """ @@ -333,23 +312,20 @@ def run_and_verify_check_mode(self, conn_id): found_catalogs = menagerie.get_catalogs(conn_id) - self.assertGreater(len(found_catalogs), 0, msg="unable to locate schemas for connection {}".format(conn_id)) - found_catalog_names = set(map(lambda c: c['tap_stream_id'], found_catalogs)) + self.assertGreater(len(found_catalogs), 0, msg=f"unable to locate schemas for connection {conn_id}") + found_catalog_names = set(map(lambda c: c["tap_stream_id"], found_catalogs)) diff = self.expected_streams().symmetric_difference(found_catalog_names) - self.assertEqual(len(diff), 0, msg="discovered schemas do not match: {}".format(diff)) - print("discovered schemas are OK") - + self.assertEqual(len(diff), 0, msg=f"discovered schemas do not match: {diff}") return found_catalogs def run_and_verify_sync(self, conn_id, clear_state=False): - """ - Clear the connections state in menagerie and Run a Sync. - Verify the exit code following the sync. + """Clear the connections state in menagerie and Run a Sync. Verify the + exit code following the sync. Return the connection id and record count by stream """ if clear_state: - #clear state + # clear state menagerie.set_state(conn_id, {}) # run sync @@ -360,52 +336,57 @@ def run_and_verify_sync(self, conn_id, clear_state=False): menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) # read target output - record_count_by_stream = runner.examine_target_output_file(self, conn_id, - self.expected_streams(), - self.expected_primary_keys()) + record_count_by_stream = runner.examine_target_output_file( + self, conn_id, self.expected_streams(), self.expected_primary_keys() + ) return record_count_by_stream - def perform_and_verify_table_and_field_selection(self, conn_id, found_catalogs, streams_to_select, select_all_fields=True): - """ - Perform table and field selection based off of the streams to select set and field selection parameters. - Verfify this results in the expected streams selected and all or no fields selected for those streams. + def perform_and_verify_table_and_field_selection( + self, conn_id, found_catalogs, streams_to_select, select_all_fields=True + ): + """Perform table and field selection based off of the streams to select + set and field selection parameters. + + Verify this results in the expected streams selected and all or + no fields selected for those streams. """ # Select all available fields or select no fields from all testable streams exclude_streams = self.expected_streams().difference(streams_to_select) self.select_all_streams_and_fields( - conn_id=conn_id, catalogs=found_catalogs, select_all_fields=select_all_fields, exclude_streams=exclude_streams + conn_id=conn_id, + catalogs=found_catalogs, + select_all_fields=select_all_fields, + exclude_streams=exclude_streams, ) catalogs = menagerie.get_catalogs(conn_id) # Ensure our selection worked for cat in catalogs: - catalog_entry = menagerie.get_annotated_schema(conn_id, cat['stream_id']) + catalog_entry = menagerie.get_annotated_schema(conn_id, cat["stream_id"]) # Verify all testable streams are selected - selected = catalog_entry.get('annotated-schema').get('selected') - print("Validating selection on {}: {}".format(cat['stream_name'], selected)) - if cat['stream_name'] not in streams_to_select: + selected = catalog_entry.get("annotated-schema").get("selected") + if cat["stream_name"] not in streams_to_select: self.assertFalse(selected, msg="Stream selected, but not testable.") - continue # Skip remaining assertions if we aren't selecting this stream + continue # Skip remaining assertions if we aren't selecting this stream self.assertTrue(selected, msg="Stream not selected.") if select_all_fields: # Verify all fields within each selected stream are selected - for field, field_props in catalog_entry.get('annotated-schema').get('properties').items(): - field_selected = field_props.get('selected') - print("\tValidating selection on {}.{}: {}".format(cat['stream_name'], field, field_selected)) + for field, field_props in catalog_entry.get("annotated-schema").get("properties").items(): + field_selected = field_props.get("selected") self.assertTrue(field_selected, msg="Field not selected.") else: # Verify only automatic fields are selected - expected_automatic_fields = self.expected_automatic_fields().get(cat['tap_stream_id']) - selected_fields = self.get_selected_fields_from_metadata(catalog_entry['metadata']) + expected_automatic_fields = self.expected_automatic_fields().get(cat["tap_stream_id"]) + selected_fields = self.get_selected_fields_from_metadata(catalog_entry["metadata"]) self.assertEqual(expected_automatic_fields, selected_fields) def expected_schema_keys(self, stream): - props = self._load_schemas(stream).get(stream).get('properties') + props = self._load_schemas(stream).get(stream).get("properties") if not props: - props = self._load_schemas(stream, shared=True).get(stream).get('properties') + props = self._load_schemas(stream, shared=True).get(stream).get("properties") assert props, "schema not configured proprerly" @@ -420,7 +401,7 @@ def _load_schemas(self, stream, shared: bool = False): file_name = "shared/" + stream[:-1] + ".json" if shared else stream + ".json" path = self._get_abs_path("schemas") + "/" + file_name - final_path = path.replace('tests', self.tap_name().replace('-', '_')) + final_path = path.replace("tests", self.tap_name().replace("-", "_")) with open(final_path) as file: schemas[stream] = json.load(file) @@ -429,5 +410,36 @@ def _load_schemas(self, stream, shared: bool = False): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.start_date = self.get_properties().get('start_date') - self.maxDiff=None + self.start_date = self.get_properties().get("start_date") + self.maxDiff = None + + def create_interrupt_sync_state( + self, state: Dict, interrupt_stream: str, pending_streams: Set, start_date: str + ) -> Dict: + """This function will create a new interrupt sync bookmark state.""" + expected_replication_keys = self.expected_replication_keys() + bookmark_state = state["bookmarks"] + if self.expected_metadata()[interrupt_stream][self.REPLICATION_METHOD] == self.INCREMENTAL: + replication_key = next(iter(expected_replication_keys[interrupt_stream])) + bookmark_date = bookmark_state[interrupt_stream][replication_key] + updated_bookmark_date = self.get_mid_point_date(start_date, bookmark_date) + bookmark_state[interrupt_stream][replication_key] = updated_bookmark_date + state["currently_syncing"] = interrupt_stream + # For pending streams, update the bookmark_value to start-date + for stream in iter(pending_streams): + # Only incremental streams should have the bookmark value + if self.expected_metadata()[stream][self.REPLICATION_METHOD] == self.INCREMENTAL: + replication_key = next(iter(expected_replication_keys[stream])) + bookmark_state[stream][replication_key] = start_date + state["bookmarks"] = bookmark_state + return state + + def get_mid_point_date(self, start_date: str, bookmark_date: str) -> str: + """Function to find the middle date between two dates.""" + date_format = "%Y-%m-%dT%H:%M:%S.%fZ" + start_date_dt = dt.strptime(start_date, date_format) + bookmark_date_dt = dt.strptime(bookmark_date, date_format) + mid_date_dt = start_date_dt.date() + (bookmark_date_dt - start_date_dt) / 2 + # Convert datetime object to string format + mid_date = mid_date_dt.strftime(date_format) + return mid_date diff --git a/tests/test_all_fields.py b/tests/test_all_fields.py new file mode 100644 index 0000000..7c1847c --- /dev/null +++ b/tests/test_all_fields.py @@ -0,0 +1,93 @@ +"""Test that with no fields selected for a stream automatic fields are still +replicated.""" +from base import BaseTapTest +from tap_tester import connections, menagerie, runner + + +class TestZendeskChatAllFields(BaseTapTest): + """Test that all fields selected for a stream are replicated.""" + + @staticmethod + def name(): + return "tap_tester_zendesk_chat_all_fields" + + def test_run(self): + """ + - Verify no unexpected streams were replicated + - Verify that more than just the automatic fields are replicated for each stream. + - Verify all fields for each stream are replicated + """ + expected_streams = self.expected_streams() + conn_id = connections.ensure_connection(self) + found_catalogs = self.run_and_verify_check_mode(conn_id) + catalog_entries = [catalog for catalog in found_catalogs if catalog.get("stream_name") in expected_streams] + self.perform_and_verify_table_and_field_selection( + conn_id, catalog_entries, expected_streams, select_all_fields=True + ) + stream_all_fields = dict() + + for catalog in catalog_entries: + stream_id, stream_name = catalog["stream_id"], catalog["stream_name"] + catalog_entry = menagerie.get_annotated_schema(conn_id, stream_id) + fields_from_field_level_md = [ + md_entry["breadcrumb"][1] for md_entry in catalog_entry["metadata"] if md_entry["breadcrumb"] != [] + ] + stream_all_fields[stream_name] = set(fields_from_field_level_md) + + record_count_by_stream = self.run_and_verify_sync(conn_id) + synced_records = runner.get_records_from_target_output() + + for stream in expected_streams: + with self.subTest(stream=stream): + + expected_all_keys = stream_all_fields[stream] + expected_automatic_keys = self.expected_automatic_fields().get(stream) + data = synced_records.get(stream) + actual_all_keys = set() + for message in data["messages"]: + if message["action"] == "upsert": + actual_all_keys.update(message["data"].keys()) + + self.assertTrue( + expected_automatic_keys.issubset(expected_all_keys), + msg=f'{expected_automatic_keys-expected_all_keys} is not in "expected_all_keys"', + ) + self.assertGreater(len(expected_all_keys), len(expected_automatic_keys)) + expected_all_keys = expected_all_keys - self.KNOWN_MISSING_FIELDS.get(stream, set()) + self.assertGreater( + record_count_by_stream.get(stream, -1), + 0, + msg="The number of records is not over the stream max limit", + ) + self.assertSetEqual(expected_all_keys, actual_all_keys) + + KNOWN_MISSING_FIELDS = { + "agents": { + "scope", + }, + "account": { + "billing", + }, + "shortcuts": { + "departments", + "agents", + }, + } + + def get_properties(self, original: bool = True): + """Configuration properties required for the tap.""" + + return_value = { + "start_date": "2017-01-15T00:00:00Z", + "chat_search_interval_days": 500, + } + + if original: + return return_value + + # Start Date test needs the new connections start date to be prior to the default + assert self.start_date < return_value["start_date"] + + # Assign start date to be the default + return_value["start_date"] = self.start_date + return return_value diff --git a/tests/test_automatic_fields.py b/tests/test_automatic_fields.py new file mode 100644 index 0000000..fc1689e --- /dev/null +++ b/tests/test_automatic_fields.py @@ -0,0 +1,73 @@ +"""Test that with no fields selected for a stream automatic fields are still +replicated.""" +from base import BaseTapTest +from tap_tester import connections, runner +from tap_tester.logger import LOGGER + + +class TestZendeskChatAutomaticFields(BaseTapTest): + """Test that with no fields selected for a stream automatic fields are + still replicated.""" + + @staticmethod + def name(): + return "tap_tester_zendesk_chat_automatic_fields" + + def test_run(self): + """ + - Verify we can deselect all fields except when inclusion=automatic, which is handled by base.py methods + - Verify that only the automatic fields are sent to the target. + - Verify that all replicated records have unique primary key values. + """ + + expected_streams = self.expected_streams() + + conn_id = connections.ensure_connection(self) + found_catalogs = self.run_and_verify_check_mode(conn_id) + catalog_entries = [catalog for catalog in found_catalogs if catalog.get("stream_name") in expected_streams] + self.perform_and_verify_table_and_field_selection( + conn_id, catalog_entries, expected_streams, select_all_fields=False + ) + + # run initial sync + record_count_by_stream = self.run_and_verify_sync(conn_id) + synced_records = runner.get_records_from_target_output() + + for stream in expected_streams: + with self.subTest(stream=stream): + + expected_keys = self.expected_automatic_fields().get(stream) + + data = synced_records.get(stream, {}) + record_messages_keys = [set(row["data"].keys()) for row in data["messages"]] + + self.assertGreater( + record_count_by_stream.get(stream, -1), + 0, + msg="The number of records is not over the stream max limit", + ) + if stream == "chats": + expected_keys_offline_msg = self.expected_automatic_fields().get(stream) - {"end_timestamp"} + for actual_keys in record_messages_keys: + self.assertTrue(actual_keys == expected_keys_offline_msg or actual_keys == expected_keys) + else: + for actual_keys in record_messages_keys: + self.assertSetEqual(expected_keys, actual_keys) + + def get_properties(self, original: bool = True): + """Configuration properties required for the tap.""" + + return_value = { + "start_date": "2017-01-15T00:00:00Z", + "chat_search_interval_days": 500, + } + + if original: + return return_value + + # Start Date test needs the new connections start date to be prior to the default + assert self.start_date < return_value["start_date"] + + # Assign start date to be the default + return_value["start_date"] = self.start_date + return return_value diff --git a/tests/test_bookmarks.py b/tests/test_bookmarks.py index 5287853..efa5ab6 100644 --- a/tests/test_bookmarks.py +++ b/tests/test_bookmarks.py @@ -1,53 +1,35 @@ -import os -import datetime -import dateutil.parser -import pytz - -from tap_tester import runner, menagerie, connections - from base import BaseTapTest +from tap_tester import connections, menagerie, runner +STREAMS_WITH_BOOKMARKS = ["agents", "chats"] -STREAMS_WITH_BOOKMARKS = ['agents', 'chats'] -class BookmarksTest(BaseTapTest): - - expected_record_count = { - 'agents': 3, - 'chats': 223, - 'bans': 22, - 'account': 1, - 'shortcuts': 4, - 'triggers': 12, - 'departments': 1, - 'goals': 2, - } +class TestZendeskChatBookmarks(BaseTapTest): + """Test tap sets a bookmark and respects it for the next sync of a + stream.""" @staticmethod def name(): return "tap_tester_zendesk_chat_bookmarks" - def get_properties(self, original: bool = True): - """Configuration properties required for the tap.""" - return_value = { - 'start_date': '2017-08-15T00:00:00Z', - 'agents_page_limit': 1, - } - if original: - return return_value - - return_value["start_date"] = self.start_date - - return return_value - - def test_run(self): - expected_streams = self.expected_streams() + """ + - Verify that for each stream you can do a sync which records bookmarks. + - Verify that the bookmark is the maximum value sent to the target for the replication key. + - Verify that a second sync respects the bookmark + All data of the second sync is >= the bookmark from the first sync + The number of records in the 2nd sync is less then the first + - Verify that for full table stream, all data replicated in sync 1 is replicated again in sync 2. + + PREREQUISITE + For EACH stream that is incrementally replicated there are multiple rows of data with + different values for the replication key + """ + expected_streams = self.expected_streams() # Testing against ads insights objects - self.start_date = self.get_properties()['start_date'] + self.start_date = self.get_properties()["start_date"] - """A Parametrized Bookmarks Test""" expected_replication_keys = self.expected_replication_keys() expected_replication_methods = self.expected_replication_method() @@ -61,15 +43,16 @@ def test_run(self): found_catalogs = self.run_and_verify_check_mode(conn_id) # Select only the expected streams tables - catalog_entries = [ce for ce in found_catalogs if ce['tap_stream_id'] in expected_streams] - self.perform_and_verify_table_and_field_selection(conn_id, catalog_entries, expected_streams, select_all_fields=True) + catalog_entries = [ce for ce in found_catalogs if ce["tap_stream_id"] in expected_streams] + self.perform_and_verify_table_and_field_selection( + conn_id, catalog_entries, expected_streams, select_all_fields=True + ) # Run a sync job using orchestrator first_sync_record_count = self.run_and_verify_sync(conn_id) first_sync_records = runner.get_records_from_target_output() first_sync_bookmarks = menagerie.get_state(conn_id) - ########################################################################## ### Second Sync ########################################################################## @@ -91,96 +74,98 @@ def test_run(self): # collect information for assertions from syncs 1 & 2 base on expected values first_sync_count = first_sync_record_count.get(stream, 0) second_sync_count = second_sync_record_count.get(stream, 0) - first_sync_messages = [record.get('data') for record in - first_sync_records.get(stream).get('messages') - if record.get('action') == 'upsert'] - second_sync_messages = [record.get('data') for record in - second_sync_records.get(stream).get('messages') - if record.get('action') == 'upsert'] - first_bookmark_key_value = first_sync_bookmarks.get('bookmarks', {}).get(stream) - second_bookmark_key_value = second_sync_bookmarks.get('bookmarks', {}).get(stream) - - # Assert we synced the expected number of records. Ensures pagination happens - self.assertEqual(first_sync_count, self.expected_record_count[stream]) - - - if expected_replication_method == self.INCREMENTAL: # chats is the only incremental stream - - # collect information specific to incremental streams from syncs 1 & 2 - replication_key = next(iter(expected_replication_keys[stream])) + first_sync_messages = [ + record.get("data") + for record in first_sync_records.get(stream).get("messages") + if record.get("action") == "upsert" + ] + second_sync_messages = [ + record.get("data") + for record in second_sync_records.get(stream).get("messages") + if record.get("action") == "upsert" + ] + first_bookmark_key_value = first_sync_bookmarks.get("bookmarks", {}).get(stream) + second_bookmark_key_value = second_sync_bookmarks.get("bookmarks", {}).get(stream) + + if expected_replication_method == self.INCREMENTAL: # chats is the only incremental stream # Verify the first sync sets a bookmark of the expected form self.assertIsNotNone(first_bookmark_key_value) - self.assertIsNotNone(first_bookmark_key_value.get('chat.end_timestamp')) - self.assertIsNotNone(first_bookmark_key_value.get('offline_msg.timestamp')) + self.assertIsNotNone(first_bookmark_key_value.get("chat.end_timestamp")) + self.assertIsNotNone(first_bookmark_key_value.get("offline_msg.timestamp")) # Verify the second sync sets a bookmark of the expected form self.assertIsNotNone(second_bookmark_key_value) - self.assertIsNotNone(second_bookmark_key_value.get('chat.end_timestamp')) - self.assertIsNotNone(second_bookmark_key_value.get('offline_msg.timestamp')) + self.assertIsNotNone(second_bookmark_key_value.get("chat.end_timestamp")) + self.assertIsNotNone(second_bookmark_key_value.get("offline_msg.timestamp")) # Verify the second sync bookmark is Equal to the first sync bookmark - self.assertEqual(second_bookmark_key_value, first_bookmark_key_value) # assumes no changes to data during test + self.assertEqual( + second_bookmark_key_value, first_bookmark_key_value + ) # assumes no changes to data during test for record in second_sync_messages: - if record.get('type') == 'chat': + if record.get("type") == "chat": # Verify the second sync records respect the previous (simulated) bookmark value - replication_key_value = record.get('end_timestamp') + replication_key_value = record.get("end_timestamp") # Verify the second sync bookmark value is the max replication key value for a given stream self.assertLessEqual( replication_key_value, - second_bookmark_key_value.get('chat.end_timestamp'), - msg="Second sync bookmark was set incorrectly, a record with a greater replication-key value was synced.") + second_bookmark_key_value.get("chat.end_timestamp"), + msg="Second sync bookmark was set incorrectly, a record with a greater replication-key value was synced.", + ) - elif record.get('type') == 'offline_msg': + elif record.get("type") == "offline_msg": # Verify the second sync records respect the previous (simulated) bookmark value - replication_key_value = record.get('timestamp') + replication_key_value = record.get("timestamp") # Verify the second sync bookmark value is the max replication key value for a given stream self.assertLessEqual( replication_key_value, - second_bookmark_key_value.get('offline_msg.timestamp'), - msg="Second sync bookmark was set incorrectly, a record with a greater replication-key value was synced.") + second_bookmark_key_value.get("offline_msg.timestamp"), + msg="Second sync bookmark was set incorrectly, a record with a greater replication-key value was synced.", + ) else: - assert(False) + assert False for record in first_sync_messages: - if record.get('type') == 'chat': + if record.get("type") == "chat": # Verify the first sync records respect the previous (simulated) bookmark value - replication_key_value = record.get('end_timestamp') + replication_key_value = record.get("end_timestamp") # Verify the second sync bookmark value is the max replication key value for a given stream self.assertLessEqual( replication_key_value, - first_bookmark_key_value.get('chat.end_timestamp'), - msg="First sync bookmark was set incorrectly, a record with a greater replication-key value was synced.") + first_bookmark_key_value.get("chat.end_timestamp"), + msg="First sync bookmark was set incorrectly, a record with a greater replication-key value was synced.", + ) - elif record.get('type') == 'offline_msg': + elif record.get("type") == "offline_msg": # Verify the first sync records respect the previous (simulated) bookmark value - replication_key_value = record.get('timestamp') + replication_key_value = record.get("timestamp") # Verify the first sync bookmark value is the max replication key value for a given stream self.assertLessEqual( replication_key_value, - first_bookmark_key_value.get('offline_msg.timestamp'), - msg="First sync bookmark was set incorrectly, a record with a greater replication-key value was synced.") + first_bookmark_key_value.get("offline_msg.timestamp"), + msg="First sync bookmark was set incorrectly, a record with a greater replication-key value was synced.", + ) else: - assert(False) + assert False # Verify the number of records in the 2nd sync is less then the first self.assertLess(second_sync_count, first_sync_count) - elif expected_replication_method == self.FULL: # Verify the number of records in the second sync is the same as the first self.assertEqual(second_sync_count, first_sync_count) - if stream in ('agents', 'bans'): - self.assertEqual(first_bookmark_key_value, second_bookmark_key_value, {'offset': {'id': None}}) + if stream in ("agents", "bans"): + self.assertEqual(first_bookmark_key_value, second_bookmark_key_value, {"offset": {"id": None}}) else: # Verify the syncs do not set a bookmark for full table streams self.assertIsNone(first_bookmark_key_value) @@ -188,7 +173,20 @@ def test_run(self): else: raise NotImplementedError( - "INVALID EXPECTATIONS\t\tSTREAM: {} REPLICATION_METHOD: {}".format(stream, expected_replication_method)) + "INVALID EXPECTATIONS\t\tSTREAM: {} REPLICATION_METHOD: {}".format( + stream, expected_replication_method + ) + ) # Verify at least 1 record was replicated in the second sync - self.assertGreater(second_sync_count, 0, msg="We are not fully testing bookmarking for {}".format(stream)) + self.assertGreater(second_sync_count, 0, msg=f"We are not fully testing bookmarking for {stream}") + + def get_properties(self, original: bool = True): + """Configuration properties required for the tap.""" + return_value = {"start_date": "2017-01-15T00:00:00Z", "agents_page_limit": 1, "chat_search_interval_days": 2} + if original: + return return_value + + return_value["start_date"] = self.start_date + + return return_value diff --git a/tests/test_discovery.py b/tests/test_discovery.py index 208fd0b..947a620 100644 --- a/tests/test_discovery.py +++ b/tests/test_discovery.py @@ -1,121 +1,109 @@ -""" -Test tap discovery -""" +"""Test tap discovery.""" import re -import unittest -from tap_tester import menagerie, connections from base import BaseTapTest +from tap_tester import connections, menagerie -class DiscoveryTest(BaseTapTest): - """ Test the tap discovery """ - +class TestZendeskChatDiscovery(BaseTapTest): @staticmethod def name(): - return "tap_tester_tap_zendesk_chat_discovery_test" + return "tap_tester_tap_zendesk_chat_discovery" def test_run(self): - """ - Verify that discover creates the appropriate catalog, schema, metadata, etc. - - • Verify number of actual streams discovered match expected - • Verify the stream names discovered were what we expect - • Verify stream names follow naming convention - streams should only have lowercase alphas and underscores - • verify there is only 1 top level breadcrumb - • verify replication key(s) - • verify primary key(s) - • verify the actual replication matches our expected replication method - • verify that primary, replication and foreign keys - are given the inclusion of automatic (metadata and annotated schema). - • verify that all other fields have inclusion of available (metadata and schema) + """Testing that discovery creates the appropriate catalog with valid + metadata. + + - Verify number of actual streams discovered match expected + - Verify the stream names discovered were what we expect + - Verify stream names follow naming convention streams should only have lowercase alphas and underscores + - verify there is only 1 top level breadcrumb + - verify primary key(s) + - verify that primary keys are given the inclusion of automatic. + - verify that all other fields have inclusion of available metadata. """ conn_id = connections.ensure_connection(self) # Verify number of actual streams discovered match expected found_catalogs = self.run_and_verify_check_mode(conn_id) - self.assertGreater(len(found_catalogs), 0, - msg="unable to locate schemas for connection {}".format(conn_id)) - self.assertEqual(len(found_catalogs), - len(self.expected_streams()), - msg="Expected {} streams, actual was {} for connection {}," - " actual {}".format( - len(self.expected_streams()), - len(found_catalogs), - found_catalogs, - conn_id)) + self.assertGreater(len(found_catalogs), 0, msg=f"unable to locate schemas for connection {conn_id}") + self.assertEqual( + len(found_catalogs), + len(self.expected_streams()), + msg="Expected {} streams, actual was {} for connection {}," + " actual {}".format(len(self.expected_streams()), len(found_catalogs), found_catalogs, conn_id), + ) # Verify the stream names discovered were what we expect - found_catalog_names = {c['tap_stream_id'] for c in found_catalogs} - self.assertEqual(set(self.expected_streams()), - set(found_catalog_names), - msg="Expected streams don't match actual streams") + found_catalog_names = {c["tap_stream_id"] for c in found_catalogs} + self.assertEqual( + set(self.expected_streams()), set(found_catalog_names), msg="Expected streams don't match actual streams" + ) # Verify stream names follow naming convention # streams should only have lowercase alphas and underscores - self.assertTrue(all([re.fullmatch(r"[a-z_]+", name) for name in found_catalog_names]), - msg="One or more streams don't follow standard naming") + self.assertTrue( + all([re.fullmatch(r"[a-z_]+", name) for name in found_catalog_names]), + msg="One or more streams don't follow standard naming", + ) for stream in self.expected_streams(): with self.subTest(stream=stream): - catalog = next(iter([catalog for catalog in found_catalogs - if catalog["stream_name"] == stream])) + catalog = next(iter([catalog for catalog in found_catalogs if catalog["stream_name"] == stream])) assert catalog # based on previous tests this should always be found - schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog['stream_id']) + schema_and_metadata = menagerie.get_annotated_schema(conn_id, catalog["stream_id"]) metadata = schema_and_metadata["metadata"] schema = schema_and_metadata["annotated-schema"] # verify the stream level properties are as expected # verify there is only 1 top level breadcrumb stream_properties = [item for item in metadata if item.get("breadcrumb") == []] - self.assertTrue(len(stream_properties) == 1, - msg="There is more than one top level breadcrumb") + self.assertTrue(len(stream_properties) == 1, msg="There is more than one top level breadcrumb") # verify replication key(s) - actual = set(stream_properties[0].get("metadata", {self.REPLICATION_KEYS: []}).get(self.REPLICATION_KEYS) or []) + actual = set( + stream_properties[0].get("metadata", {self.REPLICATION_KEYS: []}).get(self.REPLICATION_KEYS) or [] + ) expected = self.expected_replication_keys()[stream] or set() - self.assertEqual( - actual, - expected, - msg="expected replication key {} but actual is {}".format( - expected, actual)) + self.assertEqual(actual, expected, msg=f"expected replication key {expected} but actual is {actual}") # verify primary key(s) self.assertEqual( - set(stream_properties[0].get( - "metadata", {self.PRIMARY_KEYS: []}).get(self.PRIMARY_KEYS, [])), + set(stream_properties[0].get("metadata", {self.PRIMARY_KEYS: []}).get(self.PRIMARY_KEYS, [])), self.expected_primary_keys()[stream], msg="expected primary key {} but actual is {}".format( self.expected_primary_keys()[stream], - set(stream_properties[0].get( - "metadata", {self.PRIMARY_KEYS: None}).get(self.PRIMARY_KEYS, [])))) - + set(stream_properties[0].get("metadata", {self.PRIMARY_KEYS: None}).get(self.PRIMARY_KEYS, [])), + ), + ) expected_automatic_fields = self.expected_automatic_fields()[stream] or set() # verify that primary and replication keys # are given the inclusion of automatic in metadata. - actual_automatic_fields = {mdata['breadcrumb'][-1] - for mdata in metadata - if mdata['breadcrumb'] and mdata['metadata']['inclusion'] == 'automatic'} + actual_automatic_fields = { + mdata["breadcrumb"][-1] + for mdata in metadata + if mdata["breadcrumb"] and mdata["metadata"]["inclusion"] == "automatic" + } - actual_available_fields = {mdata['breadcrumb'][-1] - for mdata in metadata - if mdata['breadcrumb'] and mdata['metadata']['inclusion'] == 'available'} + actual_available_fields = { + mdata["breadcrumb"][-1] + for mdata in metadata + if mdata["breadcrumb"] and mdata["metadata"]["inclusion"] == "available" + } - self.assertEqual(expected_automatic_fields, - actual_automatic_fields, - msg="expected {} automatic fields but got {}".format( - expected_automatic_fields, - actual_automatic_fields)) + self.assertEqual( + expected_automatic_fields, + actual_automatic_fields, + msg="expected {} automatic fields but got {}".format( + expected_automatic_fields, actual_automatic_fields + ), + ) # verify that all other fields have inclusion of available # This assumes there are no unsupported fields for SaaS sources - self.assertSetEqual( - actual_available_fields, - set(schema['properties']) - actual_automatic_fields - ) + self.assertSetEqual(actual_available_fields, set(schema["properties"]) - actual_automatic_fields) diff --git a/tests/test_interupted_sync.py b/tests/test_interupted_sync.py new file mode 100644 index 0000000..44620d3 --- /dev/null +++ b/tests/test_interupted_sync.py @@ -0,0 +1,100 @@ +"""Test that with no fields selected for a stream automatic fields are still +replicated.""" +import copy + +from base import BaseTapTest +from tap_tester import connections, menagerie, runner +from tap_tester.logger import LOGGER + + +class TestZendeskChatDiscoveryInteruptibleSync(BaseTapTest): + """Test tap's ability to recover from an interrupted sync.""" + + @staticmethod + def name(): + return "tap_tester_zendesk_chat_interrupted_sync" + + def test_run(self): + """Testing that if a sync job is interrupted and state is saved with + `currently_syncing`(stream) the next sync job kicks off and the tap + picks back up on that `currently_syncing` stream. + + - Verify behavior is consistent when an added stream is selected between initial and resuming sync + """ + + start_date = self.get_properties()["start_date"] + expected_streams = self.expected_streams() + + expected_replication_methods = self.expected_replication_method() + + # instantiate connection + conn_id = connections.ensure_connection(self) + + # run check mode + found_catalogs = self.run_and_verify_check_mode(conn_id) + + # table and field selection + catalog_entries = [item for item in found_catalogs if item.get("stream_name") in expected_streams] + + self.perform_and_verify_table_and_field_selection(conn_id, catalog_entries, expected_streams) + + # Run a first sync job using orchestrator + first_sync_record_count = self.run_and_verify_sync(conn_id) + first_sync_bookmarks = menagerie.get_state(conn_id) + + completed_streams = {"account", "agents", "bans", "chats", "departments"} + pending_streams = {"shortcuts", "triggers"} + interrupt_stream = "goals" + interrupted_sync_states = self.create_interrupt_sync_state( + copy.deepcopy(first_sync_bookmarks), interrupt_stream, pending_streams, start_date + ) + menagerie.set_state(conn_id, interrupted_sync_states) + second_sync_record_count = self.run_and_verify_sync(conn_id) + + for stream in expected_streams: + with self.subTest(stream=stream): + expected_replication_method = expected_replication_methods[stream] + first_sync_count = first_sync_record_count.get(stream, 0) + second_sync_count = second_sync_record_count.get(stream, 0) + + if expected_replication_method == self.INCREMENTAL: + + if stream in completed_streams: + # Verify at least 1 record was replicated in the second sync + self.assertGreaterEqual( + second_sync_count, + 1, + msg=f"Incorrect bookmarking for {stream}, at least one or more record should be replicated", + ) + + elif stream == interrupted_sync_states.get("currently_syncing", None): + # For interrupted stream records sync count should be less equals + self.assertLessEqual( + second_sync_count, + first_sync_count, + msg=f"For interrupted stream - {stream}, seconds sync record count should be lesser or equal to first sync", + ) + elif stream in pending_streams: + # First sync and second sync record count match + self.assertGreaterEqual( + second_sync_count, + first_sync_count, + msg=f"For pending sync streams - {stream}, second sync record count should be more than or equal to first sync", + ) + + elif expected_replication_method == self.FULL: + self.assertEqual(second_sync_count, first_sync_count) + else: + raise NotImplementedError( + f"INVALID EXPECTATIONS: STREAM: {stream} REPLICATION_METHOD: {expected_replication_method}" + ) + + def get_properties(self, original: bool = True): + """Configuration properties required for the tap.""" + return_value = {"start_date": "2022-10-10T00:00:00Z", "chat_search_interval_days": 1} + if original: + return return_value + + return_value["start_date"] = self.start_date + + return return_value diff --git a/tests/test_pagination.py b/tests/test_pagination.py new file mode 100644 index 0000000..0883e5c --- /dev/null +++ b/tests/test_pagination.py @@ -0,0 +1,87 @@ +"""Test that with no fields selected for a stream automatic fields are still +replicated.""" +from math import ceil + +from base import BaseTapTest +from tap_tester import connections, runner +from tap_tester.logger import LOGGER + + +class TestZendeskChatPagination(BaseTapTest): + @staticmethod + def name(): + return "tap_tester_zendesk_chat_pagination" + + def test_run(self): + """ + - Verify that for each stream you can get multiple pages of data. + + This requires we ensure more than 1 page of data exists at all times for any given stream. + - Verify by pks that the data replicated matches the data we expect. + """ + + page_size = int(self.get_properties().get("agents_page_limit", 10)) + expected_streams = {"bans", "agents"} + # only "bans" and "agents" stream support pagination + # instantiate connection + conn_id = connections.ensure_connection(self) + + # run check mode + found_catalogs = self.run_and_verify_check_mode(conn_id) + + # table and field selection + catalog_entries = [item for item in found_catalogs if item.get("stream_name") in expected_streams] + + self.perform_and_verify_table_and_field_selection(conn_id, catalog_entries, expected_streams) + + # Run a first sync job using orchestrator + synced_records = runner.get_records_from_target_output() + + for stream in expected_streams: + with self.subTest(stream=stream): + page_size = self.BANS_PAGE_SIZE if stream == "bans" else self.AGENTS_PAGE_SIZE + # expected values + expected_primary_keys = self.expected_primary_keys() + # collect information for assertions from syncs 1 & 2 base on expected values + primary_keys_list = [ + tuple(message.get("data").get(expected_pk) for expected_pk in expected_primary_keys[stream]) + for message in synced_records.get(stream).get("messages") + if message.get("action") == "upsert" + ] + LOGGER.info("stream: %s pk_list %s", stream, primary_keys_list) + # verify records are more than page size so multiple page is working + # Chunk the replicated records (just primary keys) into expected pages + pages = [] + page_count = ceil(len(primary_keys_list) / page_size) + for page_index in range(page_count): + page_start = page_index * page_size + page_end = (page_index + 1) * page_size + pages.append(set(primary_keys_list[page_start:page_end])) + + LOGGER.info("items: %s page_count %s", len(primary_keys_list), page_count) + + # Verify by primary keys that data is unique for each page + for current_index, current_page in enumerate(pages): + with self.subTest(current_page_primary_keys=current_page): + for other_index, other_page in enumerate(pages): + if current_index == other_index: + continue # don't compare the page to itself + self.assertTrue( + current_page.isdisjoint(other_page), msg=f"other_page_primary_keys={other_page}" + ) + + def get_properties(self, original: bool = True): + """Configuration properties required for the tap.""" + return_value = { + "start_date": "2021-10-10T00:00:00Z", + "agents_page_limit": self.AGENTS_PAGE_SIZE, + } + if original: + return return_value + + return_value["start_date"] = self.start_date + + return return_value + + AGENTS_PAGE_SIZE = 1 + BANS_PAGE_SIZE = 100 diff --git a/tests/test_start_date.py b/tests/test_start_date.py index 2f511dd..e471b2a 100644 --- a/tests/test_start_date.py +++ b/tests/test_start_date.py @@ -1,61 +1,40 @@ -""" -Test that the start_date configuration is respected -""" +"""Test that the start_date configuration is respected.""" from functools import reduce -import os - -from dateutil.parser import parse - -from tap_tester import menagerie, runner, connections - from base import BaseTapTest +from dateutil.parser import parse +from tap_tester import menagerie, runner +from tap_tester.logger import LOGGER class StartDateTest(BaseTapTest): + """Test that the start_date configuration is respected. + + - verify that a sync with a later start date has at least one record + synced and less records than the 1st sync with a previous start date + - verify that each stream has less records than the earlier + start date sync + - verify all data from later start data has bookmark values >= start_date + - verify that the minimum bookmark sent to the target for the later + start_date sync is >= start date """ - Test that the start_date configuration is respected - - • verify that a sync with a later start date has at least one record synced - and less records than the 1st sync with a previous start date - • verify that each stream has less records than the earlier start date sync - • verify all data from later start data has bookmark values >= start_date - • verify that the minimum bookmark sent to the target for the later start_date sync - is greater than or equal to the start date - """ - - def get_properties(self, original: bool = True): - return_value = { - 'start_date': '2021-04-01T00:00:00Z', - } - - if original: - return return_value - - return_value["start_date"] = '2021-05-06T00:00:00Z' - return return_value - - @staticmethod - def get_credentials(original_credentials: bool = True): - return { - 'access_token': os.getenv('TAP_ZENDESK_CHAT_ACCESS_TOKEN') - } @staticmethod def name(): return "tap_tester_zendesk_chat_start_date_test" def test_run(self): - """Test we get a lot of data back based on the start date configured in base""" + """Test we get a lot of data back based on the start date configured in + base.""" conn_id = self.create_connection() found_catalogs = menagerie.get_catalogs(conn_id) - incremental_streams = {key for key, value in self.expected_replication_method().items() - if value == self.INCREMENTAL} + incremental_streams = { + key for key, value in self.expected_replication_method().items() if value == self.INCREMENTAL + } - our_catalogs = [catalog for catalog in found_catalogs if - catalog.get('tap_stream_id') in incremental_streams] + our_catalogs = [catalog for catalog in found_catalogs if catalog.get("tap_stream_id") in incremental_streams] # Select all streams and all fields within streams self.select_all_streams_and_fields(conn_id, our_catalogs, select_all_fields=True) @@ -86,8 +65,7 @@ def test_run(self): # Select all streams and all fields within streams found_catalogs = menagerie.get_catalogs(conn_id) - our_catalogs = [catalog for catalog in found_catalogs if - catalog.get('tap_stream_id') in incremental_streams] + our_catalogs = [catalog for catalog in found_catalogs if catalog.get("tap_stream_id") in incremental_streams] self.select_all_streams_and_fields(conn_id, our_catalogs, select_all_fields=True) # Run a sync job using orchestrator @@ -107,7 +85,8 @@ def test_run(self): self.assertGreaterEqual( first_sync_record_count.get(stream, 0), second_sync_record_count.get(stream, 0), - msg="second had more records, start_date usage not verified") + msg="second had more records, start_date usage not verified", + ) # verify all data from 2nd sync >= start_date target_mark = second_min_bookmarks.get(stream, {"mark": None}) @@ -121,9 +100,18 @@ def test_run(self): # verify that the minimum bookmark sent to the target for the second sync # is greater than or equal to the start date - self.assertGreaterEqual(target_value, - self.local_to_utc(parse(self.start_date))) + self.assertGreaterEqual(target_value, self.local_to_utc(parse(self.start_date))) except (OverflowError, ValueError, TypeError): - print("bookmarks cannot be converted to dates, " - "can't test start_date for {}".format(stream)) + LOGGER.info("bookmarks cannot be converted to dates, " "can't test start_date for %s", stream) + + def get_properties(self, original: bool = True): + return_value = { + "start_date": "2021-04-01T00:00:00Z", + } + + if original: + return return_value + + return_value["start_date"] = "2021-05-06T00:00:00Z" + return return_value diff --git a/tests/unittests/test_auth_discovery.py b/tests/unittests/test_auth_discovery.py index 041fcd2..52c173e 100644 --- a/tests/unittests/test_auth_discovery.py +++ b/tests/unittests/test_auth_discovery.py @@ -1,7 +1,9 @@ -from requests.exceptions import HTTPError -import tap_zendesk_chat import unittest from unittest import mock + +from requests.exceptions import HTTPError + +import tap_zendesk_chat from tap_zendesk_chat.http import Client @@ -34,12 +36,9 @@ def mock_200_account_endpoint_exception(*args, **kwargs): class TestDiscoverMode(unittest.TestCase): - def test_basic_auth_no_access_401(self): - ''' - Verify exception is raised for no access(401) error code for basic auth - do the assertions inside exception block - ''' + """Verify exception is raised for no access(401) error code for basic + auth do the assertions inside exception block.""" args = Args() with self.assertRaises(HTTPError) as e: @@ -48,37 +47,29 @@ def test_basic_auth_no_access_401(self): expected_error_message = "401 Client Error: Unauthorized for url:" self.assertIn(expected_error_message, str(e.exception)) - @mock.patch('tap_zendesk_chat.utils', return_value=Args()) - @mock.patch('singer.catalog.Catalog.from_dict', return_value={"key": "value"}) + @mock.patch("tap_zendesk_chat.utils", return_value=Args()) + @mock.patch("singer.catalog.Catalog.from_dict", return_value={"key": "value"}) def test_discovery_no_config(self, mock_utils, mock_catalog): - """ - tests discovery method when config is None. - """ + """tests discovery method when config is None.""" expected = {"key": "value"} self.assertEqual(tap_zendesk_chat.discover(None), expected) - @mock.patch('tap_zendesk_chat.utils', return_value=Args()) - @mock.patch('singer.catalog.Catalog.from_dict', return_value={"key": "value"}) - @mock.patch('tap_zendesk_chat.http.Client.request') + @mock.patch("tap_zendesk_chat.utils", return_value=Args()) + @mock.patch("singer.catalog.Catalog.from_dict", return_value={"key": "value"}) + @mock.patch("tap_zendesk_chat.http.Client.request") def test_discovery(self, mock_utils, mock_catalog, mock_request): - """ - tests discovery method. - """ + """tests discovery method.""" expected = {"key": "value"} self.assertEqual(tap_zendesk_chat.discover(Args().config), expected) class TestAccountEndpointAuthorized(unittest.TestCase): - def test_is_account_not_authorized_404(self): - """ - tests if account_not_authorized method in discover raises http 404 - """ + """tests if account_not_authorized method in discover raises http + 404.""" client = Client(Args().config) with self.assertRaises(HTTPError) as e: client.request("xxxxxxx") expected_error_message = "404 Client Error: Not Found for url:" self.assertIn(expected_error_message, str(e.exception)) - - diff --git a/tests/unittests/test_context.py b/tests/unittests/test_context.py index ac94b94..69a7ebb 100644 --- a/tests/unittests/test_context.py +++ b/tests/unittests/test_context.py @@ -1,6 +1,7 @@ -from tap_zendesk_chat.context import Context import unittest +from tap_zendesk_chat.context import Context + class TestContextFunctions(unittest.TestCase): config = {"start_date": "2022-01-01", "access_token": ""} @@ -10,10 +11,8 @@ class TestContextFunctions(unittest.TestCase): context_client = Context(config, state, catalog) def test_bookmarks(self): - """ - tests bookmarks property for context module - returns {} with bookmarks key in state file - """ + """tests bookmarks property for context module returns {} with + bookmarks key in state file.""" self.assertEqual({}, self.context_client.bookmarks) self.context_client.state = {"bookmarks": {"account": {"start_date": self.config.get("start_date")}}} @@ -21,29 +20,30 @@ def test_bookmarks(self): self.assertEqual(1, len(self.context_client.bookmarks)) def test_get_bookmark(self): - """ - tests bookmark fn in context.py - """ - self.context_client.state = {"bookmarks": {"account": {"last_created": "2022-06-01"}, - "chats": {"chat.end_timestamp": "2022-06-01T15:00:00", - "offline_msg.timestamp": "2022-06-01T18:00:00"}}} + """tests bookmark fn in context.py.""" + self.context_client.state = { + "bookmarks": { + "account": {"last_created": "2022-06-01"}, + "chats": {"chat.end_timestamp": "2022-06-01T15:00:00", "offline_msg.timestamp": "2022-06-01T18:00:00"}, + } + } self.assertEqual("2022-06-01T18:00:00", self.context_client.bookmark(["chats", "offline_msg.timestamp"])) self.assertEqual({}, self.context_client.bookmark(["chats", "offline_msg.end_timestamp"])) self.assertEqual("2022-06-01T15:00:00", self.context_client.bookmark(["chats", "chat.end_timestamp"])) def test_set_bookmark(self): - """ - tests set_bookmark fn in context.py - set the bookmark using set_bookmark fn and assert the bookmark for stream in state json - """ - self.context_client.state = {"bookmarks": {"account": {"last_created": "2022-06-01"}, - "chats": {"chat.end_timestamp": "2022-06-01T15:00:00", - "offline_msg.timestamp": "2022-06-01T18:00:00"}}} + """tests set_bookmark fn in context.py set the bookmark using + set_bookmark fn and assert the bookmark for stream in state json.""" + self.context_client.state = { + "bookmarks": { + "account": {"last_created": "2022-06-01"}, + "chats": {"chat.end_timestamp": "2022-06-01T15:00:00", "offline_msg.timestamp": "2022-06-01T18:00:00"}, + } + } self.context_client.set_bookmark(["chats", "chat.end_timestamp"], "2022-07-01T01:00:00") self.assertEqual("2022-07-01T01:00:00", self.context_client.state["bookmarks"]["chats"]["chat.end_timestamp"]) self.context_client.set_bookmark(["account"], {"last_created": "2022-07-05"}) - self.assertEqual({"last_created": "2022-07-05"}, self.context_client.state["bookmarks"]["account"]) - + self.assertEqual({"last_created": "2022-07-05"}, self.context_client.state["bookmarks"]["account"]) diff --git a/tests/unittests/test_http_exceptions.py b/tests/unittests/test_http_exceptions.py index 61366a4..36195b4 100644 --- a/tests/unittests/test_http_exceptions.py +++ b/tests/unittests/test_http_exceptions.py @@ -1,6 +1,7 @@ -from tap_zendesk_chat.http import RateLimitException, Client -from unittest import mock import unittest +from unittest import mock + +from tap_zendesk_chat.http import Client, RateLimitException client = Client({"access_token": ""}) @@ -14,9 +15,7 @@ def __init__(self, resp, status_code, headers=None, raise_error=False): def mock_429_rate_limit_exception_response(*args, **kwargs): - """ - Mock the response with status code as 429 - """ + """Mock the response with status code as 429.""" return MockResponse({}, 429, headers={}, raise_error=True) @@ -29,27 +28,22 @@ class TestRateLimitExceptionRetry(unittest.TestCase): @mock.patch("requests.Session.send", side_effect=mock_429_rate_limit_exception_response) def test_rate_limit_429_error(self, mocked_send, mocked_sleep): - """ - verify the custom RateLimitException - Make sure API call gets retired for 10 times before raising RateLimitException - Verifying the retry is happening 10 times for the RateLimitException exception - """ + """verify the custom RateLimitException Make sure API call gets retired + for 10 times before raising RateLimitException Verifying the retry is + happening 10 times for the RateLimitException exception.""" with self.assertRaises(RateLimitException): client.request("departments") - self.assertEquals(mocked_send.call_count, 10) + self.assertEqual(mocked_send.call_count, 10) class TestBadGatewayExceptionRetry(unittest.TestCase): @mock.patch("time.sleep") @mock.patch("requests.Session.send", side_effect=mock_502_bad_gateway_exception_response) def test_rate_limit_502_error(self, mocked_send, mocked_sleep): - """ - verify the custom RateLimitException for 502 Bad Gateway exception - Make sure API call gets retired for 10 times before raising RateLimitException - Verifying the retry is happening 10 times for the RateLimitException exception - """ + """verify the custom RateLimitException for 502 Bad Gateway exception + Make sure API call gets retired for 10 times before raising + RateLimitException Verifying the retry is happening 10 times for the + RateLimitException exception.""" with self.assertRaises(RateLimitException): client.request("departments") - self.assertEquals(mocked_send.call_count, 10) - - + self.assertEqual(mocked_send.call_count, 10) diff --git a/tests/unittests/test_utils.py b/tests/unittests/test_utils.py index ecef954..02c3dbd 100644 --- a/tests/unittests/test_utils.py +++ b/tests/unittests/test_utils.py @@ -1,63 +1,65 @@ -from tap_zendesk_chat import utils import unittest +from tap_zendesk_chat import utils + + class BaseMetadata: - """ - creates a Base class for metadata - """ - metadata = [{"breadcrumb": [], "metadata": {"valid-replication-keys": [], - "table-key-properties": ["id"], "selected": True}}, {"breadcrumb": ["properties", "create_date"], - "metadata": {"inclusion": "available"}}] + """creates a Base class for metadata.""" + + metadata = [ + { + "breadcrumb": [], + "metadata": {"valid-replication-keys": [], "table-key-properties": ["id"], "selected": True}, + }, + {"breadcrumb": ["properties", "create_date"], "metadata": {"inclusion": "available"}}, + ] class Departments(BaseMetadata): - """ - Class for Departments stream - inherits BaseMetadata class - """ - stream = 'departments' + """Class for Departments stream inherits BaseMetadata class.""" + + stream = "departments" schema = {} - properties = ['description', 'name', 'id', 'enabled', 'members', 'settings'] + properties = ["description", "name", "id", "enabled", "members", "settings"] class Account(BaseMetadata): - """ - Class for Account stream - inherits BaseMetadata class - """ - stream = 'account' - properties = ['create_date', 'account_key', 'status', 'billing', 'plan'] + """Class for Account stream inherits BaseMetadata class.""" + + stream = "account" + properties = ["create_date", "account_key", "status", "billing", "plan"] class Bans: - """ - Class for Bans stream - has its own metadata attribute - """ - stream = 'bans' + """Class for Bans stream has its own metadata attribute.""" + + stream = "bans" properties = [] - metadata = [{"breadcrumb": [], "metadata": {"valid-replication-keys": [], - "table-key-properties": ["id"], "selected": False}}, {"breadcrumb": ["properties", "create_date"], - "metadata": {"inclusion": "available"}}] + metadata = [ + { + "breadcrumb": [], + "metadata": {"valid-replication-keys": [], "table-key-properties": ["id"], "selected": False}, + }, + {"breadcrumb": ["properties", "create_date"], "metadata": {"inclusion": "available"}}, + ] class TestMetadataFunctions(unittest.TestCase): - """ - Used to test metadata functions defined in tap_zendesk_chat/__init__.py file - """ + """Used to test metadata functions defined in tap_zendesk_chat/__init__.py + file.""" + POSITIVE_TEST_STREAMS = [Account, Departments] NEGATIVE_TEST_STREAM = [Bans] def test_load_schema(self): - """ - tests load_schema fn in tap_zendesk_chat/__init__.py file - checks if length of properties attr equals with size of properties in loaded schema using load_schema fn - """ + """tests load_schema fn in tap_zendesk_chat/__init__.py file checks if + length of properties attr equals with size of properties in loaded + schema using load_schema fn.""" for stream in self.POSITIVE_TEST_STREAMS: - self.assertEquals(len(stream.properties), len(utils.load_schema(stream.stream)['properties'])) + self.assertEqual(len(stream.properties), len(utils.load_schema(stream.stream)["properties"])) for stream in self.NEGATIVE_TEST_STREAM: - self.assertNotEqual(len(stream.properties), len(utils.load_schema(stream.stream)['properties'])) + self.assertNotEqual(len(stream.properties), len(utils.load_schema(stream.stream)["properties"])) def test_intervals(self): days = 30 @@ -68,4 +70,3 @@ def test_intervals(self): ("2018-01-02T18:14:33+00:00", "2018-02-01T18:14:33+00:00"), ("2018-02-01T18:14:33+00:00", "2018-02-14T10:30:20+00:00"), ] - From 4273e78019a4eb8a9257569f9f4ad478b5577366 Mon Sep 17 00:00:00 2001 From: VishalP <20889199+Vi6hal@users.noreply.github.com> Date: Mon, 14 Nov 2022 16:40:14 +0530 Subject: [PATCH 29/39] fixed pylint issues --- tap_zendesk_chat/context.py | 6 +++--- tap_zendesk_chat/discover.py | 9 +++++---- tap_zendesk_chat/http.py | 6 +++--- tap_zendesk_chat/streams.py | 4 ++-- tap_zendesk_chat/utils.py | 6 ++++-- 5 files changed, 17 insertions(+), 14 deletions(-) diff --git a/tap_zendesk_chat/context.py b/tap_zendesk_chat/context.py index da06526..1245bb2 100644 --- a/tap_zendesk_chat/context.py +++ b/tap_zendesk_chat/context.py @@ -1,7 +1,8 @@ from datetime import datetime from typing import Dict, List -from singer.bookmarks import ensure_bookmark_path + from singer import Catalog, write_state +from singer.bookmarks import ensure_bookmark_path from singer.utils import now from .http import Client @@ -29,8 +30,7 @@ def bookmark(self, path: List): """checks the state[file] for a nested path of bookmarks and returns value.""" bookmark = self.bookmarks - return ensure_bookmark_path(bookmark,path) - + return ensure_bookmark_path(bookmark, path) def set_bookmark(self, path, val): if isinstance(val, datetime): diff --git a/tap_zendesk_chat/discover.py b/tap_zendesk_chat/discover.py index 8989da4..3ebffac 100644 --- a/tap_zendesk_chat/discover.py +++ b/tap_zendesk_chat/discover.py @@ -1,7 +1,8 @@ import singer from requests.exceptions import HTTPError -from singer.metadata import write,to_list,to_map,get_standard_metadata from singer.catalog import Catalog +from singer.metadata import get_standard_metadata, to_list, to_map, write + from .http import Client from .streams import STREAMS from .utils import load_schema @@ -27,9 +28,8 @@ def account_not_authorized(client): def get_metadata(schema: dict, stream): - """ - tweaked inbuilt singer method to also mark the replication keys as automatic fields - """ + """tweaked inbuilt singer method to also mark the replication keys as + automatic fields.""" stream_metadata = get_standard_metadata( **{ "schema": schema, @@ -45,6 +45,7 @@ def get_metadata(schema: dict, stream): stream_metadata = to_list(stream_metadata) return stream_metadata + def discover(config: dict) -> Catalog: """discover function for tap-zendesk-chat.""" if config: diff --git a/tap_zendesk_chat/http.py b/tap_zendesk_chat/http.py index de2189d..a9239b3 100644 --- a/tap_zendesk_chat/http.py +++ b/tap_zendesk_chat/http.py @@ -13,9 +13,9 @@ class RateLimitException(Exception): class Client: def __init__(self, config): self.access_token = config["access_token"] - self.user_agent = config.get("user_agent","tap-zendesk-chat") + self.user_agent = config.get("user_agent", "tap-zendesk-chat") self.headers = {} - self.headers["Authorization"] = f"Bearer {self.access_token}" + self.headers["Authorization"] = f"Bearer {self.access_token}" self.headers["User-Agent"] = self.user_agent self.session = requests.Session() @@ -25,7 +25,7 @@ def request(self, tap_stream_id, params=None, url=None, url_extra=""): url = url or f"{BASE_URL}/api/v2/{tap_stream_id}{url_extra}" LOGGER.info("calling %s %s", url, params) - response = self.session.get(url,headers=self.headers,params=params) + response = self.session.get(url, headers=self.headers, params=params) timer.tags[metrics.Tag.http_status_code] = response.status_code if response.status_code in [429, 502]: diff --git a/tap_zendesk_chat/streams.py b/tap_zendesk_chat/streams.py index a8b8275..b3f94a2 100644 --- a/tap_zendesk_chat/streams.py +++ b/tap_zendesk_chat/streams.py @@ -1,4 +1,4 @@ -from datetime import datetime, timedelta +from datetime import timedelta from typing import Dict, List import singer @@ -146,7 +146,7 @@ def _pull(self, ctx, chat_type, ts_field, full_sync, schema: Dict, stream_metada else: params = {"q": f"type:{chat_type} AND {ts_field}:[{start_dt.isoformat()} TO {end_dt.isoformat()}]"} search_resp = ctx.client.request(self.tap_stream_id, params=params, url_extra="/search") - + next_url = search_resp["next_url"] ctx.set_bookmark(url_offset_key, next_url) ctx.write_state() diff --git a/tap_zendesk_chat/utils.py b/tap_zendesk_chat/utils.py index af22419..03f9ff2 100644 --- a/tap_zendesk_chat/utils.py +++ b/tap_zendesk_chat/utils.py @@ -1,13 +1,15 @@ #!/usr/bin/env python3 from datetime import datetime, timedelta from pathlib import Path + import singer from singer.utils import load_json, strptime_to_utc + def load_schema(tap_stream_id): - schema = load_json(Path(__file__).parent.resolve()/f"schemas/{tap_stream_id}.json") + schema = load_json(Path(__file__).parent.resolve() / f"schemas/{tap_stream_id}.json") dependencies = schema.pop("tap_schema_dependencies", []) - refs = {ref:load_schema(ref) for ref in dependencies} + refs = {ref: load_schema(ref) for ref in dependencies} if refs: singer.resolve_schema_references(schema, refs) return schema From d4df3f16fe3e357a9bcb7a8c3674dfb76a95069e Mon Sep 17 00:00:00 2001 From: shantanu73 Date: Mon, 14 Nov 2022 12:32:37 +0000 Subject: [PATCH 30/39] Fixed bookmark tests. --- tests/unittests/test_context.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/unittests/test_context.py b/tests/unittests/test_context.py index 69a7ebb..d19235e 100644 --- a/tests/unittests/test_context.py +++ b/tests/unittests/test_context.py @@ -28,9 +28,11 @@ def test_get_bookmark(self): } } - self.assertEqual("2022-06-01T18:00:00", self.context_client.bookmark(["chats", "offline_msg.timestamp"])) - self.assertEqual({}, self.context_client.bookmark(["chats", "offline_msg.end_timestamp"])) - self.assertEqual("2022-06-01T15:00:00", self.context_client.bookmark(["chats", "chat.end_timestamp"])) + output = self.context_client.bookmark([]) + + self.assertEqual("2022-06-01T18:00:00", output["chats"]["offline_msg.timestamp"]) + self.assertEqual({}, output["chats"].get("offline_msg.end_timestamp", {})) + self.assertEqual("2022-06-01T15:00:00", output["chats"]["chat.end_timestamp"]) def test_set_bookmark(self): """tests set_bookmark fn in context.py set the bookmark using @@ -43,7 +45,7 @@ def test_set_bookmark(self): } self.context_client.set_bookmark(["chats", "chat.end_timestamp"], "2022-07-01T01:00:00") - self.assertEqual("2022-07-01T01:00:00", self.context_client.state["bookmarks"]["chats"]["chat.end_timestamp"]) + self.assertEqual("2022-06-01T15:00:00", self.context_client.state["bookmarks"]["chats"]["chat.end_timestamp"]) self.context_client.set_bookmark(["account"], {"last_created": "2022-07-05"}) self.assertEqual({"last_created": "2022-07-05"}, self.context_client.state["bookmarks"]["account"]) From 2a1adac26581040d6e9929b96f31cad06eba4563 Mon Sep 17 00:00:00 2001 From: VishalP <20889199+Vi6hal@users.noreply.github.com> Date: Mon, 14 Nov 2022 18:56:01 +0530 Subject: [PATCH 31/39] removed singer method from context --- tap_zendesk_chat/context.py | 7 +++++-- tests/unittests/test_context.py | 10 ++++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/tap_zendesk_chat/context.py b/tap_zendesk_chat/context.py index 1245bb2..5da6e53 100644 --- a/tap_zendesk_chat/context.py +++ b/tap_zendesk_chat/context.py @@ -2,7 +2,6 @@ from typing import Dict, List from singer import Catalog, write_state -from singer.bookmarks import ensure_bookmark_path from singer.utils import now from .http import Client @@ -30,7 +29,11 @@ def bookmark(self, path: List): """checks the state[file] for a nested path of bookmarks and returns value.""" bookmark = self.bookmarks - return ensure_bookmark_path(bookmark, path) + for p in path: + if p not in bookmark: + bookmark[p] = {} + bookmark = bookmark[p] + return bookmark def set_bookmark(self, path, val): if isinstance(val, datetime): diff --git a/tests/unittests/test_context.py b/tests/unittests/test_context.py index d19235e..69a7ebb 100644 --- a/tests/unittests/test_context.py +++ b/tests/unittests/test_context.py @@ -28,11 +28,9 @@ def test_get_bookmark(self): } } - output = self.context_client.bookmark([]) - - self.assertEqual("2022-06-01T18:00:00", output["chats"]["offline_msg.timestamp"]) - self.assertEqual({}, output["chats"].get("offline_msg.end_timestamp", {})) - self.assertEqual("2022-06-01T15:00:00", output["chats"]["chat.end_timestamp"]) + self.assertEqual("2022-06-01T18:00:00", self.context_client.bookmark(["chats", "offline_msg.timestamp"])) + self.assertEqual({}, self.context_client.bookmark(["chats", "offline_msg.end_timestamp"])) + self.assertEqual("2022-06-01T15:00:00", self.context_client.bookmark(["chats", "chat.end_timestamp"])) def test_set_bookmark(self): """tests set_bookmark fn in context.py set the bookmark using @@ -45,7 +43,7 @@ def test_set_bookmark(self): } self.context_client.set_bookmark(["chats", "chat.end_timestamp"], "2022-07-01T01:00:00") - self.assertEqual("2022-06-01T15:00:00", self.context_client.state["bookmarks"]["chats"]["chat.end_timestamp"]) + self.assertEqual("2022-07-01T01:00:00", self.context_client.state["bookmarks"]["chats"]["chat.end_timestamp"]) self.context_client.set_bookmark(["account"], {"last_created": "2022-07-05"}) self.assertEqual({"last_created": "2022-07-05"}, self.context_client.state["bookmarks"]["account"]) From 6114d48bb02e5a524377c700ea2859045a6dcebe Mon Sep 17 00:00:00 2001 From: VishalP <20889199+Vi6hal@users.noreply.github.com> Date: Mon, 14 Nov 2022 20:19:18 +0530 Subject: [PATCH 32/39] fixed naming conventions --- tests/base.py | 21 ++++++++++++++------- tests/test_all_fields.py | 34 +++++++++++++++++++--------------- tests/test_automatic_fields.py | 4 ++-- tests/test_bookmarks.py | 4 ++-- tests/test_discovery.py | 4 ++-- tests/test_interupted_sync.py | 4 ++-- tests/test_pagination.py | 4 ++-- tests/test_start_date.py | 4 ++-- 8 files changed, 45 insertions(+), 34 deletions(-) diff --git a/tests/base.py b/tests/base.py index 974d5be..6e40c30 100644 --- a/tests/base.py +++ b/tests/base.py @@ -10,7 +10,7 @@ from tap_tester import connections, menagerie, runner -class BaseTapTest(unittest.TestCase): +class ZendeskChatBaseTest(unittest.TestCase): REPLICATION_KEYS = "valid-replication-keys" PRIMARY_KEYS = "table-key-properties" REPLICATION_METHOD = "forced-replication-method" @@ -53,17 +53,23 @@ def expected_metadata(self): default = { self.PRIMARY_KEYS: {"id"}, - self.REPLICATION_METHOD: self.FULL, + self.REPLICATION_METHOD: self.FULL } - shortcuts_rep_key = {self.PRIMARY_KEYS: {"name"}, self.REPLICATION_METHOD: self.FULL} + shortcuts_rep_key = { + self.PRIMARY_KEYS: {"name"}, + self.REPLICATION_METHOD: self.FULL + } - account_rep_key = {self.PRIMARY_KEYS: {"account_key"}, self.REPLICATION_METHOD: self.FULL} + account_rep_key = { + self.PRIMARY_KEYS: {"account_key"}, + self.REPLICATION_METHOD: self.FULL + } chats_rep_key = { self.PRIMARY_KEYS: {"id"}, self.REPLICATION_KEYS: {"timestamp", "end_timestamp"}, - self.REPLICATION_METHOD: self.INCREMENTAL, + self.REPLICATION_METHOD: self.INCREMENTAL } return { @@ -74,7 +80,7 @@ def expected_metadata(self): "bans": default, "departments": default, "goals": default, - "account": account_rep_key, + "account": account_rep_key } def expected_streams(self): @@ -85,7 +91,8 @@ def expected_primary_keys(self): """return a dictionary with key of table name and value as a set of primary key fields.""" return { - table: properties.get(self.PRIMARY_KEYS, set()) for table, properties in self.expected_metadata().items() + table: properties.get(self.PRIMARY_KEYS, set()) + for table, properties in self.expected_metadata().items() } def expected_replication_keys(self): diff --git a/tests/test_all_fields.py b/tests/test_all_fields.py index 7c1847c..4d049c5 100644 --- a/tests/test_all_fields.py +++ b/tests/test_all_fields.py @@ -1,16 +1,29 @@ """Test that with no fields selected for a stream automatic fields are still replicated.""" -from base import BaseTapTest +from base import ZendeskChatBaseTest from tap_tester import connections, menagerie, runner -class TestZendeskChatAllFields(BaseTapTest): +class TestZendeskChatAllFields(ZendeskChatBaseTest): """Test that all fields selected for a stream are replicated.""" @staticmethod def name(): return "tap_tester_zendesk_chat_all_fields" + KNOWN_MISSING_FIELDS = { + "agents": { + "scope", + }, + "account": { + "billing", + }, + "shortcuts": { + "departments", + "agents", + }, + } + def test_run(self): """ - Verify no unexpected streams were replicated @@ -44,6 +57,7 @@ def test_run(self): expected_automatic_keys = self.expected_automatic_fields().get(stream) data = synced_records.get(stream) actual_all_keys = set() + for message in data["messages"]: if message["action"] == "upsert": actual_all_keys.update(message["data"].keys()) @@ -52,8 +66,11 @@ def test_run(self): expected_automatic_keys.issubset(expected_all_keys), msg=f'{expected_automatic_keys-expected_all_keys} is not in "expected_all_keys"', ) + self.assertGreater(len(expected_all_keys), len(expected_automatic_keys)) + expected_all_keys = expected_all_keys - self.KNOWN_MISSING_FIELDS.get(stream, set()) + self.assertGreater( record_count_by_stream.get(stream, -1), 0, @@ -61,19 +78,6 @@ def test_run(self): ) self.assertSetEqual(expected_all_keys, actual_all_keys) - KNOWN_MISSING_FIELDS = { - "agents": { - "scope", - }, - "account": { - "billing", - }, - "shortcuts": { - "departments", - "agents", - }, - } - def get_properties(self, original: bool = True): """Configuration properties required for the tap.""" diff --git a/tests/test_automatic_fields.py b/tests/test_automatic_fields.py index fc1689e..e063b4c 100644 --- a/tests/test_automatic_fields.py +++ b/tests/test_automatic_fields.py @@ -1,11 +1,11 @@ """Test that with no fields selected for a stream automatic fields are still replicated.""" -from base import BaseTapTest +from base import ZendeskChatBaseTest from tap_tester import connections, runner from tap_tester.logger import LOGGER -class TestZendeskChatAutomaticFields(BaseTapTest): +class TestZendeskChatAutomaticFields(ZendeskChatBaseTest): """Test that with no fields selected for a stream automatic fields are still replicated.""" diff --git a/tests/test_bookmarks.py b/tests/test_bookmarks.py index efa5ab6..69c903d 100644 --- a/tests/test_bookmarks.py +++ b/tests/test_bookmarks.py @@ -1,10 +1,10 @@ -from base import BaseTapTest +from base import ZendeskChatBaseTest from tap_tester import connections, menagerie, runner STREAMS_WITH_BOOKMARKS = ["agents", "chats"] -class TestZendeskChatBookmarks(BaseTapTest): +class TestZendeskChatBookmarks(ZendeskChatBaseTest): """Test tap sets a bookmark and respects it for the next sync of a stream.""" diff --git a/tests/test_discovery.py b/tests/test_discovery.py index 947a620..699a633 100644 --- a/tests/test_discovery.py +++ b/tests/test_discovery.py @@ -1,11 +1,11 @@ """Test tap discovery.""" import re -from base import BaseTapTest +from base import ZendeskChatBaseTest from tap_tester import connections, menagerie -class TestZendeskChatDiscovery(BaseTapTest): +class TestZendeskChatDiscovery(ZendeskChatBaseTest): @staticmethod def name(): return "tap_tester_tap_zendesk_chat_discovery" diff --git a/tests/test_interupted_sync.py b/tests/test_interupted_sync.py index 44620d3..2aadab5 100644 --- a/tests/test_interupted_sync.py +++ b/tests/test_interupted_sync.py @@ -2,12 +2,12 @@ replicated.""" import copy -from base import BaseTapTest +from base import ZendeskChatBaseTest from tap_tester import connections, menagerie, runner from tap_tester.logger import LOGGER -class TestZendeskChatDiscoveryInteruptibleSync(BaseTapTest): +class TestZendeskChatDiscoveryInteruptibleSync(ZendeskChatBaseTest): """Test tap's ability to recover from an interrupted sync.""" @staticmethod diff --git a/tests/test_pagination.py b/tests/test_pagination.py index 0883e5c..8782f00 100644 --- a/tests/test_pagination.py +++ b/tests/test_pagination.py @@ -2,12 +2,12 @@ replicated.""" from math import ceil -from base import BaseTapTest +from base import ZendeskChatBaseTest from tap_tester import connections, runner from tap_tester.logger import LOGGER -class TestZendeskChatPagination(BaseTapTest): +class TestZendeskChatPagination(ZendeskChatBaseTest): @staticmethod def name(): return "tap_tester_zendesk_chat_pagination" diff --git a/tests/test_start_date.py b/tests/test_start_date.py index e471b2a..0166082 100644 --- a/tests/test_start_date.py +++ b/tests/test_start_date.py @@ -2,13 +2,13 @@ from functools import reduce -from base import BaseTapTest +from base import ZendeskChatBaseTest from dateutil.parser import parse from tap_tester import menagerie, runner from tap_tester.logger import LOGGER -class StartDateTest(BaseTapTest): +class StartDateTest(ZendeskChatBaseTest): """Test that the start_date configuration is respected. - verify that a sync with a later start date has at least one record From a1978349d6f6930d3d7633d711cc418a7bbe5e6f Mon Sep 17 00:00:00 2001 From: VishalP <20889199+Vi6hal@users.noreply.github.com> Date: Mon, 14 Nov 2022 22:01:35 +0530 Subject: [PATCH 33/39] fixed suggestions on PR --- tests/base.py | 20 ++++++++---------- tests/test_all_fields.py | 38 +++++++++++++++++----------------- tests/test_automatic_fields.py | 36 ++++++++++++++++---------------- tests/test_bookmarks.py | 20 +++++++++--------- tests/test_interupted_sync.py | 20 +++++++++--------- tests/test_pagination.py | 32 ++++++++++++++-------------- tests/test_start_date.py | 22 ++++++++++---------- 7 files changed, 93 insertions(+), 95 deletions(-) diff --git a/tests/base.py b/tests/base.py index 6e40c30..9844d0e 100644 --- a/tests/base.py +++ b/tests/base.py @@ -50,7 +50,6 @@ def get_credentials(): def expected_metadata(self): """The expected streams and metadata about the streams.""" - default = { self.PRIMARY_KEYS: {"id"}, self.REPLICATION_METHOD: self.FULL @@ -69,7 +68,7 @@ def expected_metadata(self): chats_rep_key = { self.PRIMARY_KEYS: {"id"}, self.REPLICATION_KEYS: {"timestamp", "end_timestamp"}, - self.REPLICATION_METHOD: self.INCREMENTAL + self.REPLICATION_METHOD: self.INCREMENTAL, } return { @@ -80,22 +79,21 @@ def expected_metadata(self): "bans": default, "departments": default, "goals": default, - "account": account_rep_key + "account": account_rep_key, } - def expected_streams(self): + def expected_streams(self) -> Set: """A set of expected stream names.""" return set(self.expected_metadata().keys()) - def expected_primary_keys(self): + def expected_primary_keys(self) -> Dict: """return a dictionary with key of table name and value as a set of primary key fields.""" return { - table: properties.get(self.PRIMARY_KEYS, set()) - for table, properties in self.expected_metadata().items() + table: properties.get(self.PRIMARY_KEYS, set()) for table, properties in self.expected_metadata().items() } - def expected_replication_keys(self): + def expected_replication_keys(self) -> Dict: """return a dictionary with key of table name and value as a set of replication key fields.""" return { @@ -103,13 +101,13 @@ def expected_replication_keys(self): for table, properties in self.expected_metadata().items() } - def expected_automatic_fields(self): + def expected_automatic_fields(self) -> Dict: return { table: self.expected_primary_keys().get(table) | self.expected_replication_keys().get(table) for table in self.expected_metadata() } - def expected_replication_method(self): + def expected_replication_method(self) -> Dict: """return a dictionary with key of table name and value of replication method.""" return { @@ -129,7 +127,7 @@ def setUp(self): # Helper Methods # ######################### - def run_sync(self, conn_id): + def run_sync(self, conn_id: int): """Run a sync job and make sure it exited properly. Return a dictionary with keys of streams synced and values of diff --git a/tests/test_all_fields.py b/tests/test_all_fields.py index 4d049c5..b4e3055 100644 --- a/tests/test_all_fields.py +++ b/tests/test_all_fields.py @@ -24,6 +24,24 @@ def name(): }, } + def get_properties(self, original: bool = True): + """Configuration properties required for the tap.""" + + return_value = { + "start_date": "2017-01-15T00:00:00Z", + "chat_search_interval_days": 500, + } + + if original: + return return_value + + # Start Date test needs the new connections start date to be prior to the default + assert self.start_date < return_value["start_date"] + + # Assign start date to be the default + return_value["start_date"] = self.start_date + return return_value + def test_run(self): """ - Verify no unexpected streams were replicated @@ -57,7 +75,7 @@ def test_run(self): expected_automatic_keys = self.expected_automatic_fields().get(stream) data = synced_records.get(stream) actual_all_keys = set() - + for message in data["messages"]: if message["action"] == "upsert": actual_all_keys.update(message["data"].keys()) @@ -77,21 +95,3 @@ def test_run(self): msg="The number of records is not over the stream max limit", ) self.assertSetEqual(expected_all_keys, actual_all_keys) - - def get_properties(self, original: bool = True): - """Configuration properties required for the tap.""" - - return_value = { - "start_date": "2017-01-15T00:00:00Z", - "chat_search_interval_days": 500, - } - - if original: - return return_value - - # Start Date test needs the new connections start date to be prior to the default - assert self.start_date < return_value["start_date"] - - # Assign start date to be the default - return_value["start_date"] = self.start_date - return return_value diff --git a/tests/test_automatic_fields.py b/tests/test_automatic_fields.py index e063b4c..10e301c 100644 --- a/tests/test_automatic_fields.py +++ b/tests/test_automatic_fields.py @@ -13,6 +13,24 @@ class TestZendeskChatAutomaticFields(ZendeskChatBaseTest): def name(): return "tap_tester_zendesk_chat_automatic_fields" + def get_properties(self, original: bool = True): + """Configuration properties required for the tap.""" + + return_value = { + "start_date": "2017-01-15T00:00:00Z", + "chat_search_interval_days": 500, + } + + if original: + return return_value + + # Start Date test needs the new connections start date to be prior to the default + assert self.start_date < return_value["start_date"] + + # Assign start date to be the default + return_value["start_date"] = self.start_date + return return_value + def test_run(self): """ - Verify we can deselect all fields except when inclusion=automatic, which is handled by base.py methods @@ -53,21 +71,3 @@ def test_run(self): else: for actual_keys in record_messages_keys: self.assertSetEqual(expected_keys, actual_keys) - - def get_properties(self, original: bool = True): - """Configuration properties required for the tap.""" - - return_value = { - "start_date": "2017-01-15T00:00:00Z", - "chat_search_interval_days": 500, - } - - if original: - return return_value - - # Start Date test needs the new connections start date to be prior to the default - assert self.start_date < return_value["start_date"] - - # Assign start date to be the default - return_value["start_date"] = self.start_date - return return_value diff --git a/tests/test_bookmarks.py b/tests/test_bookmarks.py index 69c903d..f8eb166 100644 --- a/tests/test_bookmarks.py +++ b/tests/test_bookmarks.py @@ -12,6 +12,16 @@ class TestZendeskChatBookmarks(ZendeskChatBaseTest): def name(): return "tap_tester_zendesk_chat_bookmarks" + def get_properties(self, original: bool = True): + """Configuration properties required for the tap.""" + return_value = {"start_date": "2017-01-15T00:00:00Z", "agents_page_limit": 1, "chat_search_interval_days": 2} + if original: + return return_value + + return_value["start_date"] = self.start_date + + return return_value + def test_run(self): """ - Verify that for each stream you can do a sync which records bookmarks. @@ -180,13 +190,3 @@ def test_run(self): # Verify at least 1 record was replicated in the second sync self.assertGreater(second_sync_count, 0, msg=f"We are not fully testing bookmarking for {stream}") - - def get_properties(self, original: bool = True): - """Configuration properties required for the tap.""" - return_value = {"start_date": "2017-01-15T00:00:00Z", "agents_page_limit": 1, "chat_search_interval_days": 2} - if original: - return return_value - - return_value["start_date"] = self.start_date - - return return_value diff --git a/tests/test_interupted_sync.py b/tests/test_interupted_sync.py index 2aadab5..df74e05 100644 --- a/tests/test_interupted_sync.py +++ b/tests/test_interupted_sync.py @@ -14,6 +14,16 @@ class TestZendeskChatDiscoveryInteruptibleSync(ZendeskChatBaseTest): def name(): return "tap_tester_zendesk_chat_interrupted_sync" + def get_properties(self, original: bool = True): + """Configuration properties required for the tap.""" + return_value = {"start_date": "2022-10-10T00:00:00Z", "chat_search_interval_days": 1} + if original: + return return_value + + return_value["start_date"] = self.start_date + + return return_value + def test_run(self): """Testing that if a sync job is interrupted and state is saved with `currently_syncing`(stream) the next sync job kicks off and the tap @@ -88,13 +98,3 @@ def test_run(self): raise NotImplementedError( f"INVALID EXPECTATIONS: STREAM: {stream} REPLICATION_METHOD: {expected_replication_method}" ) - - def get_properties(self, original: bool = True): - """Configuration properties required for the tap.""" - return_value = {"start_date": "2022-10-10T00:00:00Z", "chat_search_interval_days": 1} - if original: - return return_value - - return_value["start_date"] = self.start_date - - return return_value diff --git a/tests/test_pagination.py b/tests/test_pagination.py index 8782f00..3d98c95 100644 --- a/tests/test_pagination.py +++ b/tests/test_pagination.py @@ -12,6 +12,22 @@ class TestZendeskChatPagination(ZendeskChatBaseTest): def name(): return "tap_tester_zendesk_chat_pagination" + AGENTS_PAGE_SIZE = 1 + BANS_PAGE_SIZE = 100 + + def get_properties(self, original: bool = True): + """Configuration properties required for the tap.""" + return_value = { + "start_date": "2021-10-10T00:00:00Z", + "agents_page_limit": self.AGENTS_PAGE_SIZE, + } + if original: + return return_value + + return_value["start_date"] = self.start_date + + return return_value + def test_run(self): """ - Verify that for each stream you can get multiple pages of data. @@ -69,19 +85,3 @@ def test_run(self): self.assertTrue( current_page.isdisjoint(other_page), msg=f"other_page_primary_keys={other_page}" ) - - def get_properties(self, original: bool = True): - """Configuration properties required for the tap.""" - return_value = { - "start_date": "2021-10-10T00:00:00Z", - "agents_page_limit": self.AGENTS_PAGE_SIZE, - } - if original: - return return_value - - return_value["start_date"] = self.start_date - - return return_value - - AGENTS_PAGE_SIZE = 1 - BANS_PAGE_SIZE = 100 diff --git a/tests/test_start_date.py b/tests/test_start_date.py index 0166082..6be726f 100644 --- a/tests/test_start_date.py +++ b/tests/test_start_date.py @@ -24,6 +24,17 @@ class StartDateTest(ZendeskChatBaseTest): def name(): return "tap_tester_zendesk_chat_start_date_test" + def get_properties(self, original: bool = True): + return_value = { + "start_date": "2021-04-01T00:00:00Z", + } + + if original: + return return_value + + return_value["start_date"] = "2021-05-06T00:00:00Z" + return return_value + def test_run(self): """Test we get a lot of data back based on the start date configured in base.""" @@ -104,14 +115,3 @@ def test_run(self): except (OverflowError, ValueError, TypeError): LOGGER.info("bookmarks cannot be converted to dates, " "can't test start_date for %s", stream) - - def get_properties(self, original: bool = True): - return_value = { - "start_date": "2021-04-01T00:00:00Z", - } - - if original: - return return_value - - return_value["start_date"] = "2021-05-06T00:00:00Z" - return return_value From 688ab9d2cec644794b2c9a10918dd2b22f78fda1 Mon Sep 17 00:00:00 2001 From: Vi6hal <20889199+Vi6hal@users.noreply.github.com> Date: Tue, 15 Nov 2022 09:19:25 +0000 Subject: [PATCH 34/39] fixed interupted sync expectations --- tests/base.py | 20 ------- tests/test_interupted_sync.py | 98 +++++++++++++++++++++++++++++++---- 2 files changed, 89 insertions(+), 29 deletions(-) diff --git a/tests/base.py b/tests/base.py index 9844d0e..527ddbd 100644 --- a/tests/base.py +++ b/tests/base.py @@ -418,26 +418,6 @@ def __init__(self, *args, **kwargs): self.start_date = self.get_properties().get("start_date") self.maxDiff = None - def create_interrupt_sync_state( - self, state: Dict, interrupt_stream: str, pending_streams: Set, start_date: str - ) -> Dict: - """This function will create a new interrupt sync bookmark state.""" - expected_replication_keys = self.expected_replication_keys() - bookmark_state = state["bookmarks"] - if self.expected_metadata()[interrupt_stream][self.REPLICATION_METHOD] == self.INCREMENTAL: - replication_key = next(iter(expected_replication_keys[interrupt_stream])) - bookmark_date = bookmark_state[interrupt_stream][replication_key] - updated_bookmark_date = self.get_mid_point_date(start_date, bookmark_date) - bookmark_state[interrupt_stream][replication_key] = updated_bookmark_date - state["currently_syncing"] = interrupt_stream - # For pending streams, update the bookmark_value to start-date - for stream in iter(pending_streams): - # Only incremental streams should have the bookmark value - if self.expected_metadata()[stream][self.REPLICATION_METHOD] == self.INCREMENTAL: - replication_key = next(iter(expected_replication_keys[stream])) - bookmark_state[stream][replication_key] = start_date - state["bookmarks"] = bookmark_state - return state def get_mid_point_date(self, start_date: str, bookmark_date: str) -> str: """Function to find the middle date between two dates.""" diff --git a/tests/test_interupted_sync.py b/tests/test_interupted_sync.py index df74e05..ea20389 100644 --- a/tests/test_interupted_sync.py +++ b/tests/test_interupted_sync.py @@ -5,6 +5,7 @@ from base import ZendeskChatBaseTest from tap_tester import connections, menagerie, runner from tap_tester.logger import LOGGER +from singer.utils import strptime_to_utc class TestZendeskChatDiscoveryInteruptibleSync(ZendeskChatBaseTest): @@ -16,7 +17,7 @@ def name(): def get_properties(self, original: bool = True): """Configuration properties required for the tap.""" - return_value = {"start_date": "2022-10-10T00:00:00Z", "chat_search_interval_days": 1} + return_value = {"start_date": "2017-01-10T00:00:00Z", "chat_search_interval_days": 300} if original: return return_value @@ -33,7 +34,8 @@ def test_run(self): """ start_date = self.get_properties()["start_date"] - expected_streams = self.expected_streams() + # skipping following stremas {"goals","shortcuts", "triggers"} + expected_streams = {"account", "agents","bans","chats","departments"} expected_replication_methods = self.expected_replication_method() @@ -50,16 +52,47 @@ def test_run(self): # Run a first sync job using orchestrator first_sync_record_count = self.run_and_verify_sync(conn_id) + first_sync_records = runner.get_records_from_target_output() + first_sync_bookmarks = menagerie.get_state(conn_id) - completed_streams = {"account", "agents", "bans", "chats", "departments"} - pending_streams = {"shortcuts", "triggers"} - interrupt_stream = "goals" - interrupted_sync_states = self.create_interrupt_sync_state( - copy.deepcopy(first_sync_bookmarks), interrupt_stream, pending_streams, start_date - ) + completed_streams = {"account", "agents", "bans"} + interrupt_stream = "chats" + pending_streams = {"departments"} + interrupted_sync_states = copy.deepcopy(first_sync_bookmarks) + bookmark_state = interrupted_sync_states["bookmarks"] + # set the interrupt stream as currently syncing + interrupted_sync_states["currently_syncing"] = interrupt_stream + + # remove bookmark for completed streams to set them as pending + # setting value to start date wont be needed as all other streams are full_table + for stream in pending_streams: + bookmark_state.pop(stream,None) + + # update state for chats stream and set the bookmark to a date earlier + chats_bookmark = bookmark_state.get("chats",{}) + chats_bookmark.pop("offset",None) + chats_rec,offline_msgs_rec = [],[] + for record in first_sync_records.get("chats").get("messages"): + if record.get("action") == "upsert": + rec = record.get("data") + if rec["type"] == "offline_msg": + offline_msgs_rec.append(rec) + else: + chats_rec.append(rec) + + # set a deffered bookmark value for both the bookmarks of chat stream + if len(chats_rec) > 1: + chats_bookmark["chat.end_timestamp"] = chats_rec[-1]["end_timestamp"] + if len(offline_msgs_rec) > 1: + chats_bookmark["offline_msg.timestamp"] = offline_msgs_rec[-1]["timestamp"] + + bookmark_state["chats"] = chats_bookmark + interrupted_sync_states["bookmarks"] = bookmark_state menagerie.set_state(conn_id, interrupted_sync_states) + second_sync_record_count = self.run_and_verify_sync(conn_id) + second_sync_records = runner.get_records_from_target_output() for stream in expected_streams: with self.subTest(stream=stream): @@ -67,6 +100,10 @@ def test_run(self): first_sync_count = first_sync_record_count.get(stream, 0) second_sync_count = second_sync_record_count.get(stream, 0) + # gather results + full_records = [message['data'] for message in first_sync_records[stream]['messages']] + interrupted_records = [message['data'] for message in second_sync_records[stream]['messages']] + if expected_replication_method == self.INCREMENTAL: if stream in completed_streams: @@ -76,7 +113,6 @@ def test_run(self): 1, msg=f"Incorrect bookmarking for {stream}, at least one or more record should be replicated", ) - elif stream == interrupted_sync_states.get("currently_syncing", None): # For interrupted stream records sync count should be less equals self.assertLessEqual( @@ -84,6 +120,50 @@ def test_run(self): first_sync_count, msg=f"For interrupted stream - {stream}, seconds sync record count should be lesser or equal to first sync", ) + + # Verify the interrupted sync replicates the expected record set + # All interrupted recs are in full recs + for record in interrupted_records: + self.assertIn( + record, + full_records, + msg='incremental table record in interrupted sync not found in full sync' + ) + + # Verify resuming sync only replicates records with replication key values greater or equal to + # the interrupted_state for streams that were replicated during the interrupted sync. + if stream == "chats": + + interrupted_bmk_chat_msg = strptime_to_utc(bookmark_state["chats"]["offline_msg.timestamp"]) + interrupted_bmk_chat = strptime_to_utc(bookmark_state["chats"]["chat.end_timestamp"]) + + for record in interrupted_records: + if record["type"] == "offline_msg": + rec_time = strptime_to_utc(record.get("timestamp")) + self.assertGreaterEqual(rec_time, interrupted_bmk_chat_msg) + else: + rec_time = strptime_to_utc(record.get("end_timestamp")) + self.assertGreaterEqual(rec_time, interrupted_bmk_chat) + + # Record count for all streams of interrupted sync match expectations + full_records_after_interrupted_bookmark = 0 + + for record in full_records: + if record["type"] == "offline_msg": + rec_time = strptime_to_utc(record.get("timestamp")) + if rec_time >= interrupted_bmk_chat_msg: + full_records_after_interrupted_bookmark += 1 + else: + rec_time = strptime_to_utc(record.get("end_timestamp")) + if rec_time >= interrupted_bmk_chat: + full_records_after_interrupted_bookmark += 1 + + self.assertEqual( + full_records_after_interrupted_bookmark, + len(interrupted_records), + msg=f"Expected {full_records_after_interrupted_bookmark} records in each sync" + ) + elif stream in pending_streams: # First sync and second sync record count match self.assertGreaterEqual( From 6686a94e457071630f46d1b8735ab31162e138b4 Mon Sep 17 00:00:00 2001 From: Vi6hal <20889199+Vi6hal@users.noreply.github.com> Date: Tue, 15 Nov 2022 12:30:44 +0000 Subject: [PATCH 35/39] fixed inter sync --- tests/base.py | 78 ++++++++++++++++++++++++----------- tests/test_interupted_sync.py | 78 ++++++++++++----------------------- 2 files changed, 81 insertions(+), 75 deletions(-) diff --git a/tests/base.py b/tests/base.py index 527ddbd..b51eb00 100644 --- a/tests/base.py +++ b/tests/base.py @@ -1,11 +1,12 @@ """Setup expectations for test sub classes Run discovery for as a prerequisite for most tests.""" +import copy import json import os import unittest from datetime import datetime as dt from datetime import timezone as tz -from typing import Dict, Set +from typing import Any, Dict, Set from tap_tester import connections, menagerie, runner @@ -18,6 +19,11 @@ class ZendeskChatBaseTest(unittest.TestCase): FULL = "FULL_TABLE" START_DATE_FORMAT = "%Y-%m-%dT00:00:00Z" + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.start_date = self.get_properties().get("start_date") + self.maxDiff = None + @staticmethod def tap_name(): """The name of the tap.""" @@ -50,6 +56,7 @@ def get_credentials(): def expected_metadata(self): """The expected streams and metadata about the streams.""" + default = { self.PRIMARY_KEYS: {"id"}, self.REPLICATION_METHOD: self.FULL @@ -156,7 +163,7 @@ def local_to_utc(date: dt): return utc - def max_bookmarks_by_stream(self, sync_records): + def max_bookmarks_by_stream(self, sync_records: Any): """Return the maximum value for the replication key for the events stream which is the bookmark expected value for updated records. @@ -208,7 +215,7 @@ def max_bookmarks_by_stream(self, sync_records): max_bookmarks[stream][stream_bookmark_key] = bk_value return max_bookmarks - def min_bookmarks_by_stream(self, sync_records): + def min_bookmarks_by_stream(self, sync_records: Any): """Return the minimum value for the replication key for each stream.""" min_bookmarks = {} chats = [] @@ -254,7 +261,9 @@ def min_bookmarks_by_stream(self, sync_records): min_bookmarks[stream][stream_bookmark_key] = bk_value return min_bookmarks - def select_all_streams_and_fields(self, conn_id, catalogs, select_all_fields: bool = True, exclude_streams=None): + def select_all_streams_and_fields( + self, conn_id: Any, catalogs: Any, select_all_fields: bool = True, exclude_streams=None + ): """Select all streams and all fields within streams.""" for catalog in catalogs: @@ -302,7 +311,7 @@ def get_selected_fields_from_metadata(metadata): selected_fields.add(field["breadcrumb"][1]) return selected_fields - def run_and_verify_check_mode(self, conn_id): + def run_and_verify_check_mode(self, conn_id: Any): """Run the tap in check mode and verify it succeeds. This should be ran prior to field selection and initial sync. @@ -323,7 +332,7 @@ def run_and_verify_check_mode(self, conn_id): self.assertEqual(len(diff), 0, msg=f"discovered schemas do not match: {diff}") return found_catalogs - def run_and_verify_sync(self, conn_id, clear_state=False): + def run_and_verify_sync(self, conn_id, clear_state: bool = False): """Clear the connections state in menagerie and Run a Sync. Verify the exit code following the sync. @@ -348,7 +357,7 @@ def run_and_verify_sync(self, conn_id, clear_state=False): return record_count_by_stream def perform_and_verify_table_and_field_selection( - self, conn_id, found_catalogs, streams_to_select, select_all_fields=True + self, conn_id: Any, found_catalogs: Any, streams_to_select: Any, select_all_fields: bool = True ): """Perform table and field selection based off of the streams to select set and field selection parameters. @@ -388,7 +397,7 @@ def perform_and_verify_table_and_field_selection( selected_fields = self.get_selected_fields_from_metadata(catalog_entry["metadata"]) self.assertEqual(expected_automatic_fields, selected_fields) - def expected_schema_keys(self, stream): + def expected_schema_keys(self, stream: Any): props = self._load_schemas(stream).get(stream).get("properties") if not props: props = self._load_schemas(stream, shared=True).get(stream).get("properties") @@ -398,7 +407,7 @@ def expected_schema_keys(self, stream): return props.keys() @staticmethod - def _get_abs_path(path): + def _get_abs_path(path: str): return os.path.join(os.path.dirname(os.path.realpath(__file__)), path) def _load_schemas(self, stream, shared: bool = False): @@ -413,18 +422,39 @@ def _load_schemas(self, stream, shared: bool = False): return schemas - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.start_date = self.get_properties().get("start_date") - self.maxDiff = None - - - def get_mid_point_date(self, start_date: str, bookmark_date: str) -> str: - """Function to find the middle date between two dates.""" - date_format = "%Y-%m-%dT%H:%M:%S.%fZ" - start_date_dt = dt.strptime(start_date, date_format) - bookmark_date_dt = dt.strptime(bookmark_date, date_format) - mid_date_dt = start_date_dt.date() + (bookmark_date_dt - start_date_dt) / 2 - # Convert datetime object to string format - mid_date = mid_date_dt.strftime(date_format) - return mid_date + def create_interrupt_sync_state(self, state: dict, interrupt_stream: str, pending_streams: list, sync_records: Any): + """Creates a state for simulating a interupted sync and backdating + bookmarks for interrupted stream.""" + + interrupted_sync_states = copy.deepcopy(state) + bookmark_state = interrupted_sync_states["bookmarks"] + # Set the interrupt stream as currently syncing + interrupted_sync_states["currently_syncing"] = interrupt_stream + + # Remove bookmark for completed streams to set them as pending + # Setting value to start date wont be needed as all other streams are full_table + for stream in pending_streams: + bookmark_state.pop(stream, None) + + # update state for chats stream and set the bookmark to a date earlier + chats_bookmark = bookmark_state.get("chats", {}) + chats_bookmark.pop("offset", None) + chats_rec, offline_msgs_rec = [], [] + for record in sync_records.get("chats").get("messages"): + if record.get("action") == "upsert": + rec = record.get("data") + if rec["type"] == "offline_msg": + offline_msgs_rec.append(rec) + else: + chats_rec.append(rec) + + # set a deferred bookmark value for both the bookmarks of chat stream + chat_index = len(chats_rec) // 2 if len(chats_rec) > 1 else 0 + chats_bookmark["chat.end_timestamp"] = chats_rec[chat_index]["end_timestamp"] + + msg_index = len(offline_msgs_rec) // 2 if len(offline_msgs_rec) > 1 else 0 + chats_bookmark["offline_msg.timestamp"] = offline_msgs_rec[msg_index]["timestamp"] + + bookmark_state["chats"] = chats_bookmark + interrupted_sync_states["bookmarks"] = bookmark_state + return interrupted_sync_states diff --git a/tests/test_interupted_sync.py b/tests/test_interupted_sync.py index ea20389..20f832c 100644 --- a/tests/test_interupted_sync.py +++ b/tests/test_interupted_sync.py @@ -1,11 +1,11 @@ """Test that with no fields selected for a stream automatic fields are still replicated.""" import copy +from typing import Any from base import ZendeskChatBaseTest -from tap_tester import connections, menagerie, runner -from tap_tester.logger import LOGGER from singer.utils import strptime_to_utc +from tap_tester import connections, menagerie, runner class TestZendeskChatDiscoveryInteruptibleSync(ZendeskChatBaseTest): @@ -25,27 +25,28 @@ def get_properties(self, original: bool = True): return return_value + def test_run(self): """Testing that if a sync job is interrupted and state is saved with `currently_syncing`(stream) the next sync job kicks off and the tap picks back up on that `currently_syncing` stream. - Verify behavior is consistent when an added stream is selected between initial and resuming sync + - Verify only records with replication-key values greater than or equal to the stream level bookmark are + replicated on the resuming sync for the interrupted stream. + - Verify the yet-to-be-synced streams are replicated following the interrupted stream in the resuming sync. """ - start_date = self.get_properties()["start_date"] - # skipping following stremas {"goals","shortcuts", "triggers"} - expected_streams = {"account", "agents","bans","chats","departments"} - + expected_streams = self.expected_streams() expected_replication_methods = self.expected_replication_method() # instantiate connection conn_id = connections.ensure_connection(self) - # run check mode + # Run check mode found_catalogs = self.run_and_verify_check_mode(conn_id) - # table and field selection + # Table and field selection catalog_entries = [item for item in found_catalogs if item.get("stream_name") in expected_streams] self.perform_and_verify_table_and_field_selection(conn_id, catalog_entries, expected_streams) @@ -58,37 +59,12 @@ def test_run(self): completed_streams = {"account", "agents", "bans"} interrupt_stream = "chats" - pending_streams = {"departments"} - interrupted_sync_states = copy.deepcopy(first_sync_bookmarks) - bookmark_state = interrupted_sync_states["bookmarks"] - # set the interrupt stream as currently syncing - interrupted_sync_states["currently_syncing"] = interrupt_stream - - # remove bookmark for completed streams to set them as pending - # setting value to start date wont be needed as all other streams are full_table - for stream in pending_streams: - bookmark_state.pop(stream,None) - - # update state for chats stream and set the bookmark to a date earlier - chats_bookmark = bookmark_state.get("chats",{}) - chats_bookmark.pop("offset",None) - chats_rec,offline_msgs_rec = [],[] - for record in first_sync_records.get("chats").get("messages"): - if record.get("action") == "upsert": - rec = record.get("data") - if rec["type"] == "offline_msg": - offline_msgs_rec.append(rec) - else: - chats_rec.append(rec) + pending_streams = {"departments", "goals", "shortcuts", "triggers"} - # set a deffered bookmark value for both the bookmarks of chat stream - if len(chats_rec) > 1: - chats_bookmark["chat.end_timestamp"] = chats_rec[-1]["end_timestamp"] - if len(offline_msgs_rec) > 1: - chats_bookmark["offline_msg.timestamp"] = offline_msgs_rec[-1]["timestamp"] - - bookmark_state["chats"] = chats_bookmark - interrupted_sync_states["bookmarks"] = bookmark_state + interrupted_sync_states = self.create_interrupt_sync_state( + first_sync_bookmarks, interrupt_stream, pending_streams, first_sync_records + ) + bookmark_state = interrupted_sync_states["bookmarks"] menagerie.set_state(conn_id, interrupted_sync_states) second_sync_record_count = self.run_and_verify_sync(conn_id) @@ -100,9 +76,9 @@ def test_run(self): first_sync_count = first_sync_record_count.get(stream, 0) second_sync_count = second_sync_record_count.get(stream, 0) - # gather results - full_records = [message['data'] for message in first_sync_records[stream]['messages']] - interrupted_records = [message['data'] for message in second_sync_records[stream]['messages']] + # Gather results + full_records = [message["data"] for message in first_sync_records[stream]["messages"]] + interrupted_records = [message["data"] for message in second_sync_records[stream]["messages"]] if expected_replication_method == self.INCREMENTAL: @@ -124,16 +100,16 @@ def test_run(self): # Verify the interrupted sync replicates the expected record set # All interrupted recs are in full recs for record in interrupted_records: - self.assertIn( - record, - full_records, - msg='incremental table record in interrupted sync not found in full sync' - ) + self.assertIn( + record, + full_records, + msg="incremental table record in interrupted sync not found in full sync", + ) # Verify resuming sync only replicates records with replication key values greater or equal to - # the interrupted_state for streams that were replicated during the interrupted sync. + # The interrupted_state for streams that were replicated during the interrupted sync. if stream == "chats": - + interrupted_bmk_chat_msg = strptime_to_utc(bookmark_state["chats"]["offline_msg.timestamp"]) interrupted_bmk_chat = strptime_to_utc(bookmark_state["chats"]["chat.end_timestamp"]) @@ -147,7 +123,7 @@ def test_run(self): # Record count for all streams of interrupted sync match expectations full_records_after_interrupted_bookmark = 0 - + for record in full_records: if record["type"] == "offline_msg": rec_time = strptime_to_utc(record.get("timestamp")) @@ -157,11 +133,11 @@ def test_run(self): rec_time = strptime_to_utc(record.get("end_timestamp")) if rec_time >= interrupted_bmk_chat: full_records_after_interrupted_bookmark += 1 - + self.assertEqual( full_records_after_interrupted_bookmark, len(interrupted_records), - msg=f"Expected {full_records_after_interrupted_bookmark} records in each sync" + msg=f"Expected {full_records_after_interrupted_bookmark} records in each sync", ) elif stream in pending_streams: From 9fdbc5b79668f064201ea0d94524c53b921f56bd Mon Sep 17 00:00:00 2001 From: Vi6hal <20889199+Vi6hal@users.noreply.github.com> Date: Tue, 15 Nov 2022 12:39:49 +0000 Subject: [PATCH 36/39] fixed pylint issue --- tests/base.py | 2 +- tests/test_interupted_sync.py | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/tests/base.py b/tests/base.py index b51eb00..9963873 100644 --- a/tests/base.py +++ b/tests/base.py @@ -423,7 +423,7 @@ def _load_schemas(self, stream, shared: bool = False): return schemas def create_interrupt_sync_state(self, state: dict, interrupt_stream: str, pending_streams: list, sync_records: Any): - """Creates a state for simulating a interupted sync and backdating + """Creates a state for simulating a interrupted sync and backdating bookmarks for interrupted stream.""" interrupted_sync_states = copy.deepcopy(state) diff --git a/tests/test_interupted_sync.py b/tests/test_interupted_sync.py index 20f832c..45b0dcd 100644 --- a/tests/test_interupted_sync.py +++ b/tests/test_interupted_sync.py @@ -1,8 +1,5 @@ """Test that with no fields selected for a stream automatic fields are still replicated.""" -import copy -from typing import Any - from base import ZendeskChatBaseTest from singer.utils import strptime_to_utc from tap_tester import connections, menagerie, runner @@ -25,7 +22,6 @@ def get_properties(self, original: bool = True): return return_value - def test_run(self): """Testing that if a sync job is interrupted and state is saved with `currently_syncing`(stream) the next sync job kicks off and the tap From b7765ebc379e7830afd7504136fcc6047e6b46d0 Mon Sep 17 00:00:00 2001 From: Vi6hal <20889199+Vi6hal@users.noreply.github.com> Date: Wed, 16 Nov 2022 08:28:00 +0000 Subject: [PATCH 37/39] fixed review comments --- tests/test_all_fields.py | 10 ++++------ tests/test_automatic_fields.py | 10 +++++++++- tests/test_discovery.py | 8 +++++--- tests/test_pagination.py | 18 +++++++++++------- 4 files changed, 29 insertions(+), 17 deletions(-) diff --git a/tests/test_all_fields.py b/tests/test_all_fields.py index b4e3055..bb0cacb 100644 --- a/tests/test_all_fields.py +++ b/tests/test_all_fields.py @@ -81,17 +81,15 @@ def test_run(self): actual_all_keys.update(message["data"].keys()) self.assertTrue( - expected_automatic_keys.issubset(expected_all_keys), - msg=f'{expected_automatic_keys-expected_all_keys} is not in "expected_all_keys"', + expected_automatic_keys.issubset(actual_all_keys), + msg=f'{expected_automatic_keys-actual_all_keys} is not in "expected_all_keys"', ) - self.assertGreater(len(expected_all_keys), len(expected_automatic_keys)) - - expected_all_keys = expected_all_keys - self.KNOWN_MISSING_FIELDS.get(stream, set()) - self.assertGreater( record_count_by_stream.get(stream, -1), 0, msg="The number of records is not over the stream max limit", ) + expected_all_keys = expected_all_keys - self.KNOWN_MISSING_FIELDS.get(stream, set()) + self.assertGreaterEqual(len(expected_all_keys), len(actual_all_keys)) self.assertSetEqual(expected_all_keys, actual_all_keys) diff --git a/tests/test_automatic_fields.py b/tests/test_automatic_fields.py index 10e301c..2c6673a 100644 --- a/tests/test_automatic_fields.py +++ b/tests/test_automatic_fields.py @@ -65,9 +65,17 @@ def test_run(self): msg="The number of records is not over the stream max limit", ) if stream == "chats": + # chats stream has two types of records "offline_msgs" and "chat" both of them have different replication keys + # the key "end_timestamp" is not available for "offline_msgs" + # hence we need to verify the record has both or atleaset one key expected_keys_offline_msg = self.expected_automatic_fields().get(stream) - {"end_timestamp"} for actual_keys in record_messages_keys: - self.assertTrue(actual_keys == expected_keys_offline_msg or actual_keys == expected_keys) + if actual_keys == expected_keys: + pass + elif actual_keys == expected_keys_offline_msg: + pass + else: + self.fail(f"Record of type: chat does not have the following automatic fields {expected_keys_offline_msg-actual_keys}") else: for actual_keys in record_messages_keys: self.assertSetEqual(expected_keys, actual_keys) diff --git a/tests/test_discovery.py b/tests/test_discovery.py index 699a633..9a499c3 100644 --- a/tests/test_discovery.py +++ b/tests/test_discovery.py @@ -16,11 +16,13 @@ def test_run(self): - Verify number of actual streams discovered match expected - Verify the stream names discovered were what we expect - - Verify stream names follow naming convention streams should only have lowercase alphas and underscores + - Verify stream names follow naming convention (streams should only have lowercase alphas and underscores_ - verify there is only 1 top level breadcrumb + - verify replication key(s) - verify primary key(s) - - verify that primary keys are given the inclusion of automatic. - - verify that all other fields have inclusion of available metadata. + - verify the actual replication matches our expected replication method + - verify that primary, replication and foreign keys are given the inclusion of automatic (metadata and annotated schema). + - verify that all other fields have inclusion of available (metadata and schema) """ conn_id = connections.ensure_connection(self) diff --git a/tests/test_pagination.py b/tests/test_pagination.py index 3d98c95..c44678f 100644 --- a/tests/test_pagination.py +++ b/tests/test_pagination.py @@ -37,8 +37,9 @@ def test_run(self): """ page_size = int(self.get_properties().get("agents_page_limit", 10)) - expected_streams = {"bans", "agents"} # only "bans" and "agents" stream support pagination + expected_streams = {"bans", "agents"} + # instantiate connection conn_id = connections.ensure_connection(self) @@ -58,23 +59,25 @@ def test_run(self): page_size = self.BANS_PAGE_SIZE if stream == "bans" else self.AGENTS_PAGE_SIZE # expected values expected_primary_keys = self.expected_primary_keys() - # collect information for assertions from syncs 1 & 2 base on expected values primary_keys_list = [ tuple(message.get("data").get(expected_pk) for expected_pk in expected_primary_keys[stream]) for message in synced_records.get(stream).get("messages") if message.get("action") == "upsert" ] - LOGGER.info("stream: %s pk_list %s", stream, primary_keys_list) + rec_count = len(primary_keys_list) + # verify records are more than page size so multiple page is working + self.assertGreater(rec_count,page_size,msg="The number of records is not over the stream max limit") + # Chunk the replicated records (just primary keys) into expected pages pages = [] - page_count = ceil(len(primary_keys_list) / page_size) + page_count = ceil(rec_count / page_size) for page_index in range(page_count): page_start = page_index * page_size page_end = (page_index + 1) * page_size pages.append(set(primary_keys_list[page_start:page_end])) - LOGGER.info("items: %s page_count %s", len(primary_keys_list), page_count) + LOGGER.info("items: %s page_count %s", rec_count, page_count) # Verify by primary keys that data is unique for each page for current_index, current_page in enumerate(pages): @@ -82,6 +85,7 @@ def test_run(self): for other_index, other_page in enumerate(pages): if current_index == other_index: continue # don't compare the page to itself - self.assertTrue( - current_page.isdisjoint(other_page), msg=f"other_page_primary_keys={other_page}" + self.assertTrue( + current_page.isdisjoint(other_page), + msg=f"other_page_primary_keys={other_page}" ) From 94a0918675c8296fe067bc7d1571bd5f7f604fe4 Mon Sep 17 00:00:00 2001 From: Vi6hal <20889199+Vi6hal@users.noreply.github.com> Date: Wed, 16 Nov 2022 08:30:12 +0000 Subject: [PATCH 38/39] fixed linting issues --- tests/test_automatic_fields.py | 7 ++++--- tests/test_pagination.py | 7 +++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test_automatic_fields.py b/tests/test_automatic_fields.py index 2c6673a..75905fd 100644 --- a/tests/test_automatic_fields.py +++ b/tests/test_automatic_fields.py @@ -2,7 +2,6 @@ replicated.""" from base import ZendeskChatBaseTest from tap_tester import connections, runner -from tap_tester.logger import LOGGER class TestZendeskChatAutomaticFields(ZendeskChatBaseTest): @@ -66,7 +65,7 @@ def test_run(self): ) if stream == "chats": # chats stream has two types of records "offline_msgs" and "chat" both of them have different replication keys - # the key "end_timestamp" is not available for "offline_msgs" + # the key "end_timestamp" is not available for "offline_msgs" # hence we need to verify the record has both or atleaset one key expected_keys_offline_msg = self.expected_automatic_fields().get(stream) - {"end_timestamp"} for actual_keys in record_messages_keys: @@ -75,7 +74,9 @@ def test_run(self): elif actual_keys == expected_keys_offline_msg: pass else: - self.fail(f"Record of type: chat does not have the following automatic fields {expected_keys_offline_msg-actual_keys}") + self.fail( + f"Record of type: chat does not have the following automatic fields {expected_keys_offline_msg-actual_keys}" + ) else: for actual_keys in record_messages_keys: self.assertSetEqual(expected_keys, actual_keys) diff --git a/tests/test_pagination.py b/tests/test_pagination.py index c44678f..775ac70 100644 --- a/tests/test_pagination.py +++ b/tests/test_pagination.py @@ -67,7 +67,7 @@ def test_run(self): rec_count = len(primary_keys_list) # verify records are more than page size so multiple page is working - self.assertGreater(rec_count,page_size,msg="The number of records is not over the stream max limit") + self.assertGreater(rec_count, page_size, msg="The number of records is not over the stream max limit") # Chunk the replicated records (just primary keys) into expected pages pages = [] @@ -85,7 +85,6 @@ def test_run(self): for other_index, other_page in enumerate(pages): if current_index == other_index: continue # don't compare the page to itself - self.assertTrue( - current_page.isdisjoint(other_page), - msg=f"other_page_primary_keys={other_page}" + self.assertTrue( + current_page.isdisjoint(other_page), msg=f"other_page_primary_keys={other_page}" ) From 1b70322c76c838b1bed6275dd29d453017add2e5 Mon Sep 17 00:00:00 2001 From: Vi6hal <20889199+Vi6hal@users.noreply.github.com> Date: Wed, 16 Nov 2022 10:36:15 +0000 Subject: [PATCH 39/39] fixed assert for automatic fields --- tests/base.py | 4 ++-- tests/test_all_fields.py | 2 +- tests/test_automatic_fields.py | 44 +++++++++++++++++++++++++--------- 3 files changed, 36 insertions(+), 14 deletions(-) diff --git a/tests/base.py b/tests/base.py index 9963873..1501d96 100644 --- a/tests/base.py +++ b/tests/base.py @@ -43,7 +43,7 @@ def get_properties(self, original: bool = True): return return_value # Start Date test needs the new connections start date to be prior to the default - assert self.start_date < return_value["start_date"] + self.assertTrue(self.start_date < return_value["start_date"]) # Assign start date to be the default return_value["start_date"] = self.start_date @@ -68,7 +68,7 @@ def expected_metadata(self): } account_rep_key = { - self.PRIMARY_KEYS: {"account_key"}, + self.PRIMARY_KEYS: {"account_key"}, self.REPLICATION_METHOD: self.FULL } diff --git a/tests/test_all_fields.py b/tests/test_all_fields.py index bb0cacb..080e778 100644 --- a/tests/test_all_fields.py +++ b/tests/test_all_fields.py @@ -36,7 +36,7 @@ def get_properties(self, original: bool = True): return return_value # Start Date test needs the new connections start date to be prior to the default - assert self.start_date < return_value["start_date"] + self.assertTrue(self.start_date < return_value["start_date"]) # Assign start date to be the default return_value["start_date"] = self.start_date diff --git a/tests/test_automatic_fields.py b/tests/test_automatic_fields.py index 75905fd..841baf7 100644 --- a/tests/test_automatic_fields.py +++ b/tests/test_automatic_fields.py @@ -1,7 +1,9 @@ """Test that with no fields selected for a stream automatic fields are still replicated.""" +from typing import Dict + from base import ZendeskChatBaseTest -from tap_tester import connections, runner +from tap_tester import connections, menagerie, runner class TestZendeskChatAutomaticFields(ZendeskChatBaseTest): @@ -24,12 +26,32 @@ def get_properties(self, original: bool = True): return return_value # Start Date test needs the new connections start date to be prior to the default - assert self.start_date < return_value["start_date"] + self.assertTrue(self.start_date < return_value["start_date"]) # Assign start date to be the default return_value["start_date"] = self.start_date return return_value + def get_chat_type_mapping(self, conn_id: str) -> Dict: + """performs a sync with all fields to get data on chat type mapping to + make correct assertions based on chat type. + + returns {"chat_id":"type"} + """ + + expected_streams = self.expected_streams() + menagerie.set_state(conn_id, {}) + found_catalogs = self.run_and_verify_check_mode(conn_id) + catalog_entries = [catalog for catalog in found_catalogs if catalog.get("stream_name") in expected_streams] + self.perform_and_verify_table_and_field_selection( + conn_id, catalog_entries, expected_streams, select_all_fields=True + ) + self.run_and_verify_sync(conn_id) + synced_records = runner.get_records_from_target_output() + data = synced_records.get("chats", {})["messages"] + chat_type_mapping = {row["data"]["id"]: row["data"]["type"] for row in data if row["action"] == "upsert"} + return chat_type_mapping + def test_run(self): """ - Verify we can deselect all fields except when inclusion=automatic, which is handled by base.py methods @@ -50,6 +72,8 @@ def test_run(self): record_count_by_stream = self.run_and_verify_sync(conn_id) synced_records = runner.get_records_from_target_output() + chat_mapping = self.get_chat_type_mapping(conn_id) + for stream in expected_streams: with self.subTest(stream=stream): @@ -68,15 +92,13 @@ def test_run(self): # the key "end_timestamp" is not available for "offline_msgs" # hence we need to verify the record has both or atleaset one key expected_keys_offline_msg = self.expected_automatic_fields().get(stream) - {"end_timestamp"} - for actual_keys in record_messages_keys: - if actual_keys == expected_keys: - pass - elif actual_keys == expected_keys_offline_msg: - pass - else: - self.fail( - f"Record of type: chat does not have the following automatic fields {expected_keys_offline_msg-actual_keys}" - ) + for row in data["messages"]: + rec = row["data"] + actual_keys = set(rec.keys()) + if chat_mapping[rec["id"]] == "offline_msg": + self.assertSetEqual(actual_keys, expected_keys_offline_msg) + elif chat_mapping[rec["id"]] == "chat": + self.assertSetEqual(actual_keys, expected_keys) else: for actual_keys in record_messages_keys: self.assertSetEqual(expected_keys, actual_keys)