Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: fix invalid timestamp handling in dashboard transformer #339

Merged
merged 4 commits into from
Aug 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion databuilder/models/dashboard/dashboard_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def create_next_relation(self) -> Union[Dict[str, Any], None]:

def _create_relation_iterator(self) -> Optional[Iterator[Dict[str, Any]]]:
for table_id in self._table_ids:
m = re.match('(\w+)://(\w+)\.(\w+)\/(\w+)', table_id)
m = re.match('([^./]+)://([^./]+)\.([^./]+)\/([^./]+)', table_id)
if m:
yield {
RELATION_START_LABEL: DashboardMetadata.DASHBOARD_NODE_LABEL,
Expand Down
5 changes: 4 additions & 1 deletion databuilder/rest_api/rest_api_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def execute(self) -> Iterator[Dict[str, Any]]: # noqa: C901
result_list: List[Any] = [match.value for match in self._jsonpath_expr.find(response_json)]

if not result_list:
log_msg = 'No result from URL: {url} , JSONPATH: {json_path} , response payload: {response}' \
log_msg = 'No result from URL: {url}, JSONPATH: {json_path} , response payload: {response}' \
.format(url=self._url, json_path=self._json_path, response=response_json)
LOGGER.info(log_msg)

Expand All @@ -172,6 +172,9 @@ def execute(self) -> Iterator[Dict[str, Any]]: # noqa: C901
json_path_contains_or=self._json_path_contains_or)

for sub_record in sub_records:
if not sub_record or len(sub_record) != len(self._field_names):
# skip the record
continue
record_dict = copy.deepcopy(record_dict)
for field_name in self._field_names:
record_dict[field_name] = sub_record.pop(0)
Expand Down
7 changes: 6 additions & 1 deletion databuilder/transformer/timestamp_string_to_epoch.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,12 @@ def transform(self, record: Dict[str, Any]) -> Dict[str, Any]:
if not timestamp_str:
return record

utc_dt = datetime.strptime(timestamp_str, self._timestamp_format)
try:
utc_dt = datetime.strptime(timestamp_str, self._timestamp_format)
except ValueError:
# if the timestamp_str doesn't match format, no conversion, return initial result
record[self._field_name] = 0
return record

record[self._field_name] = int((utc_dt - datetime(1970, 1, 1)).total_seconds())
return record
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from setuptools import setup, find_packages


__version__ = '3.0.0'
__version__ = '3.1.0'

requirements_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'requirements.txt')
with open(requirements_path) as requirements_file:
Expand Down
27 changes: 27 additions & 0 deletions tests/unit/models/dashboard/test_dashboard_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,30 @@ def test_dashboard_table_relations(self) -> None:
RELATION_REVERSE_TYPE: 'TABLE_OF_DASHBOARD'}
assert actual is not None
self.assertDictEqual(actual, expected)

def test_dashboard_table_without_dot_as_name(self) -> None:
dashboard_table = DashboardTable(table_ids=['bq-name://project-id.schema-name/table-name'],
cluster='cluster_id', product='product_id',
dashboard_id='dashboard_id', dashboard_group_id='dashboard_group_id')
actual = dashboard_table.create_next_relation()
expected = {RELATION_END_KEY: 'bq-name://project-id.schema-name/table-name', RELATION_START_LABEL: 'Dashboard',
RELATION_END_LABEL: 'Table',
RELATION_START_KEY: 'product_id_dashboard://cluster_id.dashboard_group_id/dashboard_id',
RELATION_TYPE: 'DASHBOARD_WITH_TABLE',
RELATION_REVERSE_TYPE: 'TABLE_OF_DASHBOARD'}
assert actual is not None
self.assertDictEqual(actual, expected)

def test_dashboard_table_with_dot_as_name(self) -> None:
dashboard_table = DashboardTable(table_ids=['bq-name://project.id.schema-name/table-name'],
cluster='cluster_id', product='product_id',
dashboard_id='dashboard_id', dashboard_group_id='dashboard_group_id')
actual = dashboard_table.create_next_relation()
self.assertIsNone(actual)

def test_dashboard_table_with_slash_as_name(self) -> None:
dashboard_table = DashboardTable(table_ids=['bq/name://project/id.schema/name/table/name'],
cluster='cluster_id', product='product_id',
dashboard_id='dashboard_id', dashboard_group_id='dashboard_group_id')
actual = dashboard_table.create_next_relation()
self.assertIsNone(actual)
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,15 @@ def test_conversion_with_format(self) -> None:
actual = transformer.transform({'foo': '2020-02-19T19:52:33Z'})
self.assertDictEqual({'foo': 1582141953}, actual)

def test_invalid_timestamp(self) -> None:
transformer = TimestampStringToEpoch()
config = ConfigFactory.from_dict({
FIELD_NAME: 'foo',
})
transformer.init(conf=config)
actual = transformer.transform({'foo': '165de33266d4'})
self.assertEquals(actual['foo'], 0)


if __name__ == '__main__':
unittest.main()