From 5704f1ec82baf5540ae0bb66d4536cb07a48eb33 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 28 Jul 2021 14:58:42 -0500 Subject: [PATCH 01/20] test: refactor `list_rows` tests and add test for scalars --- tests/data/scalars.jsonl | 4 +- tests/data/scalars_extreme.jsonl | 10 +-- tests/data/scalars_schema.json | 53 ++++++++------ tests/system/test_arrow.py | 36 ++++++++-- tests/system/test_client.py | 48 ------------- tests/system/test_list_rows.py | 120 +++++++++++++++++++++++++++++++ 6 files changed, 188 insertions(+), 83 deletions(-) create mode 100644 tests/system/test_list_rows.py diff --git a/tests/data/scalars.jsonl b/tests/data/scalars.jsonl index 4419a6e9a..e06139e5c 100644 --- a/tests/data/scalars.jsonl +++ b/tests/data/scalars.jsonl @@ -1,2 +1,2 @@ -{"bool_col": true, "bytes_col": "abcd", "date_col": "2021-07-21", "datetime_col": "2021-07-21 11:39:45", "geography_col": "POINT(-122.0838511 37.3860517)", "int64_col": "123456789", "numeric_col": "1.23456789", "bignumeric_col": "10.111213141516171819", "float64_col": "1.25", "string_col": "Hello, World", "time_col": "11:41:43.07616", "timestamp_col": "2021-07-21T17:43:43.945289Z"} -{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "string_col": null, "time_col": null, "timestamp_col": null} +{"bool_col": true, "bytes_col": "SGVsbG8sIFdvcmxkIQ==", "date_col": "2021-07-21", "datetime_col": "2021-07-21 11:39:45", "geography_col": "POINT(-122.0838511 37.3860517)", "int64_col": "123456789", "interval_col": "P7Y11M9DT4H15M37.123456S", "numeric_col": "1.23456789", "bignumeric_col": "10.111213141516171819", "float64_col": "1.25", "rowindex": 0, "string_col": "Hello, World!", "time_col": "11:41:43.07616", "timestamp_col": "2021-07-21T17:43:43.945289Z"} +{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, 
"interval_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "rowindex": 1, "string_col": null, "time_col": null, "timestamp_col": null} diff --git a/tests/data/scalars_extreme.jsonl b/tests/data/scalars_extreme.jsonl index ceccd8dbc..d0a33fdba 100644 --- a/tests/data/scalars_extreme.jsonl +++ b/tests/data/scalars_extreme.jsonl @@ -1,5 +1,5 @@ -{"bool_col": true, "bytes_col": "DQo=\n", "date_col": "9999-12-31", "datetime_col": "9999-12-31 23:59:59.999999", "geography_col": "POINT(-135.0000 90.0000)", "int64_col": "9223372036854775807", "numeric_col": "9.9999999999999999999999999999999999999E+28", "bignumeric_col": "9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "+inf", "string_col": "Hello, World", "time_col": "23:59:59.99999", "timestamp_col": "9999-12-31T23:59:59.999999Z"} -{"bool_col": false, "bytes_col": "8J+Zgw==\n", "date_col": "0001-01-01", "datetime_col": "0001-01-01 00:00:00", "geography_col": "POINT(45.0000 -90.0000)", "int64_col": "-9223372036854775808", "numeric_col": "-9.9999999999999999999999999999999999999E+28", "bignumeric_col": "-9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "-inf", "string_col": "Hello, World", "time_col": "00:00:00", "timestamp_col": "0001-01-01T00:00:00.000000Z"} -{"bool_col": true, "bytes_col": "AA==\n", "date_col": "1900-01-01", "datetime_col": "1900-01-01 00:00:00", "geography_col": "POINT(-180.0000 0.0000)", "int64_col": "-1", "numeric_col": "0.000000001", "bignumeric_col": "-0.00000000000000000000000000000000000001", "float64_col": "nan", "string_col": "こんにちは", "time_col": "00:00:00.000001", "timestamp_col": "1900-01-01T00:00:00.000000Z"} -{"bool_col": false, "bytes_col": "", "date_col": "1970-01-01", "datetime_col": "1970-01-01 00:00:00", "geography_col": "POINT(0 0)", "int64_col": "0", "numeric_col": "0.0", "bignumeric_col": "0.0", "float64_col": 0.0, "string_col": "", "time_col": "12:00:00", 
"timestamp_col": "1970-01-01T00:00:00.000000Z"} -{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "string_col": null, "time_col": null, "timestamp_col": null} +{"bool_col": true, "bytes_col": "DQo=\n", "date_col": "9999-12-31", "datetime_col": "9999-12-31 23:59:59.999999", "geography_col": "POINT(-135.0000 90.0000)", "int64_col": "9223372036854775807", "interval_col": "P-10000Y0M-3660000DT-87840000H0M0S", "numeric_col": "9.9999999999999999999999999999999999999E+28", "bignumeric_col": "9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "+inf", "rowindex": 0, "string_col": "Hello, World", "time_col": "23:59:59.999999", "timestamp_col": "9999-12-31T23:59:59.999999Z"} +{"bool_col": false, "bytes_col": "8J+Zgw==\n", "date_col": "0001-01-01", "datetime_col": "0001-01-01 00:00:00", "geography_col": "POINT(45.0000 -90.0000)", "int64_col": "-9223372036854775808", "interval_col": "P10000Y0M3660000DT87840000H0M0S", "numeric_col": "-9.9999999999999999999999999999999999999E+28", "bignumeric_col": "-9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "-inf", "rowindex": 1, "string_col": "Hello, World", "time_col": "00:00:00", "timestamp_col": "0001-01-01T00:00:00.000000Z"} +{"bool_col": true, "bytes_col": "AA==\n", "date_col": "1900-01-01", "datetime_col": "1900-01-01 00:00:00", "geography_col": "POINT(-180.0000 0.0000)", "int64_col": "-1", "interval_col": "P0Y0M0DT0H0M0.000001S", "numeric_col": "0.000000001", "bignumeric_col": "-0.00000000000000000000000000000000000001", "float64_col": "nan", "rowindex": 2, "string_col": "こんにちは", "time_col": "00:00:00.000001", "timestamp_col": "1900-01-01T00:00:00.000000Z"} +{"bool_col": false, "bytes_col": "", "date_col": "1970-01-01", "datetime_col": "1970-01-01 00:00:00", "geography_col": "POINT(0 0)", 
"int64_col": "0", "interval_col": "P0Y0M0DT0H0M0S", "numeric_col": "0.0", "bignumeric_col": "0.0", "float64_col": 0.0, "rowindex": 3, "string_col": "", "time_col": "12:00:00", "timestamp_col": "1970-01-01T00:00:00.000000Z"} +{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "interval_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "rowindex": 4, "string_col": null, "time_col": null, "timestamp_col": null} diff --git a/tests/data/scalars_schema.json b/tests/data/scalars_schema.json index 00bd150fd..3c58e37d4 100644 --- a/tests/data/scalars_schema.json +++ b/tests/data/scalars_schema.json @@ -1,33 +1,32 @@ [ { "mode": "NULLABLE", - "name": "timestamp_col", - "type": "TIMESTAMP" + "name": "bool_col", + "type": "BOOLEAN" }, { "mode": "NULLABLE", - "name": "time_col", - "type": "TIME" + "name": "bignumeric_col", + "type": "BIGNUMERIC" }, { "mode": "NULLABLE", - "name": "float64_col", - "type": "FLOAT" + "name": "bytes_col", + "type": "BYTES" }, { "mode": "NULLABLE", - "name": "datetime_col", - "type": "DATETIME" + "name": "date_col", + "type": "DATE" }, { "mode": "NULLABLE", - "name": "bignumeric_col", - "type": "BIGNUMERIC" + "name": "datetime_col", "type": "DATETIME" }, { "mode": "NULLABLE", - "name": "numeric_col", - "type": "NUMERIC" + "name": "float64_col", + "type": "FLOAT" }, { "mode": "NULLABLE", @@ -36,27 +35,37 @@ }, { "mode": "NULLABLE", - "name": "date_col", - "type": "DATE" + "name": "int64_col", + "type": "INTEGER" }, { "mode": "NULLABLE", - "name": "string_col", - "type": "STRING" + "name": "interval_col", + "type": "INTERVAL" }, { "mode": "NULLABLE", - "name": "bool_col", - "type": "BOOLEAN" + "name": "numeric_col", + "type": "NUMERIC" + }, + { + "mode": "REQUIRED", + "name": "rowindex", + "type": "INTEGER" }, { "mode": "NULLABLE", - "name": "bytes_col", - "type": "BYTES" + "name": "string_col", + "type": "STRING" }, { "mode": "NULLABLE", - "name": 
"int64_col", - "type": "INTEGER" + "name": "time_col", + "type": "TIME" + }, + { + "mode": "NULLABLE", + "name": "timestamp_col", + "type": "TIMESTAMP" } ] diff --git a/tests/system/test_arrow.py b/tests/system/test_arrow.py index f97488e39..e6b1b3ec6 100644 --- a/tests/system/test_arrow.py +++ b/tests/system/test_arrow.py @@ -14,8 +14,14 @@ """System tests for Arrow connector.""" +from typing import Optional + import pytest +from google.cloud import bigquery +from google.cloud.bigquery import enums + + pyarrow = pytest.importorskip( "pyarrow", minversion="3.0.0" ) # Needs decimal256 for BIGNUMERIC columns. @@ -31,17 +37,35 @@ ), ) def test_list_rows_nullable_scalars_dtypes( - bigquery_client, - scalars_table, - scalars_extreme_table, - max_results, - scalars_table_name, + bigquery_client: bigquery.Client, + scalars_table: str, + scalars_extreme_table: str, + max_results: Optional[int], + scalars_table_name: str, ): table_id = scalars_table if scalars_table_name == "scalars_extreme_table": table_id = scalars_extreme_table + + # Avoid INTERVAL columns until they are supported by the BigQuery Storage + # API and pyarrow. 
+ schema = [ + bigquery.SchemaField("bool_col", enums.SqlTypeNames.BOOLEAN), + bigquery.SchemaField("bignumeric_col", enums.SqlTypeNames.BIGNUMERIC), + bigquery.SchemaField("bytes_col", enums.SqlTypeNames.BYTES), + bigquery.SchemaField("date_col", enums.SqlTypeNames.DATE), + bigquery.SchemaField("datetime_col", enums.SqlTypeNames.DATETIME), + bigquery.SchemaField("float64_col", enums.SqlTypeNames.FLOAT64), + bigquery.SchemaField("geography_col", enums.SqlTypeNames.GEOGRAPHY), + bigquery.SchemaField("int64_col", enums.SqlTypeNames.INT64), + bigquery.SchemaField("numeric_col", enums.SqlTypeNames.NUMERIC), + bigquery.SchemaField("string_col", enums.SqlTypeNames.STRING), + bigquery.SchemaField("time_col", enums.SqlTypeNames.TIME), + bigquery.SchemaField("timestamp_col", enums.SqlTypeNames.TIMESTAMP), + ] + arrow_table = bigquery_client.list_rows( - table_id, max_results=max_results, + table_id, max_results=max_results, selected_fields=schema, ).to_arrow() schema = arrow_table.schema diff --git a/tests/system/test_client.py b/tests/system/test_client.py index baa2b6ad8..2f57dd6f2 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -2394,54 +2394,6 @@ def test_nested_table_to_arrow(self): self.assertTrue(pyarrow.types.is_list(record_col[1].type)) self.assertTrue(pyarrow.types.is_int64(record_col[1].type.value_type)) - def test_list_rows_empty_table(self): - from google.cloud.bigquery.table import RowIterator - - dataset_id = _make_dataset_id("empty_table") - dataset = self.temp_dataset(dataset_id) - table_ref = dataset.table("empty_table") - table = Config.CLIENT.create_table(bigquery.Table(table_ref)) - - # It's a bit silly to list rows for an empty table, but this does - # happen as the result of a DDL query from an IPython magic command. 
- rows = Config.CLIENT.list_rows(table) - self.assertIsInstance(rows, RowIterator) - self.assertEqual(tuple(rows), ()) - - def test_list_rows_page_size(self): - from google.cloud.bigquery.job import SourceFormat - from google.cloud.bigquery.job import WriteDisposition - - num_items = 7 - page_size = 3 - num_pages, num_last_page = divmod(num_items, page_size) - - SF = bigquery.SchemaField - schema = [SF("string_col", "STRING", mode="NULLABLE")] - to_insert = [{"string_col": "item%d" % i} for i in range(num_items)] - rows = [json.dumps(row) for row in to_insert] - body = io.BytesIO("{}\n".format("\n".join(rows)).encode("ascii")) - - table_id = "test_table" - dataset = self.temp_dataset(_make_dataset_id("nested_df")) - table = dataset.table(table_id) - self.to_delete.insert(0, table) - job_config = bigquery.LoadJobConfig() - job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE - job_config.source_format = SourceFormat.NEWLINE_DELIMITED_JSON - job_config.schema = schema - # Load a table using a local JSON file from memory. - Config.CLIENT.load_table_from_file(body, table, job_config=job_config).result() - - df = Config.CLIENT.list_rows(table, selected_fields=schema, page_size=page_size) - pages = df.pages - - for i in range(num_pages): - page = next(pages) - self.assertEqual(page.num_items, page_size) - page = next(pages) - self.assertEqual(page.num_items, num_last_page) - def temp_dataset(self, dataset_id, location=None): project = Config.CLIENT.project dataset_ref = bigquery.DatasetReference(project, dataset_id) diff --git a/tests/system/test_list_rows.py b/tests/system/test_list_rows.py new file mode 100644 index 000000000..70c6ae712 --- /dev/null +++ b/tests/system/test_list_rows.py @@ -0,0 +1,120 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import decimal + +from google.cloud import bigquery +from google.cloud.bigquery import enums + + +def test_list_rows_empty_table(bigquery_client: bigquery.Client, table_id: str): + from google.cloud.bigquery.table import RowIterator + + table = bigquery_client.create_table(table_id) + + # It's a bit silly to list rows for an empty table, but this does + # happen as the result of a DDL query from an IPython magic command. + rows = bigquery_client.list_rows(table) + assert isinstance(rows, RowIterator) + assert tuple(rows) == () + + +def test_list_rows_page_size(bigquery_client: bigquery.Client, table_id: str): + num_items = 7 + page_size = 3 + num_pages, num_last_page = divmod(num_items, page_size) + + to_insert = [{"string_col": "item%d" % i, "rowindex": i} for i in range(num_items)] + bigquery_client.load_table_from_json(to_insert, table_id).result() + + df = bigquery_client.list_rows( + table_id, + selected_fields=[bigquery.SchemaField("string_col", enums.SqlTypeNames.STRING)], + page_size=page_size, + ) + pages = df.pages + + for i in range(num_pages): + page = next(pages) + assert page.num_items == page_size + page = next(pages) + assert page.num_items == num_last_page + + +def test_list_rows_scalars(bigquery_client: bigquery.Client, scalars_table: str): + rows = list( + sorted( + bigquery_client.list_rows(scalars_table), key=lambda row: row["rowindex"] + ) + ) + row = rows[0] + assert row["bool_col"] # True + assert row["bytes_col"] == b"Hello, World!" 
+ assert row["date_col"] == datetime.date(2021, 7, 21) + assert row["datetime_col"] == datetime.datetime(2021, 7, 21, 11, 39, 45) + assert row["geography_col"] == "POINT(-122.0838511 37.3860517)" + assert row["int64_col"] == 123456789 + assert row["numeric_col"] == decimal.Decimal("1.23456789") + assert row["bignumeric_col"] == decimal.Decimal("10.111213141516171819") + assert row["float64_col"] == 1.25 + assert row["string_col"] == "Hello, World!" + assert row["time_col"] == datetime.time(11, 41, 43, 76160) + assert row["timestamp_col"] == datetime.datetime( + 2021, 7, 21, 17, 43, 43, 945289, tzinfo=datetime.timezone.utc + ) + + nullrow = rows[1] + for column, value in nullrow.items(): + if column == "rowindex": + assert value == 1 + else: + assert value is None + + +def test_list_rows_scalars_extreme( + bigquery_client: bigquery.Client, scalars_extreme_table: str +): + rows = list( + sorted( + bigquery_client.list_rows(scalars_extreme_table), + key=lambda row: row["rowindex"], + ) + ) + row = rows[0] + assert row["bool_col"] # True + assert row["bytes_col"] == b"\r\n" + assert row["date_col"] == datetime.date(9999, 12, 31) + assert row["datetime_col"] == datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + assert row["geography_col"] == "POINT(-135 90)" + assert row["int64_col"] == 9223372036854775807 + assert row["numeric_col"] == decimal.Decimal( + "9.9999999999999999999999999999999999999E+28" + ) + assert row["bignumeric_col"] == decimal.Decimal( + "9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37" + ) + assert row["float64_col"] == float("Inf") + assert row["string_col"] == "Hello, World" + assert row["time_col"] == datetime.time(23, 59, 59, 999999) + assert row["timestamp_col"] == datetime.datetime( + 9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc + ) + + nullrow = rows[4] + for column, value in nullrow.items(): + if column == "rowindex": + assert value == 4 + else: + assert value is None From 
5eb3794bad8e7a1508783dda3911871924138819 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 28 Jul 2021 15:38:51 -0500 Subject: [PATCH 02/20] WIP: INTERVAL support --- google/cloud/bigquery/enums.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index 0da01d665..eb5f766a7 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -254,6 +254,7 @@ class SqlTypeNames(str, enum.Enum): DATE = "DATE" TIME = "TIME" DATETIME = "DATETIME" + INTERVAL = "INTERVAL" # NOTE: not available in legacy types class SqlParameterScalarTypes: @@ -276,6 +277,7 @@ class SqlParameterScalarTypes: DATE = ScalarQueryParameterType("DATE") TIME = ScalarQueryParameterType("TIME") DATETIME = ScalarQueryParameterType("DATETIME") + INTERVAL = ScalarQueryParameterType("INTERVAL") class WriteDisposition(object): From 9e5427779fdd4b6483c2d96e854573a2e0e773b7 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 28 Jul 2021 18:03:48 -0500 Subject: [PATCH 03/20] feat: add support for INTERVAL data type to `list_rows` --- google/cloud/bigquery/_helpers.py | 47 +++++++++++++++++++- setup.py | 1 + testing/constraints-3.6.txt | 1 + tests/system/test_list_rows.py | 8 ++++ tests/unit/helpers/test_from_json.py | 66 ++++++++++++++++++++++++++++ 5 files changed, 122 insertions(+), 1 deletion(-) create mode 100644 tests/unit/helpers/test_from_json.py diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 0a1f71444..dbe99e710 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -19,8 +19,9 @@ import decimal import math import re -from typing import Union +from typing import Optional, Union +from dateutil import relativedelta from google.cloud._helpers import UTC from google.cloud._helpers import _date_from_iso8601_date from google.cloud._helpers import _datetime_from_microseconds @@ -42,6 +43,17 @@ re.VERBOSE, ) +# BigQuery sends data in "canonical 
format" +# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#interval_type +_INTERVAL_PATTERN = re.compile( + r"(?P<calendar_sign>-?)(?P<years>[0-9]+)-(?P<months>[0-9]+) " + r"(?P<days>-?[0-9]+) " + r"(?P<time_sign>-?)(?P<hours>[0-9]+):(?P<minutes>[0-9]+):" + r"(?P<seconds>[0-9]+)\.?(?P<fraction>[0-9]+)?$" +) + +# TODO: BigQuery receives data in ISO 8601 duration format + _MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0") _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0") @@ -114,6 +126,38 @@ def _int_from_json(value, field): return int(value) +def _interval_from_json( + value: Optional[str], field +) -> Optional[relativedelta.relativedelta]: + """Coerce 'value' to an interval, if set or not nullable.""" + if not _not_null(value, field): + return + if value is None: + raise TypeError(f"got {value} for REQUIRED field: {repr(field)}") + + parsed = _INTERVAL_PATTERN.match(value) + calendar_sign = -1 if parsed.group("calendar_sign") == "-" else 1 + years = calendar_sign * int(parsed.group("years")) + months = calendar_sign * int(parsed.group("months")) + days = int(parsed.group("days")) + time_sign = -1 if parsed.group("time_sign") == "-" else 1 + hours = time_sign * int(parsed.group("hours")) + minutes = time_sign * int(parsed.group("minutes")) + seconds = time_sign * int(parsed.group("seconds")) + fraction = parsed.group("fraction") + microseconds = time_sign * int(fraction.rjust(6, "0")) if fraction else 0 + + return relativedelta.relativedelta( + years=years, + months=months, + days=days, + hours=hours, + minutes=minutes, + seconds=seconds, + microseconds=microseconds, + ) + + def _float_from_json(value, field): """Coerce 'value' to a float, if set or not nullable.""" if _not_null(value, field): @@ -250,6 +294,7 @@ def _record_from_json(value, field): _CELLDATA_FROM_JSON = { "INTEGER": _int_from_json, "INT64": _int_from_json, + "INTERVAL": _interval_from_json, "FLOAT": _float_from_json, "FLOAT64": _float_from_json, "NUMERIC": _decimal_from_json, diff --git a/setup.py b/setup.py index
0ca19b576..fb2d1cbed 100644 --- a/setup.py +++ b/setup.py @@ -42,6 +42,7 @@ "google-resumable-media >= 0.6.0, < 3.0dev", "packaging >= 14.3", "protobuf >= 3.12.0", + "python-dateutil >= 2.7.0, <3.0dev", "requests >= 2.18.0, < 3.0.0dev", ] extras = { diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index af6e82efd..dbef2ae6d 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -17,6 +17,7 @@ pandas==0.23.0 proto-plus==1.10.0 protobuf==3.12.0 pyarrow==1.0.0 +python-dateutil==2.7.0 requests==2.18.0 six==1.13.0 tqdm==4.7.4 diff --git a/tests/system/test_list_rows.py b/tests/system/test_list_rows.py index 70c6ae712..59a4cc58f 100644 --- a/tests/system/test_list_rows.py +++ b/tests/system/test_list_rows.py @@ -15,6 +15,8 @@ import datetime import decimal +from dateutil import relativedelta + from google.cloud import bigquery from google.cloud.bigquery import enums @@ -66,6 +68,9 @@ def test_list_rows_scalars(bigquery_client: bigquery.Client, scalars_table: str) assert row["datetime_col"] == datetime.datetime(2021, 7, 21, 11, 39, 45) assert row["geography_col"] == "POINT(-122.0838511 37.3860517)" assert row["int64_col"] == 123456789 + assert row["interval_col"] == relativedelta.relativedelta( + years=7, months=11, days=9, hours=4, minutes=15, seconds=37, microseconds=123456 + ) assert row["numeric_col"] == decimal.Decimal("1.23456789") assert row["bignumeric_col"] == decimal.Decimal("10.111213141516171819") assert row["float64_col"] == 1.25 @@ -99,6 +104,9 @@ def test_list_rows_scalars_extreme( assert row["datetime_col"] == datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) assert row["geography_col"] == "POINT(-135 90)" assert row["int64_col"] == 9223372036854775807 + assert row["interval_col"] == relativedelta.relativedelta( + years=-10000, days=-3660000, hours=-87840000 + ) assert row["numeric_col"] == decimal.Decimal( "9.9999999999999999999999999999999999999E+28" ) diff --git a/tests/unit/helpers/test_from_json.py 
b/tests/unit/helpers/test_from_json.py new file mode 100644 index 000000000..bec09542c --- /dev/null +++ b/tests/unit/helpers/test_from_json.py @@ -0,0 +1,66 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dateutil.relativedelta import relativedelta +import pytest + +from google.cloud.bigquery.schema import SchemaField + + +def create_field(mode="NULLABLE", type_="IGNORED"): + return SchemaField("test_field", type_, mode=mode) + + +@pytest.fixture +def mut(): + from google.cloud.bigquery import _helpers + + return _helpers + + +def test_interval_from_json_w_none_nullable(mut): + got = mut._interval_from_json(None, create_field()) + assert got is None + + +def test_interval_from_json_w_none_required(mut): + with pytest.raises(TypeError): + mut._interval_from_json(None, create_field(mode="REQUIRED")) + + +@pytest.mark.parametrize( + ("value", "expected"), + ( + # SELECT INTERVAL -1 YEAR + ("-1-0 0 0:0:0", relativedelta(years=-1)), + # SELECT INTERVAL -1 MONTH + ("-0-1 0 0:0:0", relativedelta(months=-1)), + # SELECT INTERVAL -1 DAY + ("0-0 -1 0:0:0", relativedelta(days=-1)), + # SELECT INTERVAL -1 HOUR + ("0-0 0 -1:0:0", relativedelta(hours=-1)), + # SELECT INTERVAL -1 MINUTE + ("0-0 0 -0:1:0", relativedelta(minutes=-1)), + # SELECT INTERVAL -1 SECOND + ("0-0 0 -0:0:1", relativedelta(seconds=-1)), + # SELECT (INTERVAL -1 SECOND) / 1000000 + ("0-0 0 -0:0:0.000001", relativedelta(microseconds=-1)), + # TODO: Test with multiple digits 
+ # TODO: Test with mixed +/- + # TODO: Test with fraction that is not microseconds (maybe milliseconds?) + ), +) +def test_w_string_values(mut, value, expected): + got = mut._interval_from_json(value, create_field()) + assert got == expected From 60d9ca793e7fa8dea5e453f8537d831ac1af4031 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 29 Jul 2021 10:33:29 -0500 Subject: [PATCH 04/20] fix relativedelta construction for non-microseconds --- google/cloud/bigquery/_helpers.py | 2 +- tests/unit/helpers/test_from_json.py | 46 ++++++++++++++++++++++++++-- 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index dbe99e710..eea20b649 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -145,7 +145,7 @@ def _interval_from_json( minutes = time_sign * int(parsed.group("minutes")) seconds = time_sign * int(parsed.group("seconds")) fraction = parsed.group("fraction") - microseconds = time_sign * int(fraction.rjust(6, "0")) if fraction else 0 + microseconds = time_sign * int(fraction.ljust(6, "0")) if fraction else 0 return relativedelta.relativedelta( years=years, diff --git a/tests/unit/helpers/test_from_json.py b/tests/unit/helpers/test_from_json.py index bec09542c..38306c2fa 100644 --- a/tests/unit/helpers/test_from_json.py +++ b/tests/unit/helpers/test_from_json.py @@ -56,9 +56,49 @@ def test_interval_from_json_w_none_required(mut): ("0-0 0 -0:0:1", relativedelta(seconds=-1)), # SELECT (INTERVAL -1 SECOND) / 1000000 ("0-0 0 -0:0:0.000001", relativedelta(microseconds=-1)), - # TODO: Test with multiple digits - # TODO: Test with mixed +/- - # TODO: Test with fraction that is not microseconds (maybe milliseconds?) + # Test with multiple digits in each section. 
+ ( + "32-11 45 67:16:23.987654", + relativedelta( + years=32, + months=11, + days=45, + hours=67, + minutes=16, + seconds=23, + microseconds=987654, + ), + ), + ( + "-32-11 -45 -67:16:23.987654", + relativedelta( + years=-32, + months=-11, + days=-45, + hours=-67, + minutes=-16, + seconds=-23, + microseconds=-987654, + ), + ), + # Test with mixed +/- sections. + ( + "9999-9 -999999 9999999:59:59.999999", + relativedelta( + years=9999, + months=9, + days=-999999, + hours=9999999, + minutes=59, + seconds=59, + microseconds=999999, + ), + ), + # Test with fraction that is not microseconds. + ("0-0 0 0:0:0.1", relativedelta(microseconds=100000)), + ("0-0 0 0:0:0.12", relativedelta(microseconds=120000)), + ("0-0 0 0:0:0.123", relativedelta(microseconds=123000)), + ("0-0 0 0:0:0.1234", relativedelta(microseconds=123400)), ), ) def test_w_string_values(mut, value, expected): From da6ef5b185623ab547a9126307e2de09d506832a Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 29 Jul 2021 12:53:52 -0500 Subject: [PATCH 05/20] WIP: support INTERVAL query params --- docs/conf.py | 1 + google/cloud/bigquery/enums.py | 26 ++++++++++---------- google/cloud/bigquery/query.py | 43 +++++++++++++++++++++++++++------- tests/system/test_client.py | 18 ++++++++++++++ 4 files changed, 67 insertions(+), 21 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index cb347160d..18692b969 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -360,6 +360,7 @@ # Example configuration for intersphinx: refer to the Python standard library. 
intersphinx_mapping = { "python": ("https://python.readthedocs.org/en/latest/", None), + "dateutil": ("https://dateutil.readthedocs.io/en/latest/", None), "google-auth": ("https://googleapis.dev/python/google-auth/latest/", None), "google.api_core": ("https://googleapis.dev/python/google-api-core/latest/", None,), "grpc": ("https://grpc.github.io/grpc/python/", None), diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index eb5f766a7..5bcdc4d82 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -260,24 +260,24 @@ class SqlTypeNames(str, enum.Enum): class SqlParameterScalarTypes: """Supported scalar SQL query parameter types as type objects.""" - STRING = ScalarQueryParameterType("STRING") + BOOL = ScalarQueryParameterType("BOOL") + BOOLEAN = ScalarQueryParameterType("BOOL") + BIGDECIMAL = ScalarQueryParameterType("BIGNUMERIC") + BIGNUMERIC = ScalarQueryParameterType("BIGNUMERIC") BYTES = ScalarQueryParameterType("BYTES") - INTEGER = ScalarQueryParameterType("INT64") - INT64 = ScalarQueryParameterType("INT64") + DATE = ScalarQueryParameterType("DATE") + DATETIME = ScalarQueryParameterType("DATETIME") + DECIMAL = ScalarQueryParameterType("NUMERIC") FLOAT = ScalarQueryParameterType("FLOAT64") FLOAT64 = ScalarQueryParameterType("FLOAT64") - NUMERIC = ScalarQueryParameterType("NUMERIC") - BIGNUMERIC = ScalarQueryParameterType("BIGNUMERIC") - DECIMAL = ScalarQueryParameterType("NUMERIC") - BIGDECIMAL = ScalarQueryParameterType("BIGNUMERIC") - BOOLEAN = ScalarQueryParameterType("BOOL") - BOOL = ScalarQueryParameterType("BOOL") GEOGRAPHY = ScalarQueryParameterType("GEOGRAPHY") - TIMESTAMP = ScalarQueryParameterType("TIMESTAMP") - DATE = ScalarQueryParameterType("DATE") - TIME = ScalarQueryParameterType("TIME") - DATETIME = ScalarQueryParameterType("DATETIME") + INT64 = ScalarQueryParameterType("INT64") + INTEGER = ScalarQueryParameterType("INT64") INTERVAL = ScalarQueryParameterType("INTERVAL") + NUMERIC = 
ScalarQueryParameterType("NUMERIC") + STRING = ScalarQueryParameterType("STRING") + TIME = ScalarQueryParameterType("TIME") + TIMESTAMP = ScalarQueryParameterType("TIMESTAMP") class WriteDisposition(object): diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index d1e9a45a5..ceab9b7e0 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -16,7 +16,11 @@ from collections import OrderedDict import copy -from typing import Union +import datetime +import decimal +from typing import Optional, Union + +import dateutil.relativedelta from google.cloud.bigquery.table import _parse_schema_resource from google.cloud.bigquery._helpers import _rows_from_json @@ -329,18 +333,41 @@ class ScalarQueryParameter(_AbstractQueryParameter): Parameter name, used via ``@foo`` syntax. If None, the parameter can only be addressed via position (``?``). - type_ (str): - Name of parameter type. One of 'STRING', 'INT64', - 'FLOAT64', 'NUMERIC', 'BIGNUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or - 'DATE'. + type_ (Union[str, google.cloud.bigquery.query.ScalarQueryParameterType]): + Name of parameter type. See + :class:`google.cloud.bigquery.enums.SqlTypeNames` and + :class:`google.cloud.bigquery.enums.SqlParameterScalarTypes` for + supported types. - value (Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date]): + value (Union[ \ + str, int, float, dateutil.relativedelta.relativedelta, \ + decimal.Decimal, bool, datetime.datetime, datetime.date \ + ]): The scalar parameter value. 
""" - def __init__(self, name, type_, value): + def __init__( + self, + name: Optional[str], + type_: Optional[Union[str, ScalarQueryParameterType]], + value: Optional[ + Union[ + str, + int, + float, + dateutil.relativedelta.relativedelta, + decimal.Decimal, + bool, + datetime.datetime, + datetime.date, + ] + ], + ): self.name = name - self.type_ = type_ + if isinstance(type_, ScalarQueryParameterType): + self.type_ = type_._type + else: + self.type_ = type_ self.value = value @classmethod diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 2f57dd6f2..258f33a9d 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -27,6 +27,7 @@ import uuid from typing import Optional +from dateutil import relativedelta import psutil import pytest @@ -1852,6 +1853,18 @@ def test_query_w_query_params(self): rectangle_param = StructQueryParameter( "rectangle", top_left_param, bottom_right_param ) + interval_value = relativedelta.relativedelta( + years=123, + months=7, + days=-21, + hours=48, + minutes=24, + seconds=12, + microseconds=6, + ) + interval_param = ScalarQueryParameter( + "interval_param", enums.SqlParameterScalarTypes.INTERVAL, interval_value + ) examples = [ { "sql": "SELECT @question", @@ -1938,6 +1951,11 @@ def test_query_w_query_params(self): "expected": {"friends": [phred_name, bharney_name]}, "query_parameters": [with_friends_param], }, + { + "sql": "SELECT @interval_param", + "expected": [interval_value], + "query_parameters": [interval_param], + }, ] if _BIGNUMERIC_SUPPORT: examples.append( From b73a61034434d7f33dc49e98c576c6e909f0adc7 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 29 Jul 2021 17:10:02 -0500 Subject: [PATCH 06/20] remove dead code --- tests/system/test_client.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 258f33a9d..6a3d29f2d 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -39,11 +39,6 @@ 
except ImportError: # pragma: NO COVER bigquery_storage = None -try: - import fastavro # to parse BQ storage client results -except ImportError: # pragma: NO COVER - fastavro = None - try: import pyarrow import pyarrow.types From 52f2b7b6ed7b0623b99a038eaff7d4de4300077b Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 29 Jul 2021 17:10:37 -0500 Subject: [PATCH 07/20] INTERVAL not supported in query parameters --- google/cloud/bigquery/enums.py | 1 - 1 file changed, 1 deletion(-) diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index 5bcdc4d82..0eaaffd2e 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -273,7 +273,6 @@ class SqlParameterScalarTypes: GEOGRAPHY = ScalarQueryParameterType("GEOGRAPHY") INT64 = ScalarQueryParameterType("INT64") INTEGER = ScalarQueryParameterType("INT64") - INTERVAL = ScalarQueryParameterType("INTERVAL") NUMERIC = ScalarQueryParameterType("NUMERIC") STRING = ScalarQueryParameterType("STRING") TIME = ScalarQueryParameterType("TIME") From 31c3f928e0f820cd86ac8bc1b7eced3ed15d6661 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 3 Aug 2021 15:56:07 -0500 Subject: [PATCH 08/20] revert query parameter changes --- google/cloud/bigquery/query.py | 43 +++++++--------------------------- tests/system/test_client.py | 18 -------------- 2 files changed, 8 insertions(+), 53 deletions(-) diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index ceab9b7e0..d1e9a45a5 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -16,11 +16,7 @@ from collections import OrderedDict import copy -import datetime -import decimal -from typing import Optional, Union - -import dateutil.relativedelta +from typing import Union from google.cloud.bigquery.table import _parse_schema_resource from google.cloud.bigquery._helpers import _rows_from_json @@ -333,41 +329,18 @@ class ScalarQueryParameter(_AbstractQueryParameter): Parameter name, used via 
``@foo`` syntax. If None, the parameter can only be addressed via position (``?``). - type_ (Union[str, google.cloud.bigquery.query.ScalarQueryParameterType]): - Name of parameter type. See - :class:`google.cloud.bigquery.enums.SqlTypeNames` and - :class:`google.cloud.bigquery.enums.SqlParameterScalarTypes` for - supported types. + type_ (str): + Name of parameter type. One of 'STRING', 'INT64', + 'FLOAT64', 'NUMERIC', 'BIGNUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or + 'DATE'. - value (Union[ \ - str, int, float, dateutil.relativedelta.relativedelta, \ - decimal.Decimal, bool, datetime.datetime, datetime.date \ - ]): + value (Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date]): The scalar parameter value. """ - def __init__( - self, - name: Optional[str], - type_: Optional[Union[str, ScalarQueryParameterType]], - value: Optional[ - Union[ - str, - int, - float, - dateutil.relativedelta.relativedelta, - decimal.Decimal, - bool, - datetime.datetime, - datetime.date, - ] - ], - ): + def __init__(self, name, type_, value): self.name = name - if isinstance(type_, ScalarQueryParameterType): - self.type_ = type_._type - else: - self.type_ = type_ + self.type_ = type_ self.value = value @classmethod diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 6a3d29f2d..f149c4cc7 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -27,7 +27,6 @@ import uuid from typing import Optional -from dateutil import relativedelta import psutil import pytest @@ -1848,18 +1847,6 @@ def test_query_w_query_params(self): rectangle_param = StructQueryParameter( "rectangle", top_left_param, bottom_right_param ) - interval_value = relativedelta.relativedelta( - years=123, - months=7, - days=-21, - hours=48, - minutes=24, - seconds=12, - microseconds=6, - ) - interval_param = ScalarQueryParameter( - "interval_param", enums.SqlParameterScalarTypes.INTERVAL, interval_value - ) examples = [ { "sql": "SELECT @question", @@ 
-1946,11 +1933,6 @@ def test_query_w_query_params(self): "expected": {"friends": [phred_name, bharney_name]}, "query_parameters": [with_friends_param], }, - { - "sql": "SELECT @interval_param", - "expected": [interval_value], - "query_parameters": [interval_param], - }, ] if _BIGNUMERIC_SUPPORT: examples.append( From bb0361886ac805f5ef05f6a31fc226026bc8c2bc Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 5 Aug 2021 16:30:47 -0500 Subject: [PATCH 09/20] add validation error for interval --- google/cloud/bigquery/_helpers.py | 7 ++++--- tests/unit/helpers/test_from_json.py | 5 +++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index eea20b649..b9f368528 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -43,7 +43,7 @@ re.VERBOSE, ) -# BigQuery sends data in "canonical format" +# BigQuery sends INTERVAL data in "canonical format" # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#interval_type _INTERVAL_PATTERN = re.compile( r"(?P-?)(?P[0-9]+)-(?P[0-9]+) " @@ -52,8 +52,6 @@ r"(?P[0-9]+)\.?(?P[0-9]+)?$" ) -# TODO: BigQuery receives data in ISO 8601 duration format - _MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0") _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0") @@ -136,6 +134,9 @@ def _interval_from_json( raise TypeError(f"got {value} for REQUIRED field: {repr(field)}") parsed = _INTERVAL_PATTERN.match(value) + if parsed is None: + raise ValueError(f"got interval: '{value}' with unexpected format") + calendar_sign = -1 if parsed.group("calendar_sign") == "-" else 1 years = calendar_sign * int(parsed.group("years")) months = calendar_sign * int(parsed.group("months")) diff --git a/tests/unit/helpers/test_from_json.py b/tests/unit/helpers/test_from_json.py index 38306c2fa..9847ee3e0 100644 --- a/tests/unit/helpers/test_from_json.py +++ b/tests/unit/helpers/test_from_json.py @@ 
-39,6 +39,11 @@ def test_interval_from_json_w_none_required(mut): mut._interval_from_json(None, create_field(mode="REQUIRED")) +def test_interval_from_json_w_invalid_format(mut): + with pytest.raises(ValueError, match="NOT_AN_INTERVAL"): + mut._interval_from_json("NOT_AN_INTERVAL", create_field()) + + @pytest.mark.parametrize( ("value", "expected"), ( From f3711e7820d7cb58c8df395e8ac6e3c692e34d9a Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 5 Aug 2021 16:46:33 -0500 Subject: [PATCH 10/20] add unit tests for extreme intervals --- tests/unit/helpers/test_from_json.py | 35 +++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/tests/unit/helpers/test_from_json.py b/tests/unit/helpers/test_from_json.py index 9847ee3e0..12d451361 100644 --- a/tests/unit/helpers/test_from_json.py +++ b/tests/unit/helpers/test_from_json.py @@ -47,20 +47,43 @@ def test_interval_from_json_w_invalid_format(mut): @pytest.mark.parametrize( ("value", "expected"), ( - # SELECT INTERVAL -1 YEAR + ("0-0 0 0:0:0", relativedelta()), + # SELECT INTERVAL X YEAR + ("-10000-0 0 0:0:0", relativedelta(years=-10000)), ("-1-0 0 0:0:0", relativedelta(years=-1)), - # SELECT INTERVAL -1 MONTH + ("1-0 0 0:0:0", relativedelta(years=1)), + ("10000-0 0 0:0:0", relativedelta(years=10000)), + # SELECT INTERVAL X MONTH + ("-0-11 0 0:0:0", relativedelta(months=-11)), ("-0-1 0 0:0:0", relativedelta(months=-1)), - # SELECT INTERVAL -1 DAY + ("0-1 0 0:0:0", relativedelta(months=1)), + ("0-11 0 0:0:0", relativedelta(months=11)), + # SELECT INTERVAL X DAY + ("0-0 -3660000 0:0:0", relativedelta(days=-3660000)), ("0-0 -1 0:0:0", relativedelta(days=-1)), - # SELECT INTERVAL -1 HOUR + ("0-0 1 0:0:0", relativedelta(days=1)), + ("0-0 3660000 0:0:0", relativedelta(days=3660000)), + # SELECT INTERVAL X HOUR + ("0-0 0 -87840000:0:0", relativedelta(hours=-87840000)), ("0-0 0 -1:0:0", relativedelta(hours=-1)), - # SELECT INTERVAL -1 MINUTE + ("0-0 0 1:0:0", relativedelta(hours=1)), + ("0-0 0 
87840000:0:0", relativedelta(hours=87840000)), + # SELECT INTERVAL X MINUTE + ("0-0 0 -0:59:0", relativedelta(minutes=-59)), ("0-0 0 -0:1:0", relativedelta(minutes=-1)), - # SELECT INTERVAL -1 SECOND + ("0-0 0 0:1:0", relativedelta(minutes=1)), + ("0-0 0 0:59:0", relativedelta(minutes=59)), + # SELECT INTERVAL X SECOND + ("0-0 0 -0:0:59", relativedelta(seconds=-59)), ("0-0 0 -0:0:1", relativedelta(seconds=-1)), + ("0-0 0 0:0:1", relativedelta(seconds=1)), + ("0-0 0 0:0:59", relativedelta(seconds=59)), # SELECT (INTERVAL -1 SECOND) / 1000000 ("0-0 0 -0:0:0.000001", relativedelta(microseconds=-1)), + ("0-0 0 -0:0:59.999999", relativedelta(seconds=-59, microseconds=-999999)), + ("0-0 0 -0:0:59.999", relativedelta(seconds=-59, microseconds=-999000)), + ("0-0 0 0:0:59.999", relativedelta(seconds=59, microseconds=999000)), + ("0-0 0 0:0:59.999999", relativedelta(seconds=59, microseconds=999999)), # Test with multiple digits in each section. ( "32-11 45 67:16:23.987654", From 68035bae2df27283af084eccd6dcc8e6ed7dc36e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 11 Aug 2021 11:05:04 -0500 Subject: [PATCH 11/20] add dateutil to intersphinx --- owlbot.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/owlbot.py b/owlbot.py index 09845480a..4559d34f3 100644 --- a/owlbot.py +++ b/owlbot.py @@ -63,7 +63,7 @@ s.replace( library / f"google/cloud/bigquery_{library.name}/types/standard_sql.py", r"type_ ", - "type " + "type ", ) s.move( @@ -78,8 +78,8 @@ "noxfile.py", "setup.py", f"scripts/fixup_bigquery_{library.name}_keywords.py", - f"google/cloud/bigquery/__init__.py", - f"google/cloud/bigquery/py.typed", + "google/cloud/bigquery/__init__.py", + "google/cloud/bigquery/py.typed", # There are no public API endpoints for the generated ModelServiceClient, # thus there's no point in generating it and its tests. 
f"google/cloud/bigquery_{library.name}/services/**", @@ -97,6 +97,9 @@ samples=True, microgenerator=True, split_system_tests=True, + intersphinx_dependencies=[ + ("dateutil", "https://dateutil.readthedocs.io/en/latest/") + ], ) # BigQuery has a custom multiprocessing note @@ -109,7 +112,7 @@ # Include custom SNIPPETS_TESTS job for performance. # https://github.com/googleapis/python-bigquery/issues/191 ".kokoro/presubmit/presubmit.cfg", - ] + ], ) # ---------------------------------------------------------------------------- @@ -121,14 +124,14 @@ s.replace( "docs/conf.py", r'\{"members": True\}', - '{"members": True, "inherited-members": True}' + '{"members": True, "inherited-members": True}', ) # Tell Sphinx to ingore autogenerated docs files. s.replace( "docs/conf.py", r'"samples/snippets/README\.rst",', - '\g<0>\n "bigquery_v2/services.rst", # generated by the code generator', + '\\g<0>\n "bigquery_v2/services.rst", # generated by the code generator', ) # ---------------------------------------------------------------------------- @@ -136,13 +139,14 @@ # ---------------------------------------------------------------------------- # Add .pytype to .gitignore -s.replace(".gitignore", r"\.pytest_cache", "\g<0>\n.pytype") +s.replace(".gitignore", r"\.pytest_cache", "\\g<0>\n.pytype") # Add pytype config to setup.cfg s.replace( "setup.cfg", r"universal = 1", - textwrap.dedent(""" \g<0> + textwrap.dedent( + """ \\g<0> [pytype] python_version = 3.8 @@ -156,7 +160,8 @@ # There's some issue with finding some pyi files, thus disabling. # The issue https://github.com/google/pytype/issues/150 is closed, but the # error still occurs for some reason. 
- pyi-error""") + pyi-error""" + ), ) s.shell.run(["nox", "-s", "blacken"], hide_output=False) From 9f6b02dc0e99e4af80dc7a116315a5f673ebd585 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 11 Aug 2021 11:08:14 -0500 Subject: [PATCH 12/20] use dictionary for intersphinx --- owlbot.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/owlbot.py b/owlbot.py index 4559d34f3..5d7d8692b 100644 --- a/owlbot.py +++ b/owlbot.py @@ -97,9 +97,7 @@ samples=True, microgenerator=True, split_system_tests=True, - intersphinx_dependencies=[ - ("dateutil", "https://dateutil.readthedocs.io/en/latest/") - ], + intersphinx_dependencies={"dateutil": "https://dateutil.readthedocs.io/en/latest/"}, ) # BigQuery has a custom multiprocessing note From 7cccbd2c59e23195e2d1e6f5030c9e5a0b225c68 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Wed, 11 Aug 2021 16:10:28 +0000 Subject: [PATCH 13/20] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- docs/conf.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index c1ed766b5..b417654e7 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -110,7 +110,6 @@ # directories to ignore when looking for source files. exclude_patterns = [ "_build", - "**/.nox/**/*", "samples/AUTHORING_GUIDE.md", "samples/CONTRIBUTING.md", "samples/snippets/README.rst", @@ -361,12 +360,12 @@ # Example configuration for intersphinx: refer to the Python standard library. 
intersphinx_mapping = { "python": ("https://python.readthedocs.org/en/latest/", None), - "dateutil": ("https://dateutil.readthedocs.io/en/latest/", None), "google-auth": ("https://googleapis.dev/python/google-auth/latest/", None), "google.api_core": ("https://googleapis.dev/python/google-api-core/latest/", None,), "grpc": ("https://grpc.github.io/grpc/python/", None), "proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None), "protobuf": ("https://googleapis.dev/python/protobuf/latest/", None), + "dateutil": ("https://dateutil.readthedocs.io/en/latest/", None), } From c4636fa202399396281101411c4bb2b1c8ae9f2f Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Mon, 16 Aug 2021 15:02:01 +0000 Subject: [PATCH 14/20] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- docs/conf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/conf.py b/docs/conf.py index b417654e7..74a59b625 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -110,6 +110,7 @@ # directories to ignore when looking for source files. exclude_patterns = [ "_build", + "**/.nox/**/*", "samples/AUTHORING_GUIDE.md", "samples/CONTRIBUTING.md", "samples/snippets/README.rst", From 0497b1920b862cef7a2e5d94dbc94e3fddcebda6 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 30 Sep 2021 13:32:49 -0500 Subject: [PATCH 15/20] add test case for trailing . 
--- setup.py | 2 +- testing/constraints-3.6.txt | 2 +- tests/unit/helpers/test_from_json.py | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 9dd929e45..eb8066abc 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ "google-resumable-media >= 0.6.0, < 3.0dev", "packaging >= 14.3", "protobuf >= 3.12.0", - "python-dateutil >= 2.7.0, <3.0dev", + "python-dateutil >= 2.7.2, <3.0dev", "requests >= 2.18.0, < 3.0.0dev", ] extras = { diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index bed6f6d0e..59913d588 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -18,7 +18,7 @@ pandas==0.24.2 proto-plus==1.10.0 protobuf==3.12.0 pyarrow==3.0.0 -python-dateutil==2.7.0 +python-dateutil==2.7.2 requests==2.18.0 Shapely==1.6.0 six==1.13.0 diff --git a/tests/unit/helpers/test_from_json.py b/tests/unit/helpers/test_from_json.py index 12d451361..2b28fb625 100644 --- a/tests/unit/helpers/test_from_json.py +++ b/tests/unit/helpers/test_from_json.py @@ -123,7 +123,8 @@ def test_interval_from_json_w_invalid_format(mut): ), ), # Test with fraction that is not microseconds. 
- ("0-0 0 0:0:0.1", relativedelta(microseconds=100000)), + ("0-0 0 0:0:42.", relativedelta(seconds=42)), + ("0-0 0 0:0:59.1", relativedelta(seconds=59, microseconds=100000)), ("0-0 0 0:0:0.12", relativedelta(microseconds=120000)), ("0-0 0 0:0:0.123", relativedelta(microseconds=123000)), ("0-0 0 0:0:0.1234", relativedelta(microseconds=123400)), From 0318f54283739c9a9126beb319db42b894f4a9aa Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 30 Sep 2021 13:39:12 -0500 Subject: [PATCH 16/20] explicit none --- google/cloud/bigquery/_helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index d3389b25f..27249d6bb 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -195,7 +195,7 @@ def _interval_from_json( ) -> Optional[relativedelta.relativedelta]: """Coerce 'value' to an interval, if set or not nullable.""" if not _not_null(value, field): - return + return None if value is None: raise TypeError(f"got {value} for REQUIRED field: {repr(field)}") From 6b1f238448e5a7f36f9af80d09128d52228b0266 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Thu, 30 Sep 2021 18:44:40 +0000 Subject: [PATCH 17/20] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- docs/conf.py | 2 +- renovate.json | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 8c5198e4f..0784da0b2 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -367,8 +367,8 @@ "proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None), "protobuf": ("https://googleapis.dev/python/protobuf/latest/", None), "dateutil": ("https://dateutil.readthedocs.io/en/latest/", None), - "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), "geopandas": 
("https://geopandas.org/", None), + "pandas": ("https://pandas.pydata.org/pandas-docs/dev", None), } diff --git a/renovate.json b/renovate.json index 713c60bb4..c21036d38 100644 --- a/renovate.json +++ b/renovate.json @@ -1,6 +1,9 @@ { "extends": [ - "config:base", "group:all", ":preserveSemverRanges" + "config:base", + "group:all", + ":preserveSemverRanges", + ":disableDependencyDashboard" ], "ignorePaths": [".pre-commit-config.yaml"], "pip_requirements": { From 54e47f7a4b43323c2da83c5ba748628897515a9f Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 4 Oct 2021 10:56:49 -0500 Subject: [PATCH 18/20] truncate nanoseconds --- google/cloud/bigquery/_helpers.py | 2 +- tests/unit/helpers/test_from_json.py | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 27249d6bb..abdcdce49 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -212,7 +212,7 @@ def _interval_from_json( minutes = time_sign * int(parsed.group("minutes")) seconds = time_sign * int(parsed.group("seconds")) fraction = parsed.group("fraction") - microseconds = time_sign * int(fraction.ljust(6, "0")) if fraction else 0 + microseconds = time_sign * int(fraction.ljust(6, "0")[:6]) if fraction else 0 return relativedelta.relativedelta( years=years, diff --git a/tests/unit/helpers/test_from_json.py b/tests/unit/helpers/test_from_json.py index 2b28fb625..65b054f44 100644 --- a/tests/unit/helpers/test_from_json.py +++ b/tests/unit/helpers/test_from_json.py @@ -128,6 +128,28 @@ def test_interval_from_json_w_invalid_format(mut): ("0-0 0 0:0:0.12", relativedelta(microseconds=120000)), ("0-0 0 0:0:0.123", relativedelta(microseconds=123000)), ("0-0 0 0:0:0.1234", relativedelta(microseconds=123400)), + # Fractional seconds can cause rounding problems if cast to float. 
See: + # https://github.com/googleapis/python-db-dtypes-pandas/issues/18 + ("0-0 0 0:0:59.876543", relativedelta(seconds=59, microseconds=876543)), + ( + "0-0 0 01:01:01.010101", + relativedelta(hours=1, minutes=1, seconds=1, microseconds=10101), + ), + ( + "0-0 0 09:09:09.090909", + relativedelta(hours=9, minutes=9, seconds=9, microseconds=90909), + ), + ( + "0-0 0 11:11:11.111111", + relativedelta(hours=11, minutes=11, seconds=11, microseconds=111111), + ), + ( + "0-0 0 19:16:23.987654", + relativedelta(hours=19, minutes=16, seconds=23, microseconds=987654), + ), + # Nanoseconds are not expected, but should not cause error. + ("0-0 0 0:0:00.123456789", relativedelta(microseconds=123456)), + ("0-0 0 0:0:59.87654321", relativedelta(seconds=59, microseconds=876543)), ), ) def test_w_string_values(mut, value, expected): From dcc8b578caa472e4fff1b5063eb8c1807ca202f5 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 4 Oct 2021 12:33:08 -0500 Subject: [PATCH 19/20] use \d group for digits --- google/cloud/bigquery/_helpers.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index abdcdce49..0b1f90148 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -51,8 +51,7 @@ _INTERVAL_PATTERN = re.compile( r"(?P-?)(?P[0-9]+)-(?P[0-9]+) " r"(?P-?[0-9]+) " - r"(?P-?)(?P[0-9]+):(?P[0-9]+):" - r"(?P[0-9]+)\.?(?P[0-9]+)?$" + r"(?P-?)(?P\d+):(?P\d+):(?P\d+)\.?(?P\d*)?$" ) _MIN_PYARROW_VERSION = packaging.version.Version("3.0.0") From d62b95001908d58e4eccdd67124d7aa882b177c7 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 26 Oct 2021 10:18:32 -0500 Subject: [PATCH 20/20] use \d for consistency --- google/cloud/bigquery/_helpers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 3a640cb48..e95d38545 100644 --- a/google/cloud/bigquery/_helpers.py +++ 
b/google/cloud/bigquery/_helpers.py @@ -49,8 +49,8 @@ # BigQuery sends INTERVAL data in "canonical format" # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#interval_type _INTERVAL_PATTERN = re.compile( - r"(?P-?)(?P[0-9]+)-(?P[0-9]+) " - r"(?P-?[0-9]+) " + r"(?P-?)(?P\d+)-(?P\d+) " + r"(?P-?\d+) " r"(?P-?)(?P\d+):(?P\d+):(?P\d+)\.?(?P\d*)?$" )