Skip to content

Commit

Permalink
Merge pull request #2787 from tseaver/2354-bigquery-nested_data_types
Browse files Browse the repository at this point in the history
Correctly model JSON repr of complex nested records.
  • Loading branch information
tseaver authored Dec 2, 2016
2 parents d775489 + ab9278f commit 8ff90db
Show file tree
Hide file tree
Showing 4 changed files with 155 additions and 12 deletions.
9 changes: 5 additions & 4 deletions bigquery/google/cloud/bigquery/_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,11 @@ def _record_from_json(value, field):
"""Coerce 'value' to a mapping, if set or not nullable."""
if _not_null(value, field):
record = {}
for subfield, cell in zip(field.fields, value['f']):
record_iter = zip(field.fields, value['f'])
for subfield, cell in record_iter:
converter = _CELLDATA_FROM_JSON[subfield.field_type]
if field.mode == 'REPEATED':
value = [converter(item, subfield) for item in cell['v']]
if subfield.mode == 'REPEATED':
value = [converter(item['v'], subfield) for item in cell['v']]
else:
value = converter(cell['v'], subfield)
record[subfield.name] = value
Expand Down Expand Up @@ -104,7 +105,7 @@ def _row_from_json(row, schema):
for field, cell in zip(schema, row['f']):
converter = _CELLDATA_FROM_JSON[field.field_type]
if field.mode == 'REPEATED':
row_data.append([converter(item, field)
row_data.append([converter(item['v'], field)
for item in cell['v']])
else:
row_data.append(converter(cell['v'], field))
Expand Down
97 changes: 94 additions & 3 deletions bigquery/unit_tests/test__helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def test_w_scalar_subfield(self):
def test_w_repeated_subfield(self):
subfield = _Field('REPEATED', 'color', 'STRING')
field = _Field('REQUIRED', fields=[subfield])
value = {'f': [{'v': ['red', 'yellow', 'blue']}]}
value = {'f': [{'v': [{'v': 'red'}, {'v': 'yellow'}, {'v': 'blue'}]}]}
coerced = self._call_fut(value, field)
self.assertEqual(coerced, {'color': ['red', 'yellow', 'blue']})

Expand Down Expand Up @@ -234,6 +234,97 @@ def test_w_string_value(self):
self.assertEqual(coerced, 'Wonderful!')


class Test_row_from_json(unittest.TestCase):
    """Check ``_row_from_json`` against JSON-encoded BigQuery rows.

    Every case builds a schema out of ``_Field`` objects, feeds the helper
    a single row in the ``{'f': [{'v': ...}, ...]}`` layout, and compares
    the returned tuple of cell values with the expected Python objects.
    """

    def _call_fut(self, row, schema):
        """Import the function under test lazily and apply it."""
        from google.cloud.bigquery._helpers import _row_from_json
        return _row_from_json(row, schema)

    def test_w_single_scalar_column(self):
        # SELECT 1 AS col
        schema = [_Field('REQUIRED', 'col', 'INTEGER')]
        payload = {u'f': [{u'v': u'1'}]}

        result = self._call_fut(payload, schema=schema)

        self.assertEqual(result, (1,))

    def test_w_single_struct_column(self):
        # SELECT (1, 2) AS col
        members = [
            _Field('REQUIRED', 'sub_1', 'INTEGER'),
            _Field('REQUIRED', 'sub_2', 'INTEGER'),
        ]
        schema = [_Field('REQUIRED', 'col', 'RECORD', fields=members)]
        # A struct cell wraps its own 'f' list of member cells.
        struct_cell = {u'v': {u'f': [{u'v': u'1'}, {u'v': u'2'}]}}
        payload = {u'f': [struct_cell]}

        result = self._call_fut(payload, schema=schema)

        self.assertEqual(result, ({'sub_1': 1, 'sub_2': 2},))

    def test_w_single_array_column(self):
        # SELECT [1, 2, 3] AS col
        schema = [_Field('REPEATED', 'col', 'INTEGER')]
        # Each array element is itself wrapped in a {'v': ...} mapping.
        array_cell = {u'v': [{u'v': u'1'}, {u'v': u'2'}, {u'v': u'3'}]}
        payload = {u'f': [array_cell]}

        result = self._call_fut(payload, schema=schema)

        self.assertEqual(result, ([1, 2, 3],))

    def test_w_struct_w_nested_array_column(self):
        # SELECT ([1, 2], 3, [4, 5]) AS col
        members = [
            _Field('REPEATED', 'first', 'INTEGER'),
            _Field('REQUIRED', 'second', 'INTEGER'),
            _Field('REPEATED', 'third', 'INTEGER'),
        ]
        schema = [_Field('REQUIRED', 'col', 'RECORD', fields=members)]
        first_cell = {u'v': [{u'v': u'1'}, {u'v': u'2'}]}
        second_cell = {u'v': u'3'}
        third_cell = {u'v': [{u'v': u'4'}, {u'v': u'5'}]}
        struct_cell = {u'v': {u'f': [first_cell, second_cell, third_cell]}}
        payload = {u'f': [struct_cell]}

        result = self._call_fut(payload, schema=schema)

        expected = {u'first': [1, 2], u'second': 3, u'third': [4, 5]}
        self.assertEqual(result, (expected,))

    def test_w_array_of_struct(self):
        # SELECT [(1, 2, 3), (4, 5, 6)] AS col
        members = [
            _Field('REQUIRED', 'first', 'INTEGER'),
            _Field('REQUIRED', 'second', 'INTEGER'),
            _Field('REQUIRED', 'third', 'INTEGER'),
        ]
        schema = [_Field('REPEATED', 'col', 'RECORD', fields=members)]
        struct_one = {u'v': {u'f': [
            {u'v': u'1'}, {u'v': u'2'}, {u'v': u'3'},
        ]}}
        struct_two = {u'v': {u'f': [
            {u'v': u'4'}, {u'v': u'5'}, {u'v': u'6'},
        ]}}
        payload = {u'f': [{u'v': [struct_one, struct_two]}]}

        result = self._call_fut(payload, schema=schema)

        expected = [
            {u'first': 1, u'second': 2, u'third': 3},
            {u'first': 4, u'second': 5, u'third': 6},
        ]
        self.assertEqual(result, (expected,))

    def test_w_array_of_struct_w_array(self):
        # SELECT [([1, 2, 3], 4), ([5, 6], 7)] AS col
        members = [
            _Field('REPEATED', 'first', 'INTEGER'),
            _Field('REQUIRED', 'second', 'INTEGER'),
        ]
        schema = [_Field('REPEATED', 'col', 'RECORD', fields=members)]
        struct_one = {u'v': {u'f': [
            {u'v': [{u'v': u'1'}, {u'v': u'2'}, {u'v': u'3'}]},
            {u'v': u'4'},
        ]}}
        struct_two = {u'v': {u'f': [
            {u'v': [{u'v': u'5'}, {u'v': u'6'}]},
            {u'v': u'7'},
        ]}}
        payload = {u'f': [{u'v': [struct_one, struct_two]}]}

        result = self._call_fut(payload, schema=schema)

        expected = [
            {u'first': [1, 2, 3], u'second': 4},
            {u'first': [5, 6], u'second': 7},
        ]
        self.assertEqual(result, (expected,))


class Test_rows_from_json(unittest.TestCase):

def _call_fut(self, value, field):
Expand All @@ -253,12 +344,12 @@ def test_w_record_subfield(self):
{'f': [
{'v': 'Phred Phlyntstone'},
{'v': {'f': [{'v': '800'}, {'v': '555-1212'}, {'v': 1}]}},
{'v': ['orange', 'black']},
{'v': [{'v': 'orange'}, {'v': 'black'}]},
]},
{'f': [
{'v': 'Bharney Rhubble'},
{'v': {'f': [{'v': '877'}, {'v': '768-5309'}, {'v': 2}]}},
{'v': ['brown']},
{'v': [{'v': 'brown'}]},
]},
{'f': [
{'v': 'Wylma Phlyntstone'},
Expand Down
15 changes: 10 additions & 5 deletions bigquery/unit_tests/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1173,22 +1173,27 @@ def test_fetch_data_w_repeated_fields(self):
'pageToken': TOKEN,
'rows': [
{'f': [
{'v': ['red', 'green']},
{'v': [{'f': [{'v': ['1', '2']},
{'v': ['3.1415', '1.414']}]}]},
{'v': [{'v': 'red'}, {'v': 'green'}]},
{'v': [{
'v': {
'f': [
{'v': [{'v': '1'}, {'v': '2'}]},
{'v': [{'v': '3.1415'}, {'v': '1.414'}]},
]}
}]},
]},
]
}
conn = _Connection(DATA)
client = _Client(project=self.PROJECT, connection=conn)
dataset = _Dataset(client)
full_name = SchemaField('color', 'STRING', mode='REPEATED')
color = SchemaField('color', 'STRING', mode='REPEATED')
index = SchemaField('index', 'INTEGER', 'REPEATED')
score = SchemaField('score', 'FLOAT', 'REPEATED')
struct = SchemaField('struct', 'RECORD', mode='REPEATED',
fields=[index, score])
table = self._make_one(self.TABLE_NAME, dataset=dataset,
schema=[full_name, struct])
schema=[color, struct])

iterator = table.fetch_data()
page = six.next(iterator.pages)
Expand Down
46 changes: 46 additions & 0 deletions system_tests/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,3 +478,49 @@ def _job_done(instance):
# them here. The best we can do is note that the API call didn't
# raise an error, and that the job completed (in the `retry()`
# above).

def test_sync_query_w_nested_arrays_and_structs(self):
    """Run literal SELECTs synchronously and check the decoded cell.

    Each example is executed through ``Config.CLIENT`` with legacy SQL
    disabled.  The result must contain exactly one row with exactly one
    column, and that single cell must equal the example's ``expected``
    value.  Anonymous struct members are expected to come back keyed as
    ``_field_1``, ``_field_2``, ...
    """
    examples = [
        # Plain scalar.
        {'sql': 'SELECT 1',
         'expected': 1},
        # Struct of scalars.
        {'sql': 'SELECT (1, 2)',
         'expected': {'_field_1': 1, '_field_2': 2}},
        # Array of scalars.
        {'sql': 'SELECT [1, 2, 3]',
         'expected': [1, 2, 3]},
        # Struct holding arrays.
        {'sql': 'SELECT ([1, 2], 3, [4, 5])',
         'expected': {
             '_field_1': [1, 2],
             '_field_2': 3,
             '_field_3': [4, 5],
         }},
        # Array of structs.
        {'sql': 'SELECT [(1, 2, 3), (4, 5, 6)]',
         'expected': [
             {'_field_1': 1, '_field_2': 2, '_field_3': 3},
             {'_field_1': 4, '_field_2': 5, '_field_3': 6},
         ]},
        # Array of structs which themselves hold arrays.
        {'sql': 'SELECT [([1, 2, 3], 4), ([5, 6], 7)]',
         'expected': [
             {u'_field_1': [1, 2, 3], u'_field_2': 4},
             {u'_field_1': [5, 6], u'_field_2': 7},
         ]},
        # Array built from a subquery of structs.
        {'sql': 'SELECT ARRAY(SELECT STRUCT([1, 2]))',
         'expected': [{u'_field_1': [1, 2]}]},
    ]

    for example in examples:
        sql = example['sql']
        expected = example['expected']

        query = Config.CLIENT.run_sync_query(sql)
        query.use_legacy_sql = False
        query.run()

        # One row, one column: the value under test is the only cell.
        self.assertEqual(len(query.rows), 1)
        self.assertEqual(len(query.rows[0]), 1)
        self.assertEqual(query.rows[0][0], expected)

0 comments on commit 8ff90db

Please sign in to comment.