Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding HappyBase Connection.create_table(). #1502

Merged
merged 1 commit into from
Feb 24, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 124 additions & 3 deletions gcloud/bigtable/happybase/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,15 @@
"""Google Cloud Bigtable HappyBase connection module."""


import datetime
import warnings

import six

from gcloud.bigtable.client import Client
from gcloud.bigtable.column_family import GCRuleIntersection
from gcloud.bigtable.column_family import MaxAgeGCRule
from gcloud.bigtable.column_family import MaxVersionsGCRule
from gcloud.bigtable.happybase.table import Table
from gcloud.bigtable.table import Table as _LowLevelTable

Expand Down Expand Up @@ -124,9 +128,6 @@ class Connection(object):
:type kwargs: dict
:param kwargs: Remaining keyword arguments. Provided for HappyBase
compatibility.

:raises: :class:`ValueError <exceptions.ValueError>` if any of the unused
parameters are specified with a value other than the defaults.
"""

_cluster = None
Expand Down Expand Up @@ -265,6 +266,77 @@ def tables(self):

return table_names

def create_table(self, name, families):
    """Create a table together with its column families.

    .. warning::

        Of the HappyBase column family options, Cloud Bigtable can only
        make use of ``max_versions`` and ``time_to_live``.

    .. note::

        This method is **not** atomic. The Cloud Bigtable API separates
        the creation of a table from the creation of column families.
        Thus this method needs to send 1 request for the table creation
        and 1 request for each column family. If any of these fails, the
        method will fail, but the progress made towards completion
        cannot be rolled back.

    Values in ``families`` represent column family options. In
    HappyBase, these are dictionaries, corresponding to the
    ``ColumnDescriptor`` structure in the Thrift API. The accepted keys
    are:

    * ``max_versions`` (``int``)
    * ``compression`` (``str``)
    * ``in_memory`` (``bool``)
    * ``bloom_filter_type`` (``str``)
    * ``bloom_filter_vector_size`` (``int``)
    * ``bloom_filter_nb_hashes`` (``int``)
    * ``block_cache_enabled`` (``bool``)
    * ``time_to_live`` (``int``)

    :type name: str
    :param name: The name of the table to be created.

    :type families: dict
    :param families: Dictionary with column family names as keys and
                     column family options as the values. The options
                     can be among

                     * :class:`dict`
                     * :class:`.GarbageCollectionRule`

    :raises: :class:`TypeError <exceptions.TypeError>` if ``families``
             is not a dictionary,
             :class:`ValueError <exceptions.ValueError>` if ``families``
             has no entries
    """
    if not isinstance(families, dict):
        raise TypeError('families arg must be a dictionary')

    if not families:
        raise ValueError('Cannot create table %r (no column '
                         'families specified)' % (name,))

    # Normalize every family name and parse every option up front, so
    # that all of the input has been handled before any API request.
    parsed_rules = {}
    for raw_family_name in families:
        family_id = raw_family_name
        if isinstance(family_id, six.binary_type):
            family_id = family_id.decode('utf-8')
        # HappyBase-style names may carry a trailing colon; drop it.
        if family_id.endswith(':'):
            family_id = family_id[:-1]
        parsed_rules[family_id] = _parse_family_option(
            families[raw_family_name])

    # One request creates the table itself ...
    table = _LowLevelTable(self._table_name(name), self._cluster)
    table.create()

    # ... followed by one request per column family.
    for family_id, gc_rule in parsed_rules.items():
        table.column_family(family_id, gc_rule=gc_rule).create()

def delete_table(self, name, disable=False):
"""Delete the specified table.

Expand Down Expand Up @@ -336,3 +408,52 @@ def compact_table(self, name, major=False):
"""
raise NotImplementedError('The Cloud Bigtable API does not support '
'compacting a table.')


def _parse_family_option(option):
"""Parses a column family option into a garbage collection rule.

.. note::

If ``option`` is not a dictionary, the type is not checked.
If ``option`` is :data:`None`, there is nothing to do, since this
is the correct output.

:type option: :class:`dict`,
:data:`NoneType <types.NoneType>`,
:class:`.GarbageCollectionRule`
:param option: A column family option passes as a dictionary value in
:meth:`Connection.create_table`.

:rtype: :class:`.GarbageCollectionRule`
:returns: A garbage collection rule parsed from the input.
"""
result = option
if isinstance(result, dict):
if not set(result.keys()) <= set(['max_versions', 'time_to_live']):
all_keys = ', '.join(repr(key) for key in result.keys())
warning_msg = ('Cloud Bigtable only supports max_versions and '
'time_to_live column family settings. '
'Received: %s' % (all_keys,))
_WARN(warning_msg)

max_num_versions = result.get('max_versions')
max_age = None
if 'time_to_live' in result:
max_age = datetime.timedelta(seconds=result['time_to_live'])

versions_rule = age_rule = None
if max_num_versions is not None:
versions_rule = MaxVersionsGCRule(max_num_versions)
if max_age is not None:
age_rule = MaxAgeGCRule(max_age)

if versions_rule is None:
result = age_rule
else:
if age_rule is None:
result = versions_rule
else:
result = GCRuleIntersection(rules=[age_rule, versions_rule])

return result
189 changes: 189 additions & 0 deletions gcloud/bigtable/happybase/test_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,90 @@ def test_tables_with_prefix(self):
result = connection.tables()
self.assertEqual(result, [unprefixed_table_name1])

def test_create_table(self):
    """Creating a table makes one table and one family per entry."""
    import operator
    from gcloud._testing import _Monkey
    from gcloud.bigtable.happybase import connection as MUT

    cluster = _Cluster()  # Avoid implicit environ check.
    connection = self._makeOne(autoconnect=False, cluster=cluster)
    mock_gc_rule = object()
    called_options = []

    def mock_parse_family_option(option):
        called_options.append(option)
        return mock_gc_rule

    name = 'table-name'
    col_fam1 = 'cf1'
    col_fam_option1 = object()
    col_fam2 = u'cf2'
    col_fam_option2 = object()
    col_fam3 = b'cf3'
    col_fam_option3 = object()
    families = {
        col_fam1: col_fam_option1,
        # A trailing colon is also allowed.
        col_fam2 + ':': col_fam_option2,
        col_fam3 + b':': col_fam_option3,
    }

    tables_created = []

    def make_table(*args, **kwargs):
        table = _MockLowLevelTable(*args, **kwargs)
        tables_created.append(table)
        return table

    with _Monkey(MUT, _LowLevelTable=make_table,
                 _parse_family_option=mock_parse_family_option):
        connection.create_table(name, families)

    # Just one table would have been created.
    [table_instance] = tables_created
    self.assertEqual(table_instance.args, (name, cluster))
    self.assertEqual(table_instance.kwargs, {})
    self.assertEqual(table_instance.create_calls, 1)

    # The option parser is called once per family (three times), but
    # the order of those calls is not known.
    expected_options = set(
        [col_fam_option1, col_fam_option2, col_fam_option3])
    self.assertEqual(set(called_options), expected_options)

    # We expect three column family instances created, but don't know
    # the order due to non-deterministic dict.items(), so sort by ID.
    self.assertEqual(len(table_instance.col_fam_created), 3)
    created_by_id = sorted(
        table_instance.col_fam_created,
        key=operator.attrgetter('column_family_id'))
    expected_ids = [col_fam1, col_fam2, col_fam3.decode('utf-8')]
    for created, expected_id in zip(created_by_id, expected_ids):
        self.assertEqual(created.column_family_id, expected_id)
        self.assertEqual(created.gc_rule, mock_gc_rule)
        self.assertEqual(created.create_calls, 1)

def test_create_table_bad_type(self):
    """A non-dictionary ``families`` argument raises ``TypeError``."""
    # Passing a cluster explicitly avoids the implicit environ check.
    connection = self._makeOne(autoconnect=False, cluster=_Cluster())

    not_a_dict = None
    self.assertRaises(TypeError, connection.create_table,
                      'table-name', not_a_dict)

def test_create_table_bad_value(self):
    """An empty ``families`` dictionary raises ``ValueError``."""
    # Passing a cluster explicitly avoids the implicit environ check.
    connection = self._makeOne(autoconnect=False, cluster=_Cluster())

    no_families = {}
    self.assertRaises(ValueError, connection.create_table,
                      'table-name', no_families)

def test_delete_table(self):
from gcloud._testing import _Monkey
from gcloud.bigtable.happybase import connection as MUT
Expand Down Expand Up @@ -376,6 +460,90 @@ def test_compact_table(self):
connection.compact_table(name, major=major)


class Test__parse_family_option(unittest2.TestCase):
    """Unit tests for the ``_parse_family_option`` helper."""

    def _callFUT(self, option):
        # Look the function up lazily, at call time.
        from gcloud.bigtable.happybase import connection as MUT
        return MUT._parse_family_option(option)

    def test_dictionary_no_keys(self):
        self.assertEqual(self._callFUT({}), None)

    def test_null(self):
        self.assertEqual(self._callFUT(None), None)

    def test_dictionary_bad_key(self):
        from gcloud._testing import _Monkey
        from gcloud.bigtable.happybase import connection as MUT

        # Collect warning messages instead of emitting them.
        warned = []

        with _Monkey(MUT, _WARN=warned.append):
            result = self._callFUT({'badkey': None})

        self.assertEqual(result, None)
        self.assertEqual(len(warned), 1)
        self.assertIn('badkey', warned[0])

    def test_dictionary_versions_key(self):
        from gcloud.bigtable.column_family import MaxVersionsGCRule

        versions = 42
        result = self._callFUT({'max_versions': versions})

        self.assertEqual(result, MaxVersionsGCRule(versions))

    def test_dictionary_ttl_key(self):
        import datetime
        from gcloud.bigtable.column_family import MaxAgeGCRule

        # One day, expressed in seconds on the way in.
        one_day_seconds = 24 * 60 * 60
        result = self._callFUT({'time_to_live': one_day_seconds})

        expected = MaxAgeGCRule(datetime.timedelta(days=1))
        self.assertEqual(result, expected)

    def test_dictionary_both_keys(self):
        import datetime
        from gcloud.bigtable.column_family import GCRuleIntersection
        from gcloud.bigtable.column_family import MaxAgeGCRule
        from gcloud.bigtable.column_family import MaxVersionsGCRule

        versions = 42
        one_day_seconds = 24 * 60 * 60
        result = self._callFUT({
            'max_versions': versions,
            'time_to_live': one_day_seconds,
        })

        # NOTE: This relies on the order of the rules in the method we
        #       are calling matching this order here (age, then versions).
        expected = GCRuleIntersection(rules=[
            MaxAgeGCRule(datetime.timedelta(days=1)),
            MaxVersionsGCRule(versions),
        ])
        self.assertEqual(result, expected)

    def test_non_dictionary(self):
        option = object()
        self.assertNotIsInstance(option, dict)
        self.assertEqual(self._callFUT(option), option)


class _Client(object):

def __init__(self, *args, **kwargs):
Expand Down Expand Up @@ -418,12 +586,33 @@ def list_tables(self):
return self.list_tables_result


class _MockLowLevelColumnFamily(object):

def __init__(self, column_family_id, gc_rule=None):
self.column_family_id = column_family_id
self.gc_rule = gc_rule
self.create_calls = 0

def create(self):
self.create_calls += 1


class _MockLowLevelTable(object):

def __init__(self, *args, **kwargs):
self.args = args
self.kwargs = kwargs
self.delete_calls = 0
self.create_calls = 0
self.col_fam_created = []

def delete(self):
self.delete_calls += 1

def create(self):
self.create_calls += 1

def column_family(self, column_family_id, gc_rule=None):
result = _MockLowLevelColumnFamily(column_family_id, gc_rule=gc_rule)
self.col_fam_created.append(result)
return result