
Commit

Merge pull request #552 from tseaver/447-require_key_dataset_ids_match_branch

Require that keys for 'put' / 'delete' match the 'dataset_id' of the batch
tseaver committed Jan 28, 2015
2 parents 50f0684 + 49977da commit 2c75763
Showing 6 changed files with 489 additions and 146 deletions.
115 changes: 71 additions & 44 deletions gcloud/datastore/api.py
@@ -24,22 +24,38 @@
 from gcloud.datastore import helpers


-def _require_dataset_id(dataset_id=None):
+def _require_dataset_id(dataset_id=None, first_key=None):
     """Infer a dataset ID from the environment, if not passed explicitly.

+    Order of precedence:
+
+    - Passed `dataset_id` (if not None).
+    - `dataset_id` of current batch / transaction (if current exists).
+    - `dataset_id` of first key.
+    - `dataset_id` inferred from the environment (if `set_default_dataset_id`
+      has been called).
+
     :type dataset_id: string
     :param dataset_id: Optional.

+    :type first_key: :class:`gcloud.datastore.key.Key` or None
+    :param first_key: Optional: first key being manipulated.
+
     :rtype: string
     :returns: A dataset ID based on the current environment.
     :raises: :class:`EnvironmentError` if ``dataset_id`` is ``None``,
              and cannot be inferred from the environment.
     """
-    if dataset_id is None:
-        if _implicit_environ.DATASET_ID is None:
-            raise EnvironmentError('Dataset ID could not be inferred.')
-        dataset_id = _implicit_environ.DATASET_ID
-    return dataset_id
+    if dataset_id is not None:
+        return dataset_id
+
+    top = Batch.current()
+    if top is not None:
+        return top.dataset_id
+
+    if first_key is not None:
+        return first_key.dataset_id
+
+    if _implicit_environ.DATASET_ID is None:
+        raise EnvironmentError('Dataset ID could not be inferred.')
+
+    return _implicit_environ.DATASET_ID
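To see the new precedence in action, here is a minimal sketch (not part of the diff; it calls the private helper directly, uses illustrative names, and assumes a default dataset ID was registered via `set_default_dataset_id` and no batch is active):

    from gcloud.datastore.api import _require_dataset_id
    from gcloud.datastore.key import Key

    key = Key('Person', 123, dataset_id='s~my-dataset')

    _require_dataset_id('explicit-id', key)  # -> 'explicit-id' (explicit wins)
    _require_dataset_id(first_key=key)       # -> 's~my-dataset' (key's ID)
    _require_dataset_id()                    # -> the registered default ID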


@@ -54,38 +70,17 @@ def _require_connection(connection=None):
              cannot be inferred from the environment.
     """
     if connection is None:
-        if _implicit_environ.CONNECTION is None:
-            raise EnvironmentError('Connection could not be inferred.')
-        connection = _implicit_environ.CONNECTION
+        top = Batch.current()
+        if top is not None:
+            connection = top.connection
+        else:
+            if _implicit_environ.CONNECTION is None:
+                raise EnvironmentError('Connection could not be inferred.')
+            connection = _implicit_environ.CONNECTION
     return connection
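A quick sketch of the new branch: inside an active batch, the batch's connection now wins over the implicit environment. This assumes `connection` is a Connection object built elsewhere and that Batch supports the context-manager protocol:

    from gcloud.datastore.api import _require_connection
    from gcloud.datastore.batch import Batch

    with Batch(dataset_id='s~my-dataset', connection=connection) as batch:
        # The current batch's connection is returned, not the default one.
        assert _require_connection() is connection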


-def _get_dataset_id_from_keys(keys):
-    """Determines dataset ID from a list of keys.
-
-    :type keys: list of :class:`gcloud.datastore.key.Key`
-    :param keys: The keys from the same dataset.
-
-    :rtype: string
-    :returns: The dataset ID of the keys.
-    :raises: :class:`ValueError` if the key dataset IDs don't agree.
-    """
-    if any(key is None for key in keys):
-        raise ValueError('None not allowed')
-
-    dataset_id = keys[0].dataset_id
-    # Rather than creating a list or set of all dataset IDs, we iterate
-    # and check. We could allow the backend to check this for us if IDs
-    # with no prefix worked (GoogleCloudPlatform/google-cloud-datastore#59)
-    # or if we made sure that a prefix s~ or e~ was on each key.
-    for key in keys[1:]:
-        if key.dataset_id != dataset_id:
-            raise ValueError('All keys in get must be from the same dataset.')
-
-    return dataset_id


-def get(keys, missing=None, deferred=None, connection=None):
+def get(keys, missing=None, deferred=None, connection=None, dataset_id=None):
     """Retrieves entities, along with their attributes.

     :type keys: list of :class:`gcloud.datastore.key.Key`
@@ -103,22 +98,35 @@ def get(keys, missing=None, deferred=None, connection=None):
     :type connection: :class:`gcloud.datastore.connection.Connection`
     :param connection: Optional. The connection used to connect to datastore.
                        If not passed, inferred from the environment.

+    :type dataset_id: string
+    :param dataset_id: Optional. The dataset ID to which the keys belong.
+                       If not passed, inferred from the environment.
+
     :rtype: list of :class:`gcloud.datastore.entity.Entity`
     :returns: The requested entities.
+    :raises: EnvironmentError if ``connection`` or ``dataset_id`` not passed,
+             and cannot be inferred from the environment. ValueError if
+             one or more of ``keys`` has a dataset ID which does not match
+             the passed / inferred dataset ID.
     """
     if not keys:
         return []

     connection = _require_connection(connection)
-    dataset_id = _get_dataset_id_from_keys(keys)
+    dataset_id = _require_dataset_id(dataset_id, keys[0])
+
+    if list(set([key.dataset_id for key in keys])) != [dataset_id]:
+        raise ValueError('Keys do not match dataset ID')

     transaction = Transaction.current()

     entity_pbs = connection.lookup(
         dataset_id=dataset_id,
         key_pbs=[k.to_protobuf() for k in keys],
-        missing=missing, deferred=deferred,
+        missing=missing,
+        deferred=deferred,
         transaction_id=transaction and transaction.id,
     )

@@ -139,51 +147,70 @@
     return entities
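The effect of the new guard in `get`, sketched below with illustrative dataset IDs (assumes an implicit connection has been configured; otherwise EnvironmentError fires first): keys naming two different datasets now fail fast on the client instead of reaching the backend.

    from gcloud.datastore import api
    from gcloud.datastore.key import Key

    keys = [Key('Person', 1, dataset_id='s~ds-one'),
            Key('Person', 2, dataset_id='s~ds-two')]
    api.get(keys)   # raises ValueError('Keys do not match dataset ID')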


-def put(entities, connection=None):
+def put(entities, connection=None, dataset_id=None):
     """Save the entities in the Cloud Datastore.

     :type entities: list of :class:`gcloud.datastore.entity.Entity`
     :param entities: The entities to be saved to the datastore.

     :type connection: :class:`gcloud.datastore.connection.Connection`
     :param connection: Optional connection used to connect to datastore.
                        If not passed, inferred from the environment.

+    :type dataset_id: string
+    :param dataset_id: Optional. The dataset ID to which the entities belong.
+                       If not passed, inferred from the environment.
+
+    :raises: EnvironmentError if ``connection`` or ``dataset_id`` not passed,
+             and cannot be inferred from the environment. ValueError if
+             one or more entities has a key with a dataset ID not matching
+             the passed / inferred dataset ID.
     """
     if not entities:
         return

-    connection = connection or _implicit_environ.CONNECTION
+    connection = _require_connection(connection)
+    dataset_id = _require_dataset_id(dataset_id, entities[0].key)

     current = Batch.current()
     in_batch = current is not None
     if not in_batch:
-        keys = [entity.key for entity in entities]
-        dataset_id = _get_dataset_id_from_keys(keys)
         current = Batch(dataset_id=dataset_id, connection=connection)
     for entity in entities:
         current.put(entity)
     if not in_batch:
         current.commit()
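A sketch of the new `put` signature (illustrative names; assumes an implicit connection, and a live backend for the commit to succeed):

    from gcloud.datastore import api
    from gcloud.datastore.entity import Entity
    from gcloud.datastore.key import Key

    entity = Entity(key=Key('Person', 1, dataset_id='s~my-dataset'))
    api.put([entity], dataset_id='s~my-dataset')     # batched and committed
    api.put([entity], dataset_id='s~other-dataset')  # ValueError from Batch.put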


-def delete(keys, connection=None):
+def delete(keys, connection=None, dataset_id=None):
     """Delete the keys in the Cloud Datastore.

     :type keys: list of :class:`gcloud.datastore.key.Key`
     :param keys: The keys to be deleted from the datastore.

     :type connection: :class:`gcloud.datastore.connection.Connection`
     :param connection: Optional connection used to connect to datastore.
                        If not passed, inferred from the environment.

+    :type dataset_id: string
+    :param dataset_id: Optional. The dataset ID to which the keys belong.
+                       If not passed, inferred from the environment.
+
+    :raises: EnvironmentError if ``connection`` or ``dataset_id`` not passed,
+             and cannot be inferred from the environment. ValueError if
+             one or more keys has a dataset ID not matching the passed /
+             inferred dataset ID.
     """
     if not keys:
         return

-    connection = connection or _implicit_environ.CONNECTION
+    connection = _require_connection(connection)
+    dataset_id = _require_dataset_id(dataset_id, keys[0])

     # We allow partial keys to attempt a delete, the backend will fail.
     current = Batch.current()
     in_batch = current is not None
     if not in_batch:
-        dataset_id = _get_dataset_id_from_keys(keys)
         current = Batch(dataset_id=dataset_id, connection=connection)
     for key in keys:
         current.delete(key)
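`delete` resolves its dataset ID the same way; a sketch under the same assumptions as above:

    from gcloud.datastore import api
    from gcloud.datastore.key import Key

    key = Key('Person', 1, dataset_id='s~my-dataset')
    api.delete([key])                                # commits a one-key batch
    api.delete([key], dataset_id='s~other-dataset')  # ValueError from Batch.delete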
14 changes: 12 additions & 2 deletions gcloud/datastore/batch.py
@@ -210,11 +210,16 @@ def put(self, entity):
         :type entity: :class:`gcloud.datastore.entity.Entity`
         :param entity: the entity to be saved.

-        :raises: ValueError if entity has no key assigned.
+        :raises: ValueError if entity has no key assigned, or if the key's
+                 ``dataset_id`` does not match ours.
         """
         if entity.key is None:
             raise ValueError("Entity must have a key")

+        if not helpers._dataset_ids_equal(self._dataset_id,
+                                          entity.key.dataset_id):
+            raise ValueError("Key must be from same dataset as batch")
+
         _assign_entity_to_mutation(
             self.mutation, entity, self._auto_id_entities)
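Sketch of the new check in `Batch.put` (assumes `connection` was built elsewhere); note that the fuzzy comparison still accepts the unprefixed alias of the batch's dataset ID:

    from gcloud.datastore.batch import Batch
    from gcloud.datastore.entity import Entity
    from gcloud.datastore.key import Key

    batch = Batch(dataset_id='s~my-dataset', connection=connection)
    batch.put(Entity(key=Key('Person', 1, dataset_id='my-dataset')))   # OK: alias
    batch.put(Entity(key=Key('Person', 2, dataset_id='e~elsewhere')))  # ValueError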

@@ -224,11 +229,16 @@ def delete(self, key):
         :type key: :class:`gcloud.datastore.key.Key`
         :param key: the key to be deleted.

-        :raises: ValueError if key is not complete.
+        :raises: ValueError if key is not complete, or if the key's
+                 ``dataset_id`` does not match ours.
         """
         if key.is_partial:
             raise ValueError("Key must be complete")

+        if not helpers._dataset_ids_equal(self._dataset_id,
+                                          key.dataset_id):
+            raise ValueError("Key must be from same dataset as batch")
+
         key_pb = key.to_protobuf()
         helpers._add_keys_to_request(self.mutation.delete, [key_pb])
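The same guard applies to `Batch.delete`; a self-contained sketch under the same assumption about `connection`:

    from gcloud.datastore.batch import Batch
    from gcloud.datastore.key import Key

    batch = Batch(dataset_id='s~my-dataset', connection=connection)
    batch.delete(Key('Person', 1, dataset_id='my-dataset'))   # queued: alias matches
    batch.delete(Key('Person', 2, dataset_id='s~elsewhere'))  # ValueError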

Expand Down
52 changes: 52 additions & 0 deletions gcloud/datastore/helpers.py
@@ -319,3 +319,55 @@ def _add_keys_to_request(request_field_pb, key_pbs):
     for key_pb in key_pbs:
         key_pb = _prepare_key_for_request(key_pb)
         request_field_pb.add().CopyFrom(key_pb)
+
+
+def _dataset_ids_equal(dataset_id1, dataset_id2):
+    """Compares two dataset IDs for fuzzy equality.
+
+    Each may be prefixed or unprefixed (but not null, since a dataset ID
+    is required on a key). The only allowed prefixes are 's~' and 'e~'.
+
+    Two identical prefixed IDs match:
+
+    >>> 's~foo' == 's~foo'
+    >>> 'e~bar' == 'e~bar'
+
+    while non-identical prefixed IDs don't:
+
+    >>> 's~foo' != 's~bar'
+    >>> 's~foo' != 'e~foo'
+
+    Non-prefixed IDs can match either other non-prefixed IDs or
+    prefixed ones:
+
+    >>> 'foo' == 'foo'
+    >>> 'foo' == 's~foo'
+    >>> 'foo' == 'e~foo'
+    >>> 'foo' != 'bar'
+    >>> 'foo' != 's~bar'
+
+    (Ties are resolved since 'foo' can only be an alias for one of
+    s~foo or e~foo in the backend.)
+
+    :type dataset_id1: string
+    :param dataset_id1: A dataset ID.
+
+    :type dataset_id2: string
+    :param dataset_id2: A dataset ID.
+
+    :rtype: boolean
+    :returns: Boolean indicating if the IDs are the same.
+    """
+    if dataset_id1 == dataset_id2:
+        return True
+
+    if dataset_id1.startswith('s~') or dataset_id1.startswith('e~'):
+        # If `dataset_id1` is prefixed and not matching, then the only way
+        # they can match is if `dataset_id2` is unprefixed.
+        return dataset_id1[2:] == dataset_id2
+    elif dataset_id2.startswith('s~') or dataset_id2.startswith('e~'):
+        # Here we know `dataset_id1` is unprefixed and `dataset_id2`
+        # is prefixed.
+        return dataset_id1 == dataset_id2[2:]
+
+    return False
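A few concrete checks of the fuzzy comparison, runnable as-is against the helper added above:

    from gcloud.datastore.helpers import _dataset_ids_equal

    assert _dataset_ids_equal('s~foo', 's~foo')        # identical, prefixed
    assert _dataset_ids_equal('foo', 's~foo')          # unprefixed alias
    assert _dataset_ids_equal('e~foo', 'foo')          # alias, other direction
    assert not _dataset_ids_equal('s~foo', 'e~foo')    # different prefixes
    assert not _dataset_ids_equal('foo', 'bar')        # plainly different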
