Skip to content

Commit

Permalink
Refactoring remove_kind to make while loop clearer.
Browse files Browse the repository at this point in the history
Also stumbled upon other datastore gotchas (non-ancestor
deletes in a transaction) and added printing to indicate
what is being deleted.
  • Loading branch information
dhermes committed Nov 12, 2014
1 parent 25179c9 commit 5ad0798
Showing 1 changed file with 47 additions and 12 deletions.
59 changes: 47 additions & 12 deletions regression/clear_datastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,30 +14,65 @@
'Person',
'Post',
]
TRANSACTION_MAX_GROUPS = 5


def remove_kind(dataset, kind):
def fetch_keys(dataset, kind, fetch_max=FETCH_MAX, query=None):
    """Run one keys-only fetch for ``kind`` and return the query used.

    When ``query`` is omitted, a new query is built from ``dataset`` for
    ``kind``, limited to ``fetch_max`` results and projected onto
    ``__key__``. When ``query`` is supplied, ``kind`` and ``fetch_max``
    are not consulted.

    Returns a ``(query, results)`` pair, where ``results`` is whatever
    ``query.fetch()`` returned. Passing the returned query back in lets
    the next call pick up from the cursor it carries.
    """
    if query is None:
        limited = dataset.query(kind=kind).limit(fetch_max)
        query = limited.projection(['__key__'])

    # A cursor already set on the query means an earlier fetch happened;
    # derive a new query that starts at that cursor.
    has_cursor = query._cursor is not None
    if has_cursor:
        query = query.with_cursor(query.cursor())

    batch = query.fetch()
    return query, batch


def get_ancestors(entities):
    """Return the distinct root path elements of the given entities' keys.

    Each entity must provide ``key().path()``, whose first element is a
    dict describing the root of the key. The result is a list of such
    dicts with duplicates removed, in no particular order.
    """
    # NOTE: A key will always have at least one path element.
    # Dicts are not hashable, so every root is frozen into a tuple of its
    # sorted items; sorting makes equal dicts freeze to equal tuples.
    unique_roots = set()
    for entity in entities:
        root = entity.key().path()[0]
        unique_roots.add(tuple(sorted(root.items())))

    # Thaw the frozen roots back into dictionaries.
    return [dict(frozen) for frozen in unique_roots]


def delete_entities(dataset, entities):
    """Delete ``entities`` via the connection that ``dataset`` exposes.

    The key of every entity is converted to its protobuf form, and the
    resulting list is handed to the connection's ``delete_entities``
    together with the dataset ID in a single call.
    """
    target_id = dataset.id()
    conn = dataset.connection()

    protobuf_keys = []
    for entity in entities:
        protobuf_keys.append(entity.key().to_protobuf())

    conn.delete_entities(target_id, protobuf_keys)


def remove_kind(dataset, kind):
    """Delete every entity of ``kind`` that can be fetched from ``dataset``.

    Keys are collected in batches with ``fetch_keys`` until a batch comes
    back empty, then printed and deleted with ``delete_entities``. If
    nothing was found the function returns without deleting.

    The delete is issued inside the transaction block when the results
    span at most ``TRANSACTION_MAX_GROUPS`` distinct roots (as computed by
    ``get_ancestors``); otherwise it is issued after the block has exited.
    NOTE(review): presumably this mirrors a datastore limit on how many
    entity groups one transaction may touch -- confirm against the
    datastore documentation.
    """
    delete_outside_transaction = False
    with dataset.transaction():
        results = []

        # The first call builds the query; later calls reuse it so each
        # fetch continues from the cursor of the previous one.
        query, curr_results = fetch_keys(dataset, kind)
        results.extend(curr_results)
        while curr_results:
            query, curr_results = fetch_keys(dataset, kind, query=query)
            results.extend(curr_results)

        if not results:
            return

        # Now that we have all results, we seek to delete.
        # Parenthesised single-argument prints produce the same output
        # whether ``print`` is a statement or a function.
        print('Deleting keys:')
        print(results)

        ancestors = get_ancestors(results)
        if len(ancestors) > TRANSACTION_MAX_GROUPS:
            # Too many distinct roots: postpone the delete until the
            # transaction block has been left.
            delete_outside_transaction = True
        else:
            delete_entities(dataset, results)

    if delete_outside_transaction:
        delete_entities(dataset, results)


def remove_all_entities():
Expand Down

0 comments on commit 5ad0798

Please sign in to comment.