From cf38a53a9d8f5ce08aa423ede19161dac67c6595 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 14 Apr 2020 18:03:21 +0100 Subject: [PATCH 1/2] Fix a potentially-huge sql query We could end up looking up tens of thousands of events, which could cause large amounts of data to be logged to the postgres log. --- changelog.d/7274.bugfix | 1 + .../data_stores/main/event_federation.py | 20 ++++++++++++------- 2 files changed, 14 insertions(+), 7 deletions(-) create mode 100644 changelog.d/7274.bugfix diff --git a/changelog.d/7274.bugfix b/changelog.d/7274.bugfix new file mode 100644 index 000000000000..211a38befc01 --- /dev/null +++ b/changelog.d/7274.bugfix @@ -0,0 +1 @@ +Fix a sql query introduced in Synapse 1.12.0 which could cause large amounts of logging to the postgres slow-query log. diff --git a/synapse/storage/data_stores/main/event_federation.py b/synapse/storage/data_stores/main/event_federation.py index 62d4e9f59977..ba256e61b224 100644 --- a/synapse/storage/data_stores/main/event_federation.py +++ b/synapse/storage/data_stores/main/event_federation.py @@ -173,19 +173,25 @@ def _get_auth_chain_difference_txn( for event_id in initial_events } + # The sorted list of events whose auth chains we should walk. + search = [] # type: List[Tuple[int, str]] + # We need to get the depth of the initial events for sorting purposes. sql = """ SELECT depth, event_id FROM events WHERE %s - ORDER BY depth ASC """ - clause, args = make_in_list_sql_clause( - txn.database_engine, "event_id", initial_events - ) - txn.execute(sql % (clause,), args) + # the list can be huge, so let's avoid looking them all up in one massive + # query. + for batch in batch_iter(initial_events, 1000): + clause, args = make_in_list_sql_clause( + txn.database_engine, "event_id", batch + ) + txn.execute(sql % (clause,), args) + search.extend(txn) - # The sorted list of events whose auth chains we should walk. - search = txn.fetchall() # type: List[Tuple[int, str]] + # sort by depth + search.sort() # Map from event to its auth events event_to_auth_events = {} # type: Dict[str, Set[str]] From 6227b0ff175726665da5554521939a2aa4d6e425 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 14 Apr 2020 18:48:24 +0100 Subject: [PATCH 2/2] Use txn.fetchall --- synapse/storage/data_stores/main/event_federation.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/synapse/storage/data_stores/main/event_federation.py b/synapse/storage/data_stores/main/event_federation.py index ba256e61b224..b99439cc3784 100644 --- a/synapse/storage/data_stores/main/event_federation.py +++ b/synapse/storage/data_stores/main/event_federation.py @@ -188,7 +188,10 @@ def _get_auth_chain_difference_txn( txn.database_engine, "event_id", batch ) txn.execute(sql % (clause,), args) - search.extend(txn) + + # I think building a temporary list with fetchall is more efficient than + # just `search.extend(txn)`, but this is unconfirmed + search.extend(txn.fetchall()) # sort by depth search.sort()