Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Process previously failed backfill events in the background #15585

Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
fd26164
Process previously failed backfill events in the background
MadLittleMods May 12, 2023
c5dc746
Add changelog
MadLittleMods May 12, 2023
8fc47d8
Add consideration
MadLittleMods May 12, 2023
b5d95f7
Fix lints
MadLittleMods May 12, 2023
ebc93be
Merge branch 'develop' into madlittlemods/process-previously-failed-e…
MadLittleMods May 16, 2023
e13f5a9
Always check for failed attempts
MadLittleMods May 16, 2023
70f5911
Add comments and concern about maybe queue
MadLittleMods May 16, 2023
45934fe
Process all failed events as a sequential task in the background
MadLittleMods May 16, 2023
b1998d7
Merge branch 'develop' into madlittlemods/process-previously-failed-e…
MadLittleMods May 16, 2023
93de856
Better comments
MadLittleMods May 16, 2023
631d7db
Add test for `separate_event_ids_with_failed_pull_attempts`
MadLittleMods May 16, 2023
beeccc3
Avoid doing extra work if the list is empty
MadLittleMods May 17, 2023
7eabc60
Make sure to retain the same order they were given in case the depth …
MadLittleMods May 17, 2023
7583c2c
Add comments why OrderedDict
MadLittleMods May 17, 2023
e101318
Make test more robust around ordering
MadLittleMods May 17, 2023
899fc34
Add test description
MadLittleMods May 17, 2023
b5aec4f
Same order separated results
MadLittleMods May 17, 2023
6edd126
Refactor to get_event_ids_with_failed_pull_attempts(...)
MadLittleMods May 17, 2023
d4b8ff7
Update comment doc
MadLittleMods May 17, 2023
6a0ec9d
Merge branch 'develop' into madlittlemods/process-previously-failed-e…
MadLittleMods May 18, 2023
d843557
Use List
MadLittleMods May 18, 2023
75bec52
Merge branch 'develop' into madlittlemods/process-previously-failed-e…
MadLittleMods May 23, 2023
c4e1533
Trace differentiaed events
MadLittleMods May 23, 2023
ec230a3
Prefer plain language
MadLittleMods May 24, 2023
22a69be
Use a `set` for efficient lookups
MadLittleMods May 24, 2023
65febed
Add some context
MadLittleMods May 24, 2023
6474b4e
Use dedicated `partition` function to separate list
MadLittleMods May 24, 2023
15527f7
Add context for why source order for MSC2716
MadLittleMods May 24, 2023
d59615f
Add sanity check test that failed pull attempt events are still proce…
MadLittleMods May 24, 2023
95ffa7c
Use obvious type
MadLittleMods May 25, 2023
50acf6a
Merge branch 'develop' into madlittlemods/process-previously-failed-e…
MadLittleMods May 25, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/15585.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Process previously failed backfill events in the background to avoid blocking requests for something that is bound to fail again.
41 changes: 41 additions & 0 deletions synapse/handlers/federation_event.py
Original file line number Diff line number Diff line change
Expand Up @@ -953,6 +953,47 @@ async def _process_pulled_event(
)
return

# Check if we've already tried to process this event at some point in the past.
# We aren't concerned with the exponential backoff here, just whether it has
# failed before.
failed_pull_attempt_info = await self._store.get_event_failed_pull_attempt_info(
event.room_id, event_id
)
if failed_pull_attempt_info:
# Process previously failed backfill events in the background to not waste
# time on something that is bound to fail again.
#
# TODO: Are we concerned with processing too many events in parallel since
# we just fire and forget this off to the background? Should we instead have
# a background queue to chew through?
MadLittleMods marked this conversation as resolved.
Show resolved Hide resolved
run_as_background_process(
"_try_process_pulled_event",
self._try_process_pulled_event,
origin,
event,
backfilled,
)
MadLittleMods marked this conversation as resolved.
Show resolved Hide resolved
MadLittleMods marked this conversation as resolved.
Show resolved Hide resolved
else:
# Otherwise, we can optimistically try to process and wait for the event to
# be fully persisted.
await self._try_process_pulled_event(origin, event, backfilled)

async def _try_process_pulled_event(
self, origin: str, event: EventBase, backfilled: bool
) -> None:
"""
Handles all of the async tasks necessary to process a pulled event. You should
not use this method directly, instead use `_process_pulled_event` which will
handle all of the quick sync checks that should happen before-hand.

Params:
origin: The server we received this event from
event: The received event
backfilled: True if this is part of a historical batch of events (inhibits
notification to clients, and validation of device keys.)
"""
event_id = event.event_id

try:
try:
context = await self._compute_event_context_with_maybe_missing_prevs(
Expand Down
34 changes: 34 additions & 0 deletions synapse/storage/databases/main/event_federation.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,15 @@ class BackfillQueueNavigationItem:
type: str


@attr.s(frozen=True, slots=True, auto_attribs=True)
class EventFailedPullAttemptInfo:
    # Immutable record describing the failed pull attempts stored for one event.
    # Built by `get_event_failed_pull_attempt_info` from a row of the
    # `event_failed_pull_attempts` table.

    # ID of the event that failed to be pulled (part of the lookup key).
    event_id: str
    # ID of the room the event belongs to (part of the lookup key).
    room_id: str
    # Value of the `num_attempts` column for this event.
    num_attempts: int
    # Value of the `last_attempt_ts` column.
    # NOTE(review): presumably milliseconds, since the recording path passes
    # `self._clock.time_msec()` -- confirm against the upsert SQL.
    last_attempt_ts: int
    # Value of the `last_cause` column (presumably a description of why the
    # most recent attempt failed -- confirm against the recording caller).
    last_cause: str


class _NoChainCoverIndex(Exception):
def __init__(self, room_id: str):
super().__init__("Unexpectedly no chain cover for events in %s" % (room_id,))
Expand Down Expand Up @@ -1583,6 +1592,31 @@ def _record_event_failed_pull_attempt_upsert_txn(

txn.execute(sql, (room_id, event_id, 1, self._clock.time_msec(), cause))

@trace
async def get_event_failed_pull_attempt_info(
    self,
    room_id: str,
    event_id: str,
) -> Optional[EventFailedPullAttemptInfo]:
    """
    Look up the failed pull attempt record stored for an event.

    Args:
        room_id: The room the event belongs to.
        event_id: The event to look up.

    Returns:
        An `EventFailedPullAttemptInfo` built from the matching row of
        `event_failed_pull_attempts`, or None when the lookup yields nothing.
    """
    row = await self.db_pool.simple_select_one(
        table="event_failed_pull_attempts",
        keyvalues={"room_id": room_id, "event_id": event_id},
        retcols=["num_attempts", "last_attempt_ts", "last_cause"],
        allow_none=True,
        desc="get_event_failed_pull_attempt_info",
    )

    if row is not None:
        # The key columns are already known from the arguments; only the
        # remaining columns need to be read from the row.
        return EventFailedPullAttemptInfo(
            event_id=event_id,
            room_id=room_id,
            num_attempts=row["num_attempts"],
            last_attempt_ts=row["last_attempt_ts"],
            last_cause=row["last_cause"],
        )

    # No row was returned for this (room_id, event_id) pair.
    return None

@trace
async def get_event_ids_to_not_pull_from_backoff(
self,
Expand Down