Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Clean up _update_auth_events_and_context_for_auth #11122

1 change: 1 addition & 0 deletions changelog.d/11122.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Clean up some of the federation event authentication code for clarity.
150 changes: 37 additions & 113 deletions synapse/handlers/federation_event.py
Original file line number Diff line number Diff line change
Expand Up @@ -1417,13 +1417,8 @@ async def _check_event_auth(
}

try:
(
context,
auth_events_for_auth,
) = await self._update_auth_events_and_context_for_auth(
origin,
updated_auth_events = await self._update_auth_events_for_auth(
event,
context,
calculated_auth_event_map=calculated_auth_event_map,
)
except Exception:
Expand All @@ -1436,6 +1431,14 @@ async def _check_event_auth(
"Ignoring failure and continuing processing of event.",
event.event_id,
)
updated_auth_events = None

if updated_auth_events:
context = await self._update_context_for_auth_events(
event, context, updated_auth_events
)
auth_events_for_auth = updated_auth_events
else:
auth_events_for_auth = calculated_auth_event_map

try:
Expand Down Expand Up @@ -1560,13 +1563,11 @@ async def _check_for_soft_fail(
soft_failed_event_counter.inc()
event.internal_metadata.soft_failed = True

async def _update_auth_events_and_context_for_auth(
async def _update_auth_events_for_auth(
self,
origin: str,
event: EventBase,
context: EventContext,
calculated_auth_event_map: StateMap[EventBase],
) -> Tuple[EventContext, StateMap[EventBase]]:
) -> Optional[StateMap[EventBase]]:
"""Helper for _check_event_auth. See there for docs.

Checks whether a given event has the expected auth events. If it
Expand All @@ -1579,96 +1580,27 @@ async def _update_auth_events_and_context_for_auth(
processing of the event.

Args:
origin:
event:
context:

calculated_auth_event_map:
Our calculated auth_events based on the state of the room
at the event's position in the DAG.

Returns:
updated context, updated auth event map
updated auth event map, or None if no changes are needed.

"""
assert not event.internal_metadata.outlier

# take a copy of calculated_auth_event_map before we modify it.
auth_events: MutableStateMap[EventBase] = dict(calculated_auth_event_map)

# check for events which are in the event's claimed auth_events, but not
# in our calculated event map.
event_auth_events = set(event.auth_event_ids())

# missing_auth is the set of the event's auth_events which we don't yet have
# in auth_events.
missing_auth = event_auth_events.difference(
e.event_id for e in auth_events.values()
)

# if we have missing events, we need to fetch those events from somewhere.
#
# we start by checking if they are in the store, and then try calling /event_auth/.
#
# TODO: this code is now redundant, since it should be impossible for us to
# get here without already having the auth events.
if missing_auth:
have_events = await self._store.have_seen_events(
event.room_id, missing_auth
)
logger.debug("Events %s are in the store", have_events)
missing_auth.difference_update(have_events)
Copy link
Member Author

@richvdh richvdh Oct 19, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

at this point, missing_auth must be an empty set, because we all of the events in event_auth_events must be in our database (otherwise we would have rejected the event sooner, thanks to the changes in #11001).

(I toyed with the idea of changing this to an assertion, but the have_seen_events lookup isn't entirely negligible and I figured it was better just to clear it out.)


# missing_auth is now the set of event_ids which:
# a. are listed in event.auth_events, *and*
# b. are *not* part of our calculated auth events based on room state, *and*
# c. are *not* yet in our database.

if missing_auth:
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

... which means that this is never true.

# If we don't have all the auth events, we need to get them.
logger.info("auth_events contains unknown events: %s", missing_auth)
try:
await self._get_remote_auth_chain_for_event(
origin, event.room_id, event.event_id
)
except Exception:
logger.exception("Failed to get auth chain")
else:
# load any auth events we might have persisted from the database. This
# has the side-effect of correctly setting the rejected_reason on them.
auth_events.update(
{
(ae.type, ae.state_key): ae
for ae in await self._store.get_events_as_list(
missing_auth, allow_rejected=True
)
}
)

# auth_events now contains
# 1. our *calculated* auth events based on the room state, plus:
# 2. any events which:
# a. are listed in `event.auth_events`, *and*
# b. are not part of our calculated auth events, *and*
# c. were not in our database before the call to /event_auth
# d. have since been added to our database (most likely by /event_auth).

different_auth = event_auth_events.difference(
e.event_id for e in auth_events.values()
e.event_id for e in calculated_auth_event_map.values()
)

# different_auth is the set of events which *are* in `event.auth_events`, but
# which are *not* in `auth_events`. Comparing with (2.) above, this means
# exclusively the set of `event.auth_events` which we already had in our
# database before any call to /event_auth.
#
# I'm reasonably sure that the fact that events returned by /event_auth are
# blindly added to auth_events (and hence excluded from different_auth) is a bug
# - though it's a very long-standing one (see
# https://github.com/matrix-org/synapse/commit/78015948a7febb18e000651f72f8f58830a55b93#diff-0bc92da3d703202f5b9be2d3f845e375f5b1a6bc6ba61705a8af9be1121f5e42R786
# from Jan 2015 which seems to add it, though it actually just moves it from
# elsewhere (before that, it gets lost in a mess of huge "various bug fixes"
# PRs).

if not different_auth:
return context, auth_events
return None

logger.info(
"auth_events refers to events which are not in our calculated auth "
Expand All @@ -1680,51 +1612,43 @@ async def _update_auth_events_and_context_for_auth(
# necessary?
different_events = await self._store.get_events_as_list(different_auth)

# double-check they're all in the same room - we should already have checked
# this but it doesn't hurt to check again.
for d in different_events:
if d.room_id != event.room_id:
logger.warning(
"Event %s refers to auth_event %s which is in a different room",
event.event_id,
d.event_id,
)

# don't attempt to resolve the claimed auth events against our own
# in this case: just use our own auth events.
#
# XXX: should we reject the event in this case? It feels like we should,
# but then shouldn't we also do so if we've failed to fetch any of the
# auth events?
return context, auth_events
assert (
d.room_id == event.room_id
), f"Event {event.event_id} refers to auth_event {d.event_id} which is in a different room"

# now we state-resolve between our own idea of the auth events, and the remote's
# idea of them.

local_state = auth_events.values()
remote_auth_events = dict(auth_events)
local_state = calculated_auth_event_map.values()
remote_auth_events = dict(calculated_auth_event_map)
remote_auth_events.update({(d.type, d.state_key): d for d in different_events})
remote_state = remote_auth_events.values()

room_version = await self._store.get_room_version_id(event.room_id)
new_state = await self._state_handler.resolve_events(
room_version, (local_state, remote_state), event
)
different_state = {
(d.type, d.state_key): d
for d in new_state.values()
if calculated_auth_event_map.get((d.type, d.state_key)) != d
}
if not different_state:
logger.info("State res returned no new state")
return None

logger.info(
"After state res: updating auth_events with new state %s",
{
d
for d in new_state.values()
if auth_events.get((d.type, d.state_key)) != d
},
different_state.values(),
)

auth_events.update(new_state)

context = await self._update_context_for_auth_events(
event, context, auth_events
)

return context, auth_events
# take a copy of calculated_auth_event_map before we modify it.
auth_events = dict(calculated_auth_event_map)
auth_events.update(different_state)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is equivalent since different_state is new_state - calculated_auth_event_map, so including all of new_state is guaranteed to do some unnecessary updates?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, exactly so. sorry, should have made that clear.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense as a small optimization, just wasn't sure I was missing something!

return auth_events

async def _load_or_fetch_auth_events_for_event(
self, destination: str, event: EventBase
Expand Down