matrix-org · richvdh · Oct 20, 2021 · Oct 19, 2021 · Oct 4, 2021 · Oct 4, 2021
@@ -0,0 +1 @@
+Clean up some of the federation event authentication code for clarity.
@@ -65,7 +65,6 @@
 from synapse.state import StateResolutionStore
 from synapse.storage.databases.main.events_worker import EventRedactBehaviour
 from synapse.types import (
-    MutableStateMap,
     PersistedEventPosition,
     RoomStreamToken,
     StateMap,
@@ -1417,13 +1416,8 @@ async def _check_event_auth(
         }
 
         try:
-            (
-                context,
-                auth_events_for_auth,
-            ) = await self._update_auth_events_and_context_for_auth(
-                origin,
+            updated_auth_events = await self._update_auth_events_for_auth(
                 event,
-                context,
                 calculated_auth_event_map=calculated_auth_event_map,
             )
         except Exception:
@@ -1436,6 +1430,14 @@ async def _check_event_auth(
                 "Ignoring failure and continuing processing of event.",
                 event.event_id,
             )
+            updated_auth_events = None
+
+        if updated_auth_events:
+            context = await self._update_context_for_auth_events(
+                event, context, updated_auth_events
+            )
+            auth_events_for_auth = updated_auth_events
+        else:
             auth_events_for_auth = calculated_auth_event_map
 
         try:
@@ -1560,13 +1562,11 @@ async def _check_for_soft_fail(
             soft_failed_event_counter.inc()
             event.internal_metadata.soft_failed = True
 
-    async def _update_auth_events_and_context_for_auth(
+    async def _update_auth_events_for_auth(
         self,
-        origin: str,
         event: EventBase,
-        context: EventContext,
         calculated_auth_event_map: StateMap[EventBase],
-    ) -> Tuple[EventContext, StateMap[EventBase]]:
+    ) -> Optional[StateMap[EventBase]]:
         """Helper for _check_event_auth. See there for docs.
 
         Checks whether a given event has the expected auth events. If it
@@ -1579,96 +1579,27 @@ async def _update_auth_events_and_context_for_auth(
         processing of the event.
 
         Args:
-            origin:
             event:
-            context:
 
             calculated_auth_event_map:
                 Our calculated auth_events based on the state of the room
                 at the event's position in the DAG.
 
         Returns:
-            updated context, updated auth event map
+            updated auth event map, or None if no changes are needed.
+
         """
         assert not event.internal_metadata.outlier
 
-        # take a copy of calculated_auth_event_map before we modify it.
-        auth_events: MutableStateMap[EventBase] = dict(calculated_auth_event_map)
-
+        # check for events which are in the event's claimed auth_events, but not
+        # in our calculated event map.
         event_auth_events = set(event.auth_event_ids())
-
-        # missing_auth is the set of the event's auth_events which we don't yet have
-        # in auth_events.
-        missing_auth = event_auth_events.difference(
-            e.event_id for e in auth_events.values()
-        )
-
-        # if we have missing events, we need to fetch those events from somewhere.
-        #
-        # we start by checking if they are in the store, and then try calling /event_auth/.
-        #
-        # TODO: this code is now redundant, since it should be impossible for us to
-        #   get here without already having the auth events.
-        if missing_auth:
-            have_events = await self._store.have_seen_events(
-                event.room_id, missing_auth
-            )
-            logger.debug("Events %s are in the store", have_events)
-            missing_auth.difference_update(have_events)
-
-        # missing_auth is now the set of event_ids which:
-        #  a. are listed in event.auth_events, *and*
-        #  b. are *not* part of our calculated auth events based on room state, *and*
-        #  c. are *not* yet in our database.
-
-        if missing_auth:
-            # If we don't have all the auth events, we need to get them.
-            logger.info("auth_events contains unknown events: %s", missing_auth)
-            try:
-                await self._get_remote_auth_chain_for_event(
-                    origin, event.room_id, event.event_id
-                )
-            except Exception:
-                logger.exception("Failed to get auth chain")
-            else:
-                # load any auth events we might have persisted from the database. This
-                # has the side-effect of correctly setting the rejected_reason on them.
-                auth_events.update(
-                    {
-                        (ae.type, ae.state_key): ae
-                        for ae in await self._store.get_events_as_list(
-                            missing_auth, allow_rejected=True
-                        )
-                    }
-                )
-
-        # auth_events now contains
-        #   1. our *calculated* auth events based on the room state, plus:
-        #   2. any events which:
-        #       a. are listed in `event.auth_events`, *and*
-        #       b. are not part of our calculated auth events, *and*
-        #       c. were not in our database before the call to /event_auth
-        #       d. have since been added to our database (most likely by /event_auth).
-
         different_auth = event_auth_events.difference(
-            e.event_id for e in auth_events.values()
+            e.event_id for e in calculated_auth_event_map.values()
         )
 
-        # different_auth is the set of events which *are* in `event.auth_events`, but
-        # which are *not* in `auth_events`. Comparing with (2.) above, this means
-        # exclusively the set of `event.auth_events` which we already had in our
-        # database before any call to /event_auth.
-        #
-        # I'm reasonably sure that the fact that events returned by /event_auth are
-        # blindly added to auth_events (and hence excluded from different_auth) is a bug
-        # - though it's a very long-standing one (see
-        # https://github.com/matrix-org/synapse/commit/78015948a7febb18e000651f72f8f58830a55b93#diff-0bc92da3d703202f5b9be2d3f845e375f5b1a6bc6ba61705a8af9be1121f5e42R786
-        # from Jan 2015 which seems to add it, though it actually just moves it from
-        # elsewhere (before that, it gets lost in a mess of huge "various bug fixes"
-        # PRs).
-
         if not different_auth:
-            return context, auth_events
+            return None
 
         logger.info(
             "auth_events refers to events which are not in our calculated auth "
@@ -1680,51 +1611,43 @@ async def _update_auth_events_and_context_for_auth(
         # necessary?
         different_events = await self._store.get_events_as_list(different_auth)
 
+        # double-check they're all in the same room - we should already have checked
+        # this but it doesn't hurt to check again.
         for d in different_events:
-            if d.room_id != event.room_id:
-                logger.warning(
-                    "Event %s refers to auth_event %s which is in a different room",
-                    event.event_id,
-                    d.event_id,
-                )
-
-                # don't attempt to resolve the claimed auth events against our own
-                # in this case: just use our own auth events.
-                #
-                # XXX: should we reject the event in this case? It feels like we should,
-                # but then shouldn't we also do so if we've failed to fetch any of the
-                # auth events?
-                return context, auth_events
+            assert (
+                d.room_id == event.room_id
+            ), f"Event {event.event_id} refers to auth_event {d.event_id} which is in a different room"
 
         # now we state-resolve between our own idea of the auth events, and the remote's
         # idea of them.
 
-        local_state = auth_events.values()
-        remote_auth_events = dict(auth_events)
+        local_state = calculated_auth_event_map.values()
+        remote_auth_events = dict(calculated_auth_event_map)
         remote_auth_events.update({(d.type, d.state_key): d for d in different_events})
         remote_state = remote_auth_events.values()
 
         room_version = await self._store.get_room_version_id(event.room_id)
         new_state = await self._state_handler.resolve_events(
             room_version, (local_state, remote_state), event
         )
+        different_state = {
+            (d.type, d.state_key): d
+            for d in new_state.values()
+            if calculated_auth_event_map.get((d.type, d.state_key)) != d
+        }
+        if not different_state:
+            logger.info("State res returned no new state")
+            return None
 
         logger.info(
             "After state res: updating auth_events with new state %s",
-            {
-                d
-                for d in new_state.values()
-                if auth_events.get((d.type, d.state_key)) != d
-            },
+            different_state.values(),
         )
 
-        auth_events.update(new_state)
-
-        context = await self._update_context_for_auth_events(
-            event, context, auth_events
-        )
-
-        return context, auth_events
+        # take a copy of calculated_auth_event_map before we modify it.
+        auth_events = dict(calculated_auth_event_map)
+        auth_events.update(different_state)
+        return auth_events
 
     async def _load_or_fetch_auth_events_for_event(
         self, destination: str, event: EventBase