Skip to content

Commit e1ed959

Browse files
Sliding Sync: Get bump_stamp from new sliding sync tables because it's faster (#17658)
Get `bump_stamp` from [new sliding sync tables](#17512) which should be faster (performance) than flipping through the latest events in the room.
1 parent 5c22941 commit e1ed959

File tree

5 files changed

+333
-56
lines changed

5 files changed

+333
-56
lines changed

changelog.d/17658.misc

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Get `bump_stamp` from [new sliding sync tables](https://github.com/element-hq/synapse/pull/17512) which should be faster.

synapse/handlers/sliding_sync/__init__.py

+56-18
Original file line numberDiff line numberDiff line change
@@ -1040,29 +1040,67 @@ async def get_room_sync_data(
10401040
)
10411041
)
10421042

1043-
# By default, just choose the membership event position
1043+
# Figure out the last bump event in the room
1044+
#
1045+
# By default, just choose the membership event position for any non-join membership
10441046
bump_stamp = room_membership_for_user_at_to_token.event_pos.stream
1045-
1046-
# Figure out the last bump event in the room if we're in the room.
1047+
# If we're joined to the room, we need to find the last bump event before the
1048+
# `to_token`
10471049
if room_membership_for_user_at_to_token.membership == Membership.JOIN:
1048-
last_bump_event_result = (
1049-
await self.store.get_last_event_pos_in_room_before_stream_ordering(
1050-
room_id,
1051-
to_token.room_key,
1052-
event_types=SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES,
1053-
)
1050+
# We can quickly query for the latest bump event in the room using the
1051+
# sliding sync tables.
1052+
latest_room_bump_stamp = await self.store.get_latest_bump_stamp_for_room(
1053+
room_id
10541054
)
10551055

1056-
# But if we found a bump event, use that instead
1057-
if last_bump_event_result is not None:
1058-
_, new_bump_event_pos = last_bump_event_result
1056+
min_to_token_position = to_token.room_key.stream
10591057

1060-
# If we've just joined a remote room, then the last bump event may
1061-
# have been backfilled (and so have a negative stream ordering).
1062-
# These negative stream orderings can't sensibly be compared, so
1063-
# instead we use the membership event position.
1064-
if new_bump_event_pos.stream > 0:
1065-
bump_stamp = new_bump_event_pos.stream
1058+
# If we can rely on the new sliding sync tables and the `bump_stamp` is
1059+
# `None`, just fallback to the membership event position. This can happen
1060+
# when we've just joined a remote room and all the events are backfilled.
1061+
if (
1062+
# FIXME: The background job check can be removed once we bump
1063+
# `SCHEMA_COMPAT_VERSION` and run the foreground update for
1064+
# `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots`
1065+
# (tracked by https://github.com/element-hq/synapse/issues/17623)
1066+
await self.store.have_finished_sliding_sync_background_jobs()
1067+
and latest_room_bump_stamp is None
1068+
):
1069+
pass
1070+
1071+
# The `bump_stamp` stored in the database might be ahead of our token. Since
1072+
# `bump_stamp` is only a `stream_ordering` position, we can't be 100% sure
1073+
# that's before the `to_token` in all scenarios. The only scenario we can be
1074+
# sure of is if the `bump_stamp` is totally before the minimum position from
1075+
# the token.
1076+
#
1077+
# We don't need to check if the background update has finished, as if the
1078+
# returned bump stamp is not None then it must be up to date.
1079+
elif (
1080+
latest_room_bump_stamp is not None
1081+
and latest_room_bump_stamp < min_to_token_position
1082+
):
1083+
bump_stamp = latest_room_bump_stamp
1084+
1085+
# Otherwise, if it's within or after the `to_token`, we need to find the
1086+
# last bump event before the `to_token`.
1087+
else:
1088+
last_bump_event_result = (
1089+
await self.store.get_last_event_pos_in_room_before_stream_ordering(
1090+
room_id,
1091+
to_token.room_key,
1092+
event_types=SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES,
1093+
)
1094+
)
1095+
if last_bump_event_result is not None:
1096+
_, new_bump_event_pos = last_bump_event_result
1097+
1098+
# If we've just joined a remote room, then the last bump event may
1099+
# have been backfilled (and so have a negative stream ordering).
1100+
# These negative stream orderings can't sensibly be compared, so
1101+
# instead we use the membership event position.
1102+
if new_bump_event_pos.stream > 0:
1103+
bump_stamp = new_bump_event_pos.stream
10661104

10671105
unstable_expanded_timeline = False
10681106
prev_room_sync_config = previous_connection_state.room_configs.get(room_id)

synapse/storage/databases/main/events.py

+32-29
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,13 @@ async def _persist_events_and_state_updates(
327327

328328
async with stream_ordering_manager as stream_orderings:
329329
for (event, _), stream in zip(events_and_contexts, stream_orderings):
330+
# XXX: We can't rely on `stream_ordering`/`instance_name` being correct
331+
# at this point. We could be working with events that were previously
332+
# persisted as an `outlier` with one `stream_ordering` but are now being
333+
# persisted again and de-outliered and are being assigned a different
334+
# `stream_ordering` here that won't end up being used.
335+
# `_update_outliers_txn()` will fix this discrepancy (always use the
336+
# `stream_ordering` from the first time it was persisted).
330337
event.internal_metadata.stream_ordering = stream
331338
event.internal_metadata.instance_name = self._instance_name
332339

@@ -470,11 +477,11 @@ async def _calculate_sliding_sync_table_changes(
470477
membership_infos_to_insert_membership_snapshots.append(
471478
# XXX: We don't use `SlidingSyncMembershipInfoWithEventPos` here
472479
# because we're sourcing the event from `events_and_contexts`, we
473-
# can't rely on `stream_ordering`/`instance_name` being correct. We
474-
# could be working with events that were previously persisted as an
475-
# `outlier` with one `stream_ordering` but are now being persisted
476-
# again and de-outliered and assigned a different `stream_ordering`
477-
# that won't end up being used. Since we call
480+
# can't rely on `stream_ordering`/`instance_name` being correct at
481+
# this point. We could be working with events that were previously
482+
# persisted as an `outlier` with one `stream_ordering` but are now
483+
# being persisted again and de-outliered and assigned a different
484+
# `stream_ordering` that won't end up being used. Since we call
478485
# `_calculate_sliding_sync_table_changes()` before
479486
# `_update_outliers_txn()` which fixes this discrepancy (always use
480487
# the `stream_ordering` from the first time it was persisted), we're
@@ -591,11 +598,17 @@ async def _calculate_sliding_sync_table_changes(
591598
event_types=SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES,
592599
)
593600
)
594-
bump_stamp_to_fully_insert = (
595-
most_recent_bump_event_pos_results[1].stream
596-
if most_recent_bump_event_pos_results is not None
597-
else None
598-
)
601+
if most_recent_bump_event_pos_results is not None:
602+
_, new_bump_event_pos = most_recent_bump_event_pos_results
603+
604+
# If we've just joined a remote room, then the last bump event may
605+
# have been backfilled (and so have a negative stream ordering).
606+
# These negative stream orderings can't sensibly be compared, so
607+
# instead just leave it as `None` in the table and we will use their
608+
# membership event position as the bump event position in the
609+
# Sliding Sync API.
610+
if new_bump_event_pos.stream > 0:
611+
bump_stamp_to_fully_insert = new_bump_event_pos.stream
599612

600613
current_state_ids_map = dict(
601614
await self.store.get_partial_filtered_current_state_ids(
@@ -2123,31 +2136,26 @@ def _update_sliding_sync_tables_with_new_persisted_events_txn(
21232136
if len(events_and_contexts) == 0:
21242137
return
21252138

2126-
# We only update the sliding sync tables for non-backfilled events.
2127-
#
2128-
# Check if the first event is a backfilled event (with a negative
2129-
# `stream_ordering`). If one event is backfilled, we assume this whole batch was
2130-
# backfilled.
2131-
first_event_stream_ordering = events_and_contexts[0][
2132-
0
2133-
].internal_metadata.stream_ordering
2134-
# This should exist for persisted events
2135-
assert first_event_stream_ordering is not None
2136-
if first_event_stream_ordering < 0:
2137-
return
2138-
21392139
# Since the list is sorted ascending by `stream_ordering`, the last event should
21402140
# have the highest `stream_ordering`.
21412141
max_stream_ordering = events_and_contexts[-1][
21422142
0
21432143
].internal_metadata.stream_ordering
2144+
# `stream_ordering` should be assigned for persisted events
2145+
assert max_stream_ordering is not None
2146+
# Check if the event is a backfilled event (with a negative `stream_ordering`).
2147+
# If one event is backfilled, we assume this whole batch was backfilled.
2148+
if max_stream_ordering < 0:
2149+
# We only update the sliding sync tables for non-backfilled events.
2150+
return
2151+
21442152
max_bump_stamp = None
21452153
for event, _ in reversed(events_and_contexts):
21462154
# Sanity check that all events belong to the same room
21472155
assert event.room_id == room_id
21482156

21492157
if event.type in SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES:
2150-
# This should exist for persisted events
2158+
# `stream_ordering` should be assigned for persisted events
21512159
assert event.internal_metadata.stream_ordering is not None
21522160

21532161
max_bump_stamp = event.internal_metadata.stream_ordering
@@ -2156,11 +2164,6 @@ def _update_sliding_sync_tables_with_new_persisted_events_txn(
21562164
# matching bump event which should have the highest `stream_ordering`.
21572165
break
21582166

2159-
# We should have exited earlier if there were no events
2160-
assert (
2161-
max_stream_ordering is not None
2162-
), "Expected to have a stream_ordering if we have events"
2163-
21642167
# Handle updating the `sliding_sync_joined_rooms` table.
21652168
#
21662169
txn.execute(

synapse/storage/databases/main/sliding_sync.py

+40
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,46 @@
4141

4242

4343
class SlidingSyncStore(SQLBaseStore):
44+
async def get_latest_bump_stamp_for_room(
45+
self,
46+
room_id: str,
47+
) -> Optional[int]:
48+
"""
49+
Get the `bump_stamp` for the room.
50+
51+
The `bump_stamp` is the `stream_ordering` of the last event according to the
52+
`bump_event_types`. This helps clients sort more readily without them needing to
53+
pull in a bunch of the timeline to determine the last activity.
54+
`bump_event_types` is a thing because for example, we don't want display name
55+
changes to mark the room as unread and bump it to the top. For encrypted rooms,
56+
we just have to consider any activity as a bump because we can't see the content
57+
and the client has to figure it out for themselves.
58+
59+
This should only be called where the server is participating
60+
in the room (someone local is joined).
61+
62+
Returns:
63+
The `bump_stamp` for the room (which can be `None`).
64+
"""
65+
66+
return cast(
67+
Optional[int],
68+
await self.db_pool.simple_select_one_onecol(
69+
table="sliding_sync_joined_rooms",
70+
keyvalues={"room_id": room_id},
71+
retcol="bump_stamp",
72+
# FIXME: This should be `False` once we bump `SCHEMA_COMPAT_VERSION` and run the
73+
# foreground update for
74+
# `sliding_sync_joined_rooms`/`sliding_sync_membership_snapshots` (tracked
75+
# by https://github.com/element-hq/synapse/issues/17623)
76+
#
77+
# The should be `allow_none=False` in the future because event though
78+
# `bump_stamp` itself can be `None`, we should have a row in the
79+
# `sliding_sync_joined_rooms` table for any joined room.
80+
allow_none=True,
81+
),
82+
)
83+
4484
async def persist_per_connection_state(
4585
self,
4686
user_id: str,

0 commit comments

Comments
 (0)