Skip to content

Commit b732d13

Browse files
committed
Sliding sync: various fixups to the background update (#17652)
1 parent 5562a89 commit b732d13

File tree

4 files changed

+186
-180
lines changed

4 files changed

+186
-180
lines changed

changelog.d/17652.misc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Pre-populate room data used in experimental [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575) Sliding Sync `/sync` endpoint for quick filtering/sorting.

synapse/storage/databases/main/events.py

Lines changed: 41 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1980,7 +1980,12 @@ def _get_sliding_sync_insert_values_from_state_map(
19801980
if state_key == (EventTypes.Create, ""):
19811981
room_type = event.content.get(EventContentFields.ROOM_TYPE)
19821982
# Scrutinize JSON values
1983-
if room_type is None or isinstance(room_type, str):
1983+
if room_type is None or (
1984+
isinstance(room_type, str)
1985+
# We ignore values with null bytes as Postgres doesn't allow them in
1986+
# text columns.
1987+
and "\0" not in room_type
1988+
):
19841989
sliding_sync_insert_map["room_type"] = room_type
19851990
elif state_key == (EventTypes.RoomEncryption, ""):
19861991
encryption_algorithm = event.content.get(
@@ -1990,15 +1995,26 @@ def _get_sliding_sync_insert_values_from_state_map(
19901995
sliding_sync_insert_map["is_encrypted"] = is_encrypted
19911996
elif state_key == (EventTypes.Name, ""):
19921997
room_name = event.content.get(EventContentFields.ROOM_NAME)
1993-
# Scrutinize JSON values
1994-
if room_name is None or isinstance(room_name, str):
1998+
# Scrutinize JSON values. We ignore values with nulls as
1999+
# Postgres doesn't allow null bytes in text columns.
2000+
if room_name is None or (
2001+
isinstance(room_name, str)
2002+
# We ignore values with null bytes as Postgres doesn't allow them in
2003+
# text columns.
2004+
and "\0" not in room_name
2005+
):
19952006
sliding_sync_insert_map["room_name"] = room_name
19962007
elif state_key == (EventTypes.Tombstone, ""):
19972008
successor_room_id = event.content.get(
19982009
EventContentFields.TOMBSTONE_SUCCESSOR_ROOM
19992010
)
20002011
# Scrutinize JSON values
2001-
if successor_room_id is None or isinstance(successor_room_id, str):
2012+
if successor_room_id is None or (
2013+
isinstance(successor_room_id, str)
2014+
# We ignore values with null bytes as Postgres doesn't allow them in
2015+
# text columns.
2016+
and "\0" not in successor_room_id
2017+
):
20022018
sliding_sync_insert_map["tombstone_successor_room_id"] = (
20032019
successor_room_id
20042020
)
@@ -2081,6 +2097,21 @@ def _get_sliding_sync_insert_values_from_stripped_state(
20812097
else None
20822098
)
20832099

2100+
# Check for null bytes in the room name and type. We have to
2101+
# ignore values with null bytes as Postgres doesn't allow them
2102+
# in text columns.
2103+
if (
2104+
sliding_sync_insert_map["room_name"] is not None
2105+
and "\0" in sliding_sync_insert_map["room_name"]
2106+
):
2107+
sliding_sync_insert_map.pop("room_name")
2108+
2109+
if (
2110+
sliding_sync_insert_map["room_type"] is not None
2111+
and "\0" in sliding_sync_insert_map["room_type"]
2112+
):
2113+
sliding_sync_insert_map.pop("room_type")
2114+
20842115
# Find the tombstone_successor_room_id
20852116
# Note: This isn't one of the stripped state events according to the spec
20862117
# but seems like there is no reason not to support this kind of thing.
@@ -2095,6 +2126,12 @@ def _get_sliding_sync_insert_values_from_stripped_state(
20952126
else None
20962127
)
20972128

2129+
if (
2130+
sliding_sync_insert_map["tombstone_successor_room_id"] is not None
2131+
and "\0" in sliding_sync_insert_map["tombstone_successor_room_id"]
2132+
):
2133+
sliding_sync_insert_map.pop("tombstone_successor_room_id")
2134+
20982135
else:
20992136
# No stripped state provided
21002137
sliding_sync_insert_map["has_known_state"] = False

synapse/storage/databases/main/events_bg_updates.py

Lines changed: 144 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
)
4848
from synapse.storage.databases.main.state_deltas import StateDeltasStore
4949
from synapse.storage.databases.main.stream import StreamWorkerStore
50+
from synapse.storage.engines import PostgresEngine
5051
from synapse.storage.types import Cursor
5152
from synapse.types import JsonDict, RoomStreamToken, StateMap, StrCollection
5253
from synapse.types.handlers import SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES
@@ -1877,9 +1878,29 @@ async def _sliding_sync_membership_snapshots_bg_update(
18771878
def _find_memberships_to_update_txn(
18781879
txn: LoggingTransaction,
18791880
) -> List[
1880-
Tuple[str, Optional[str], str, str, str, str, int, Optional[str], bool]
1881+
Tuple[
1882+
str,
1883+
Optional[str],
1884+
Optional[str],
1885+
str,
1886+
str,
1887+
str,
1888+
str,
1889+
int,
1890+
Optional[str],
1891+
bool,
1892+
]
18811893
]:
18821894
# Fetch the set of event IDs that we want to update
1895+
#
1896+
# We skip over rows which we've already handled, i.e. have a
1897+
# matching row in `sliding_sync_membership_snapshots` with the same
1898+
# room, user and event ID.
1899+
#
1900+
# We also ignore rooms that the user has left themselves (i.e. not
1901+
# kicked). This is to avoid having to port lots of old rooms that we
1902+
# will never send down sliding sync (as we exclude such rooms from
1903+
# initial syncs).
18831904

18841905
if initial_phase:
18851906
# There are some old out-of-band memberships (before
@@ -1892,6 +1913,7 @@ def _find_memberships_to_update_txn(
18921913
SELECT
18931914
c.room_id,
18941915
r.room_id,
1916+
r.room_version,
18951917
c.user_id,
18961918
e.sender,
18971919
c.event_id,
@@ -1900,9 +1922,11 @@ def _find_memberships_to_update_txn(
19001922
e.instance_name,
19011923
e.outlier
19021924
FROM local_current_membership AS c
1925+
LEFT JOIN sliding_sync_membership_snapshots AS m USING (room_id, user_id)
19031926
INNER JOIN events AS e USING (event_id)
19041927
LEFT JOIN rooms AS r ON (c.room_id = r.room_id)
19051928
WHERE (c.room_id, c.user_id) > (?, ?)
1929+
AND (m.user_id IS NULL OR c.event_id != m.membership_event_id)
19061930
ORDER BY c.room_id ASC, c.user_id ASC
19071931
LIMIT ?
19081932
""",
@@ -1922,7 +1946,8 @@ def _find_memberships_to_update_txn(
19221946
"""
19231947
SELECT
19241948
c.room_id,
1925-
c.room_id,
1949+
r.room_id,
1950+
r.room_version,
19261951
c.user_id,
19271952
e.sender,
19281953
c.event_id,
@@ -1931,9 +1956,12 @@ def _find_memberships_to_update_txn(
19311956
e.instance_name,
19321957
e.outlier
19331958
FROM local_current_membership AS c
1959+
LEFT JOIN sliding_sync_membership_snapshots AS m USING (room_id, user_id)
19341960
INNER JOIN events AS e USING (event_id)
1935-
WHERE event_stream_ordering > ?
1936-
ORDER BY event_stream_ordering ASC
1961+
LEFT JOIN rooms AS r ON (c.room_id = r.room_id)
1962+
WHERE c.event_stream_ordering > ?
1963+
AND (m.user_id IS NULL OR c.event_id != m.membership_event_id)
1964+
ORDER BY c.event_stream_ordering ASC
19371965
LIMIT ?
19381966
""",
19391967
(last_event_stream_ordering, batch_size),
@@ -1944,7 +1972,16 @@ def _find_memberships_to_update_txn(
19441972
memberships_to_update_rows = cast(
19451973
List[
19461974
Tuple[
1947-
str, Optional[str], str, str, str, str, int, Optional[str], bool
1975+
str,
1976+
Optional[str],
1977+
Optional[str],
1978+
str,
1979+
str,
1980+
str,
1981+
str,
1982+
int,
1983+
Optional[str],
1984+
bool,
19481985
]
19491986
],
19501987
txn.fetchall(),
@@ -1977,7 +2014,7 @@ def _find_memberships_to_update_txn(
19772014

19782015
def _find_previous_invite_or_knock_membership_txn(
19792016
txn: LoggingTransaction, room_id: str, user_id: str, event_id: str
1980-
) -> Tuple[str, str]:
2017+
) -> Optional[Tuple[str, str]]:
19812018
# Find the previous invite/knock event before the leave event
19822019
#
19832020
# Here are some notes on how we landed on this query:
@@ -2027,8 +2064,13 @@ def _find_previous_invite_or_knock_membership_txn(
20272064
)
20282065
row = txn.fetchone()
20292066

2030-
# We should see a corresponding previous invite/knock event
2031-
assert row is not None
2067+
if row is None:
2068+
# Generally we should have an invite or knock event for leaves
2069+
# that are outliers, however this may not always be the case
2070+
# (e.g. a local user got kicked but the kick event got pulled in
2071+
# as an outlier).
2072+
return None
2073+
20322074
event_id, membership = row
20332075

20342076
return event_id, membership
@@ -2043,6 +2085,7 @@ def _find_previous_invite_or_knock_membership_txn(
20432085
for (
20442086
room_id,
20452087
room_id_from_rooms_table,
2088+
room_version_id,
20462089
user_id,
20472090
sender,
20482091
membership_event_id,
@@ -2061,6 +2104,14 @@ def _find_previous_invite_or_knock_membership_txn(
20612104
Membership.BAN,
20622105
)
20632106

2107+
if (
2108+
room_version_id is not None
2109+
and room_version_id not in KNOWN_ROOM_VERSIONS
2110+
):
2111+
# Ignore rooms with unknown room versions (these were
2112+
# experimental rooms, that we no longer support).
2113+
continue
2114+
20642115
# There are some old out-of-band memberships (before
20652116
# https://github.com/matrix-org/synapse/issues/6983) where we don't have the
20662117
# corresponding room stored in the `rooms` table. We have a `FOREIGN KEY`
@@ -2148,14 +2199,17 @@ def _find_previous_invite_or_knock_membership_txn(
21482199
# in the events table though. We'll just say that we don't
21492200
# know the state for these rooms and continue on with our
21502201
# day.
2151-
sliding_sync_membership_snapshots_insert_map["has_known_state"] = (
2152-
False
2153-
)
2202+
sliding_sync_membership_snapshots_insert_map = {
2203+
"has_known_state": False,
2204+
"room_type": None,
2205+
"room_name": None,
2206+
"is_encrypted": False,
2207+
}
21542208
elif membership in (Membership.INVITE, Membership.KNOCK) or (
21552209
membership in (Membership.LEAVE, Membership.BAN) and is_outlier
21562210
):
2157-
invite_or_knock_event_id = membership_event_id
2158-
invite_or_knock_membership = membership
2211+
invite_or_knock_event_id = None
2212+
invite_or_knock_membership = None
21592213

21602214
# If the event is an `out_of_band_membership` (special case of
21612215
# `outlier`), we never had historical state so we have to pull from
@@ -2164,35 +2218,55 @@ def _find_previous_invite_or_knock_membership_txn(
21642218
# membership (i.e. the room shouldn't disappear if you're using the
21652219
# `is_encrypted` filter and you leave).
21662220
if membership in (Membership.LEAVE, Membership.BAN) and is_outlier:
2167-
(
2168-
invite_or_knock_event_id,
2169-
invite_or_knock_membership,
2170-
) = await self.db_pool.runInteraction(
2221+
previous_membership = await self.db_pool.runInteraction(
21712222
"sliding_sync_membership_snapshots_bg_update._find_previous_invite_or_knock_membership_txn",
21722223
_find_previous_invite_or_knock_membership_txn,
21732224
room_id,
21742225
user_id,
21752226
membership_event_id,
21762227
)
2228+
if previous_membership is not None:
2229+
(
2230+
invite_or_knock_event_id,
2231+
invite_or_knock_membership,
2232+
) = previous_membership
2233+
else:
2234+
invite_or_knock_event_id = membership_event_id
2235+
invite_or_knock_membership = membership
21772236

2178-
# Pull from the stripped state on the invite/knock event
2179-
invite_or_knock_event = await self.get_event(invite_or_knock_event_id)
2180-
2181-
raw_stripped_state_events = None
2182-
if invite_or_knock_membership == Membership.INVITE:
2183-
invite_room_state = invite_or_knock_event.unsigned.get(
2184-
"invite_room_state"
2185-
)
2186-
raw_stripped_state_events = invite_room_state
2187-
elif invite_or_knock_membership == Membership.KNOCK:
2188-
knock_room_state = invite_or_knock_event.unsigned.get(
2189-
"knock_room_state"
2237+
if (
2238+
invite_or_knock_event_id is not None
2239+
and invite_or_knock_membership is not None
2240+
):
2241+
# Pull from the stripped state on the invite/knock event
2242+
invite_or_knock_event = await self.get_event(
2243+
invite_or_knock_event_id
21902244
)
2191-
raw_stripped_state_events = knock_room_state
21922245

2193-
sliding_sync_membership_snapshots_insert_map = PersistEventsStore._get_sliding_sync_insert_values_from_stripped_state(
2194-
raw_stripped_state_events
2195-
)
2246+
raw_stripped_state_events = None
2247+
if invite_or_knock_membership == Membership.INVITE:
2248+
invite_room_state = invite_or_knock_event.unsigned.get(
2249+
"invite_room_state"
2250+
)
2251+
raw_stripped_state_events = invite_room_state
2252+
elif invite_or_knock_membership == Membership.KNOCK:
2253+
knock_room_state = invite_or_knock_event.unsigned.get(
2254+
"knock_room_state"
2255+
)
2256+
raw_stripped_state_events = knock_room_state
2257+
2258+
sliding_sync_membership_snapshots_insert_map = PersistEventsStore._get_sliding_sync_insert_values_from_stripped_state(
2259+
raw_stripped_state_events
2260+
)
2261+
else:
2262+
# We couldn't find any state for the membership, so we just have to
2263+
# leave it as empty.
2264+
sliding_sync_membership_snapshots_insert_map = {
2265+
"has_known_state": False,
2266+
"room_type": None,
2267+
"room_name": None,
2268+
"is_encrypted": False,
2269+
}
21962270

21972271
# We should have some insert values for each room, even if no
21982272
# stripped state is on the event because we still want to record
@@ -2311,19 +2385,42 @@ def _fill_table_txn(txn: LoggingTransaction) -> None:
23112385
)
23122386
# We need to find the `forgotten` value during the transaction because
23132387
# we can't risk inserting stale data.
2314-
txn.execute(
2315-
"""
2316-
UPDATE sliding_sync_membership_snapshots
2317-
SET
2318-
forgotten = (SELECT forgotten FROM room_memberships WHERE event_id = ?)
2319-
WHERE room_id = ? and user_id = ?
2320-
""",
2321-
(
2322-
membership_event_id,
2323-
room_id,
2324-
user_id,
2325-
),
2326-
)
2388+
if isinstance(txn.database_engine, PostgresEngine):
2389+
txn.execute(
2390+
"""
2391+
UPDATE sliding_sync_membership_snapshots
2392+
SET
2393+
forgotten = m.forgotten
2394+
FROM room_memberships AS m
2395+
WHERE sliding_sync_membership_snapshots.room_id = ?
2396+
AND sliding_sync_membership_snapshots.user_id = ?
2397+
AND membership_event_id = ?
2398+
AND membership_event_id = m.event_id
2399+
AND m.event_id IS NOT NULL
2400+
""",
2401+
(
2402+
room_id,
2403+
user_id,
2404+
membership_event_id,
2405+
),
2406+
)
2407+
else:
2408+
# SQLite doesn't support UPDATE FROM before 3.33.0, so we do
2409+
# this via sub-selects.
2410+
txn.execute(
2411+
"""
2412+
UPDATE sliding_sync_membership_snapshots
2413+
SET
2414+
forgotten = (SELECT forgotten FROM room_memberships WHERE event_id = ?)
2415+
WHERE room_id = ? and user_id = ? AND membership_event_id = ?
2416+
""",
2417+
(
2418+
membership_event_id,
2419+
room_id,
2420+
user_id,
2421+
membership_event_id,
2422+
),
2423+
)
23272424

23282425
await self.db_pool.runInteraction(
23292426
"sliding_sync_membership_snapshots_bg_update", _fill_table_txn
@@ -2333,6 +2430,7 @@ def _fill_table_txn(txn: LoggingTransaction) -> None:
23332430
(
23342431
room_id,
23352432
_room_id_from_rooms_table,
2433+
_room_version_id,
23362434
user_id,
23372435
_sender,
23382436
_membership_event_id,

0 commit comments

Comments
 (0)