feat(audit): replace hour-trunc dedupe with LAG() session rollup (120-min gap)

Rewrites the activity feed query to group consecutive events on the same
(actor, document, kind) into sessions separated by >120 min gaps. A session
becomes one row with count = events-in-session and happenedAtUntil = last
event timestamp. Singletons keep count=1 / happenedAtUntil=null.

Algorithm: LAG() to get the previous event's timestamp in the same partition,
mark a new session when gap > 7200s, then SUM() over an unbounded preceding
window yields a running session_id. Aggregation groups by session_id.

COMMENT_ADDED and MENTION_CREATED always start a new session — these kinds
never roll up so each event stays its own row.

Also adds BLOCK_REVIEWED to the eligible-kinds WHERE clause (Chronik spec §02)
so reviewed blocks appear in the activity feed.

Five new integration tests cover combine-within-2h, split-at-boundary,
no-hard-cap-on-long-session, never-rolls-up-comments/mentions, and the
count/happenedAtUntil contract on both singletons and rollups.

Part of #285.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Marcel
2026-04-20 16:02:16 +02:00
committed by marcel
parent 22ddf8c12a
commit feefa682b3
2 changed files with 231 additions and 25 deletions

View File

@@ -23,33 +23,77 @@ public interface AuditLogQueryRepository extends JpaRepository<AuditLog, UUID> {
Optional<UUID> findMostRecentDocumentIdByActor(@Param("userId") UUID userId);
@Query(value = """
SELECT * FROM (
SELECT DISTINCT ON (a.actor_id, a.document_id, a.kind, date_trunc('hour', a.happened_at))
a.kind AS kind,
a.actor_id AS actorId,
CASE
WHEN u.first_name IS NOT NULL AND u.last_name IS NOT NULL
THEN UPPER(LEFT(u.first_name, 1)) || UPPER(LEFT(u.last_name, 1))
WHEN u.first_name IS NOT NULL THEN UPPER(LEFT(u.first_name, 1))
WHEN u.last_name IS NOT NULL THEN UPPER(LEFT(u.last_name, 1))
ELSE '?'
END AS actorInitials,
COALESCE(u.color, '') AS actorColor,
CONCAT_WS(' ', u.first_name, u.last_name) AS actorName,
a.document_id AS documentId,
a.happened_at AS happened_at,
(a.kind = 'MENTION_CREATED'
AND a.payload->>'mentionedUserId' = :currentUserId) AS youMentioned,
1 AS count,
CAST(NULL AS TIMESTAMPTZ) AS happenedAtUntil
WITH events AS (
SELECT
a.kind,
a.actor_id,
a.document_id,
a.happened_at,
a.payload,
LAG(a.happened_at) OVER (
PARTITION BY a.actor_id, a.document_id, a.kind
ORDER BY a.happened_at
) AS prev_happened_at
FROM audit_log a
LEFT JOIN users u ON u.id = a.actor_id
WHERE a.kind IN ('TEXT_SAVED','FILE_UPLOADED','ANNOTATION_CREATED','COMMENT_ADDED','MENTION_CREATED')
WHERE a.kind IN ('TEXT_SAVED','FILE_UPLOADED','ANNOTATION_CREATED',
'BLOCK_REVIEWED','COMMENT_ADDED','MENTION_CREATED')
AND a.document_id IS NOT NULL
ORDER BY a.actor_id, a.document_id, a.kind,
date_trunc('hour', a.happened_at), a.happened_at DESC
) deduped
ORDER BY happened_at DESC
),
sessions_marked AS (
SELECT
kind, actor_id, document_id, happened_at, payload,
CASE
WHEN kind IN ('COMMENT_ADDED','MENTION_CREATED') THEN 1
WHEN prev_happened_at IS NULL THEN 1
WHEN EXTRACT(EPOCH FROM (happened_at - prev_happened_at)) > 7200 THEN 1
ELSE 0
END AS is_new_session
FROM events
),
sessions AS (
SELECT
kind, actor_id, document_id, happened_at, payload,
SUM(is_new_session) OVER (
PARTITION BY actor_id, document_id, kind
ORDER BY happened_at
ROWS UNBOUNDED PRECEDING
) AS session_id
FROM sessions_marked
),
aggregated AS (
SELECT
s.kind,
s.actor_id,
s.document_id,
s.session_id,
MIN(s.happened_at) AS happened_at,
CASE WHEN COUNT(*) > 1 THEN MAX(s.happened_at) ELSE NULL END AS happened_at_until,
COUNT(*)::int AS count,
BOOL_OR(s.kind = 'MENTION_CREATED'
AND s.payload->>'mentionedUserId' = :currentUserId) AS you_mentioned
FROM sessions s
GROUP BY s.kind, s.actor_id, s.document_id, s.session_id
)
SELECT
ag.kind AS kind,
ag.actor_id AS actorId,
CASE
WHEN u.first_name IS NOT NULL AND u.last_name IS NOT NULL
THEN UPPER(LEFT(u.first_name, 1)) || UPPER(LEFT(u.last_name, 1))
WHEN u.first_name IS NOT NULL THEN UPPER(LEFT(u.first_name, 1))
WHEN u.last_name IS NOT NULL THEN UPPER(LEFT(u.last_name, 1))
ELSE '?'
END AS actorInitials,
COALESCE(u.color, '') AS actorColor,
CONCAT_WS(' ', u.first_name, u.last_name) AS actorName,
ag.document_id AS documentId,
ag.happened_at AS happened_at,
ag.you_mentioned AS youMentioned,
ag.count AS count,
ag.happened_at_until AS happenedAtUntil
FROM aggregated ag
LEFT JOIN users u ON u.id = ag.actor_id
ORDER BY ag.happened_at DESC
LIMIT :limit
""", nativeQuery = true)
List<ActivityFeedRow> findDedupedActivityFeed(