@@ -119,6 +119,7 @@ COMMIT;
119119 // standard test suite.
120120
121121 test ('sync_local new query' , () {
122+ // This is the query we're using now.
122123 // This query only uses a single TEMP B-TREE for the GROUP BY operation,
123124 // leading to fairly efficient execution.
124125
@@ -134,27 +135,40 @@ COMMIT;
134135 // |--USE TEMP B-TREE FOR GROUP BY
135136 // `--CORRELATED SCALAR SUBQUERY 3
136137 // `--SEARCH r USING INDEX ps_oplog_row (row_type=? AND row_id=?)
138+ //
139+ // For details on the max(r.op_id) clause, see:
140+ // https://sqlite.org/lang_select.html#bare_columns_in_an_aggregate_query
141+ // > If there is exactly one min() or max() aggregate in the query, then all bare columns in the result
142+ // > set take values from an input row which also contains the minimum or maximum.
137143
138144 var timer = Stopwatch ()..start ();
139145 final q = '''
146+ -- 1. Filter oplog by the ops added but not applied yet (oplog b).
147+ -- We do not do any DISTINCT operation here, since that introduces a temp b-tree.
148+ -- We filter out duplicates using the GROUP BY below.
140149WITH updated_rows AS (
141150 SELECT b.row_type, b.row_id FROM ps_buckets AS buckets
142151 CROSS JOIN ps_oplog AS b ON b.bucket = buckets.id
143152 AND (b.op_id > buckets.last_applied_op)
144153 UNION ALL SELECT row_type, row_id FROM ps_updated_rows
145154)
146155
156+ -- 2. Find *all* current ops over different buckets for those objects (oplog r).
147157SELECT
148158 b.row_type,
149159 b.row_id,
150160 (
161+ -- 3. For each unique row, select the data from the latest oplog entry.
162+ -- The max(r.op_id) clause is used to select the latest oplog entry.
163+ -- The iif is to avoid the max(r.op_id) column ending up in the results.
151164 SELECT iif(max(r.op_id), r.data, null)
152165 FROM ps_oplog r
153166 WHERE r.row_type = b.row_type
154167 AND r.row_id = b.row_id
155168
156169 ) as data
157170 FROM updated_rows b
171+ -- Group for (2)
158172 GROUP BY b.row_type, b.row_id;
159173''' ;
160174 db.select (q);
0 commit comments