Branch data Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * spgvacuum.c
4 : : * vacuum for SP-GiST
5 : : *
6 : : *
7 : : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
8 : : * Portions Copyright (c) 1994, Regents of the University of California
9 : : *
10 : : * IDENTIFICATION
11 : : * src/backend/access/spgist/spgvacuum.c
12 : : *
13 : : *-------------------------------------------------------------------------
14 : : */
15 : :
16 : : #include "postgres.h"
17 : :
18 : : #include "access/genam.h"
19 : : #include "access/spgist_private.h"
20 : : #include "access/spgxlog.h"
21 : : #include "access/transam.h"
22 : : #include "access/xloginsert.h"
23 : : #include "commands/vacuum.h"
24 : : #include "miscadmin.h"
25 : : #include "storage/bufmgr.h"
26 : : #include "storage/indexfsm.h"
27 : : #include "storage/lmgr.h"
28 : : #include "storage/read_stream.h"
29 : : #include "utils/snapmgr.h"
30 : :
31 : :
32 : : /* Entry in pending-list of TIDs we need to revisit */
33 : : typedef struct spgVacPendingItem
34 : : {
35 : : ItemPointerData tid; /* redirection target to visit */
36 : : bool done; /* have we dealt with this? */
37 : : struct spgVacPendingItem *next; /* list link */
38 : : } spgVacPendingItem;
39 : :
40 : : /* Local state for vacuum operations */
41 : : typedef struct spgBulkDeleteState
42 : : {
43 : : /* Parameters passed in to spgvacuumscan */
44 : : IndexVacuumInfo *info;
45 : : IndexBulkDeleteResult *stats;
46 : : IndexBulkDeleteCallback callback;
47 : : void *callback_state;
48 : :
49 : : /* Additional working state */
50 : : SpGistState spgstate; /* for SPGiST operations that need one */
51 : : spgVacPendingItem *pendingList; /* TIDs we need to (re)visit */
52 : : TransactionId myXmin; /* for detecting newly-added redirects */
53 : : BlockNumber lastFilledBlock; /* last non-deletable block */
54 : : } spgBulkDeleteState;
55 : :
56 : :
57 : : /*
58 : : * Add TID to pendingList, but only if not already present.
59 : : *
60 : : * Note that new items are always appended at the end of the list; this
61 : : * ensures that scans of the list don't miss items added during the scan.
62 : : */
63 : : static void
64 : 25938 : spgAddPendingTID(spgBulkDeleteState *bds, const ItemPointerData *tid)
65 : : {
66 : 25938 : spgVacPendingItem *pitem;
67 : 25938 : spgVacPendingItem **listLink;
68 : :
69 : : /* search the list for pre-existing entry */
70 : 25938 : listLink = &bds->pendingList;
71 [ + + ]: 2291451 : while (*listLink != NULL)
72 : : {
73 : 2274028 : pitem = *listLink;
74 [ + + ]: 2274028 : if (ItemPointerEquals(tid, &pitem->tid))
75 : 8515 : return; /* already in list, do nothing */
76 : 2265513 : listLink = &pitem->next;
77 : : }
78 : : /* not there, so append new entry */
79 : 17423 : pitem = palloc_object(spgVacPendingItem);
80 : 17423 : pitem->tid = *tid;
81 : 17423 : pitem->done = false;
82 : 17423 : pitem->next = NULL;
83 : 17423 : *listLink = pitem;
84 [ - + ]: 25938 : }
85 : :
86 : : /*
87 : : * Clear pendingList
88 : : */
89 : : static void
90 : 131 : spgClearPendingList(spgBulkDeleteState *bds)
91 : : {
92 : 131 : spgVacPendingItem *pitem;
93 : 131 : spgVacPendingItem *nitem;
94 : :
95 [ + + ]: 17554 : for (pitem = bds->pendingList; pitem != NULL; pitem = nitem)
96 : : {
97 : 17423 : nitem = pitem->next;
98 : : /* All items in list should have been dealt with */
99 [ + - ]: 17423 : Assert(pitem->done);
100 : 17423 : pfree(pitem);
101 : 17423 : }
102 : 131 : bds->pendingList = NULL;
103 : 131 : }
104 : :
105 : : /*
106 : : * Vacuum a regular (non-root) leaf page
107 : : *
108 : : * We must delete tuples that are targeted for deletion by the VACUUM,
109 : : * but not move any tuples that are referenced by outside links; we assume
110 : : * those are the ones that are heads of chains.
111 : : *
112 : : * If we find a REDIRECT that was made by a concurrently-running transaction,
113 : : * we must add its target TID to pendingList. (We don't try to visit the
114 : : * target immediately, first because we don't want VACUUM locking more than
115 : : * one buffer at a time, and second because the duplicate-filtering logic
116 : : * in spgAddPendingTID is useful to ensure we can't get caught in an infinite
117 : : * loop in the face of continuous concurrent insertions.)
118 : : *
119 : : * If forPending is true, we are examining the page as a consequence of
120 : : * chasing a redirect link, not as part of the normal sequential scan.
121 : : * We still vacuum the page normally, but we don't increment the stats
122 : : * about live tuples; else we'd double-count those tuples, since the page
123 : : * has been or will be visited in the sequential scan as well.
124 : : */
125 : : static void
126 : 9266 : vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer,
127 : : bool forPending)
128 : : {
129 : 9266 : Page page = BufferGetPage(buffer);
130 : 9266 : spgxlogVacuumLeaf xlrec;
131 : 9266 : OffsetNumber toDead[MaxIndexTuplesPerPage];
132 : 9266 : OffsetNumber toPlaceholder[MaxIndexTuplesPerPage];
133 : 9266 : OffsetNumber moveSrc[MaxIndexTuplesPerPage];
134 : 9266 : OffsetNumber moveDest[MaxIndexTuplesPerPage];
135 : 9266 : OffsetNumber chainSrc[MaxIndexTuplesPerPage];
136 : 9266 : OffsetNumber chainDest[MaxIndexTuplesPerPage];
137 : 9266 : OffsetNumber predecessor[MaxIndexTuplesPerPage + 1];
138 : 9266 : bool deletable[MaxIndexTuplesPerPage + 1];
139 : 9266 : int nDeletable;
140 : 9266 : OffsetNumber i,
141 : 9266 : max = PageGetMaxOffsetNumber(page);
142 : :
143 : 9266 : memset(predecessor, 0, sizeof(predecessor));
144 : 9266 : memset(deletable, 0, sizeof(deletable));
145 : 9266 : nDeletable = 0;
146 : :
147 : : /* Scan page, identify tuples to delete, accumulate stats */
148 [ + + ]: 1440268 : for (i = FirstOffsetNumber; i <= max; i++)
149 : : {
150 : 1431002 : SpGistLeafTuple lt;
151 : :
152 : 2862004 : lt = (SpGistLeafTuple) PageGetItem(page,
153 : 1431002 : PageGetItemId(page, i));
154 [ + + ]: 1431002 : if (lt->tupstate == SPGIST_LIVE)
155 : : {
156 [ + - ]: 1391797 : Assert(ItemPointerIsValid(<->heapPtr));
157 : :
158 [ + + ]: 1391797 : if (bds->callback(<->heapPtr, bds->callback_state))
159 : : {
160 : 999 : bds->stats->tuples_removed += 1;
161 : 999 : deletable[i] = true;
162 : 999 : nDeletable++;
163 : 999 : }
164 : : else
165 : : {
166 [ + + ]: 1390798 : if (!forPending)
167 : 80396 : bds->stats->num_index_tuples += 1;
168 : : }
169 : :
170 : : /* Form predecessor map, too */
171 [ + + ]: 1391797 : if (SGLT_GET_NEXTOFFSET(lt) != InvalidOffsetNumber)
172 : : {
173 : : /* paranoia about corrupted chain links */
174 [ + - ]: 1381162 : if (SGLT_GET_NEXTOFFSET(lt) < FirstOffsetNumber ||
175 : 1381162 : SGLT_GET_NEXTOFFSET(lt) > max ||
176 : 1381162 : predecessor[SGLT_GET_NEXTOFFSET(lt)] != InvalidOffsetNumber)
177 [ # # # # ]: 0 : elog(ERROR, "inconsistent tuple chain links in page %u of index \"%s\"",
178 : : BufferGetBlockNumber(buffer),
179 : : RelationGetRelationName(index));
180 : 1381162 : predecessor[SGLT_GET_NEXTOFFSET(lt)] = i;
181 : 1381162 : }
182 : 1391797 : }
183 [ + + ]: 39205 : else if (lt->tupstate == SPGIST_REDIRECT)
184 : : {
185 : 8649 : SpGistDeadTuple dt = (SpGistDeadTuple) lt;
186 : :
187 [ + - ]: 8649 : Assert(SGLT_GET_NEXTOFFSET(dt) == InvalidOffsetNumber);
188 [ + - ]: 8649 : Assert(ItemPointerIsValid(&dt->pointer));
189 : :
190 : : /*
191 : : * Add target TID to pending list if the redirection could have
192 : : * happened since VACUUM started. (If xid is invalid, assume it
193 : : * must have happened before VACUUM started, since REINDEX
194 : : * CONCURRENTLY locks out VACUUM.)
195 : : *
196 : : * Note: we could make a tighter test by seeing if the xid is
197 : : * "running" according to the active snapshot; but snapmgr.c
198 : : * doesn't currently export a suitable API, and it's not entirely
199 : : * clear that a tighter test is worth the cycles anyway.
200 : : */
201 [ + + ]: 8649 : if (TransactionIdFollowsOrEquals(dt->xid, bds->myXmin))
202 : 8646 : spgAddPendingTID(bds, &dt->pointer);
203 : 8649 : }
204 : : else
205 : : {
206 [ + - ]: 30556 : Assert(SGLT_GET_NEXTOFFSET(lt) == InvalidOffsetNumber);
207 : : }
208 : 1431002 : }
209 : :
210 [ + + ]: 9266 : if (nDeletable == 0)
211 : 9253 : return; /* nothing more to do */
212 : :
213 : : /*----------
214 : : * Figure out exactly what we have to do. We do this separately from
215 : : * actually modifying the page, mainly so that we have a representation
216 : : * that can be dumped into WAL and then the replay code can do exactly
217 : : * the same thing. The output of this step consists of six arrays
218 : : * describing four kinds of operations, to be performed in this order:
219 : : *
220 : : * toDead[]: tuple numbers to be replaced with DEAD tuples
221 : : * toPlaceholder[]: tuple numbers to be replaced with PLACEHOLDER tuples
222 : : * moveSrc[]: tuple numbers that need to be relocated to another offset
223 : : * (replacing the tuple there) and then replaced with PLACEHOLDER tuples
224 : : * moveDest[]: new locations for moveSrc tuples
225 : : * chainSrc[]: tuple numbers whose chain links (nextOffset) need updates
226 : : * chainDest[]: new values of nextOffset for chainSrc members
227 : : *
228 : : * It's easiest to figure out what we have to do by processing tuple
229 : : * chains, so we iterate over all the tuples (not just the deletable
230 : : * ones!) to identify chain heads, then chase down each chain and make
231 : : * work item entries for deletable tuples within the chain.
232 : : *----------
233 : : */
234 : 13 : xlrec.nDead = xlrec.nPlaceholder = xlrec.nMove = xlrec.nChain = 0;
235 : :
236 [ + + ]: 2116 : for (i = FirstOffsetNumber; i <= max; i++)
237 : : {
238 : 2103 : SpGistLeafTuple head;
239 : 2103 : bool interveningDeletable;
240 : 2103 : OffsetNumber prevLive;
241 : 2103 : OffsetNumber j;
242 : :
243 : 4206 : head = (SpGistLeafTuple) PageGetItem(page,
244 : 2103 : PageGetItemId(page, i));
245 [ + + ]: 2103 : if (head->tupstate != SPGIST_LIVE)
246 : 104 : continue; /* can't be a chain member */
247 [ + + ]: 1999 : if (predecessor[i] != 0)
248 : 1984 : continue; /* not a chain head */
249 : :
250 : : /* initialize ... */
251 : 15 : interveningDeletable = false;
252 [ + + ]: 15 : prevLive = deletable[i] ? InvalidOffsetNumber : i;
253 : :
254 : : /* scan down the chain ... */
255 : 15 : j = SGLT_GET_NEXTOFFSET(head);
256 [ + + ]: 1999 : while (j != InvalidOffsetNumber)
257 : : {
258 : 1984 : SpGistLeafTuple lt;
259 : :
260 : 3968 : lt = (SpGistLeafTuple) PageGetItem(page,
261 : 1984 : PageGetItemId(page, j));
262 [ + - ]: 1984 : if (lt->tupstate != SPGIST_LIVE)
263 : : {
264 : : /* all tuples in chain should be live */
265 [ # # # # ]: 0 : elog(ERROR, "unexpected SPGiST tuple state: %d",
266 : : lt->tupstate);
267 : 0 : }
268 : :
269 [ + + ]: 1984 : if (deletable[j])
270 : : {
271 : : /* This tuple should be replaced by a placeholder */
272 : 987 : toPlaceholder[xlrec.nPlaceholder] = j;
273 : 987 : xlrec.nPlaceholder++;
274 : : /* previous live tuple's chain link will need an update */
275 : 987 : interveningDeletable = true;
276 : 987 : }
277 [ + + ]: 997 : else if (prevLive == InvalidOffsetNumber)
278 : : {
279 : : /*
280 : : * This is the first live tuple in the chain. It has to move
281 : : * to the head position.
282 : : */
283 : 12 : moveSrc[xlrec.nMove] = j;
284 : 12 : moveDest[xlrec.nMove] = i;
285 : 12 : xlrec.nMove++;
286 : : /* Chain updates will be applied after the move */
287 : 12 : prevLive = i;
288 : 12 : interveningDeletable = false;
289 : 12 : }
290 : : else
291 : : {
292 : : /*
293 : : * Second or later live tuple. Arrange to re-chain it to the
294 : : * previous live one, if there was a gap.
295 : : */
296 [ + + ]: 985 : if (interveningDeletable)
297 : : {
298 : 2 : chainSrc[xlrec.nChain] = prevLive;
299 : 2 : chainDest[xlrec.nChain] = j;
300 : 2 : xlrec.nChain++;
301 : 2 : }
302 : 985 : prevLive = j;
303 : 985 : interveningDeletable = false;
304 : : }
305 : :
306 : 1984 : j = SGLT_GET_NEXTOFFSET(lt);
307 : 1984 : }
308 : :
309 [ + - ]: 15 : if (prevLive == InvalidOffsetNumber)
310 : : {
311 : : /* The chain is entirely removable, so we need a DEAD tuple */
312 : 0 : toDead[xlrec.nDead] = i;
313 : 0 : xlrec.nDead++;
314 : 0 : }
315 [ + + ]: 15 : else if (interveningDeletable)
316 : : {
317 : : /* One or more deletions at end of chain, so close it off */
318 : 14 : chainSrc[xlrec.nChain] = prevLive;
319 : 14 : chainDest[xlrec.nChain] = InvalidOffsetNumber;
320 : 14 : xlrec.nChain++;
321 : 14 : }
322 [ + + ]: 2103 : }
323 : :
324 : : /* sanity check ... */
325 [ + - ]: 13 : if (nDeletable != xlrec.nDead + xlrec.nPlaceholder + xlrec.nMove)
326 [ # # # # ]: 0 : elog(ERROR, "inconsistent counts of deletable tuples");
327 : :
328 : : /* Do the updates */
329 : 13 : START_CRIT_SECTION();
330 : :
331 : 26 : spgPageIndexMultiDelete(&bds->spgstate, page,
332 : 13 : toDead, xlrec.nDead,
333 : : SPGIST_DEAD, SPGIST_DEAD,
334 : : InvalidBlockNumber, InvalidOffsetNumber);
335 : :
336 : 26 : spgPageIndexMultiDelete(&bds->spgstate, page,
337 : 13 : toPlaceholder, xlrec.nPlaceholder,
338 : : SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
339 : : InvalidBlockNumber, InvalidOffsetNumber);
340 : :
341 : : /*
342 : : * We implement the move step by swapping the line pointers of the source
343 : : * and target tuples, then replacing the newly-source tuples with
344 : : * placeholders. This is perhaps unduly friendly with the page data
345 : : * representation, but it's fast and doesn't risk page overflow when a
346 : : * tuple to be relocated is large.
347 : : */
348 [ + + ]: 25 : for (i = 0; i < xlrec.nMove; i++)
349 : : {
350 : 12 : ItemId idSrc = PageGetItemId(page, moveSrc[i]);
351 : 12 : ItemId idDest = PageGetItemId(page, moveDest[i]);
352 : 12 : ItemIdData tmp;
353 : :
354 : 12 : tmp = *idSrc;
355 : 12 : *idSrc = *idDest;
356 : 12 : *idDest = tmp;
357 : 12 : }
358 : :
359 : 26 : spgPageIndexMultiDelete(&bds->spgstate, page,
360 : 13 : moveSrc, xlrec.nMove,
361 : : SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
362 : : InvalidBlockNumber, InvalidOffsetNumber);
363 : :
364 [ + + ]: 29 : for (i = 0; i < xlrec.nChain; i++)
365 : : {
366 : 16 : SpGistLeafTuple lt;
367 : :
368 : 32 : lt = (SpGistLeafTuple) PageGetItem(page,
369 : 16 : PageGetItemId(page, chainSrc[i]));
370 [ + - ]: 16 : Assert(lt->tupstate == SPGIST_LIVE);
371 : 16 : SGLT_SET_NEXTOFFSET(lt, chainDest[i]);
372 : 16 : }
373 : :
374 : 13 : MarkBufferDirty(buffer);
375 : :
376 [ + - + - : 13 : if (RelationNeedsWAL(index))
+ - - + ]
377 : : {
378 : 13 : XLogRecPtr recptr;
379 : :
380 : 13 : XLogBeginInsert();
381 : :
382 : 13 : STORE_STATE(&bds->spgstate, xlrec.stateSrc);
383 : :
384 : 13 : XLogRegisterData(&xlrec, SizeOfSpgxlogVacuumLeaf);
385 : : /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
386 : 13 : XLogRegisterData(toDead, sizeof(OffsetNumber) * xlrec.nDead);
387 : 13 : XLogRegisterData(toPlaceholder, sizeof(OffsetNumber) * xlrec.nPlaceholder);
388 : 13 : XLogRegisterData(moveSrc, sizeof(OffsetNumber) * xlrec.nMove);
389 : 13 : XLogRegisterData(moveDest, sizeof(OffsetNumber) * xlrec.nMove);
390 : 13 : XLogRegisterData(chainSrc, sizeof(OffsetNumber) * xlrec.nChain);
391 : 13 : XLogRegisterData(chainDest, sizeof(OffsetNumber) * xlrec.nChain);
392 : :
393 : 13 : XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
394 : :
395 : 13 : recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_LEAF);
396 : :
397 : 13 : PageSetLSN(page, recptr);
398 : 13 : }
399 : :
400 [ + - ]: 13 : END_CRIT_SECTION();
401 : 9266 : }
402 : :
403 : : /*
404 : : * Vacuum a root page when it is also a leaf
405 : : *
406 : : * On the root, we just delete any dead leaf tuples; no fancy business
407 : : */
408 : : static void
409 : 5 : vacuumLeafRoot(spgBulkDeleteState *bds, Relation index, Buffer buffer)
410 : : {
411 : 5 : Page page = BufferGetPage(buffer);
412 : 5 : spgxlogVacuumRoot xlrec;
413 : 5 : OffsetNumber toDelete[MaxIndexTuplesPerPage];
414 : 5 : OffsetNumber i,
415 : 5 : max = PageGetMaxOffsetNumber(page);
416 : :
417 : 5 : xlrec.nDelete = 0;
418 : :
419 : : /* Scan page, identify tuples to delete, accumulate stats */
420 [ + + ]: 26 : for (i = FirstOffsetNumber; i <= max; i++)
421 : : {
422 : 21 : SpGistLeafTuple lt;
423 : :
424 : 42 : lt = (SpGistLeafTuple) PageGetItem(page,
425 : 21 : PageGetItemId(page, i));
426 [ + - ]: 21 : if (lt->tupstate == SPGIST_LIVE)
427 : : {
428 [ + - ]: 21 : Assert(ItemPointerIsValid(<->heapPtr));
429 : :
430 [ - + ]: 21 : if (bds->callback(<->heapPtr, bds->callback_state))
431 : : {
432 : 0 : bds->stats->tuples_removed += 1;
433 : 0 : toDelete[xlrec.nDelete] = i;
434 : 0 : xlrec.nDelete++;
435 : 0 : }
436 : : else
437 : : {
438 : 21 : bds->stats->num_index_tuples += 1;
439 : : }
440 : 21 : }
441 : : else
442 : : {
443 : : /* all tuples on root should be live */
444 [ # # # # ]: 0 : elog(ERROR, "unexpected SPGiST tuple state: %d",
445 : : lt->tupstate);
446 : : }
447 : 21 : }
448 : :
449 [ - + ]: 5 : if (xlrec.nDelete == 0)
450 : 5 : return; /* nothing more to do */
451 : :
452 : : /* Do the update */
453 : 0 : START_CRIT_SECTION();
454 : :
455 : : /* The tuple numbers are in order, so we can use PageIndexMultiDelete */
456 : 0 : PageIndexMultiDelete(page, toDelete, xlrec.nDelete);
457 : :
458 : 0 : MarkBufferDirty(buffer);
459 : :
460 [ # # # # : 0 : if (RelationNeedsWAL(index))
# # # # ]
461 : : {
462 : 0 : XLogRecPtr recptr;
463 : :
464 : 0 : XLogBeginInsert();
465 : :
466 : : /* Prepare WAL record */
467 : 0 : STORE_STATE(&bds->spgstate, xlrec.stateSrc);
468 : :
469 : 0 : XLogRegisterData(&xlrec, SizeOfSpgxlogVacuumRoot);
470 : : /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
471 : 0 : XLogRegisterData(toDelete,
472 : 0 : sizeof(OffsetNumber) * xlrec.nDelete);
473 : :
474 : 0 : XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
475 : :
476 : 0 : recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_ROOT);
477 : :
478 : 0 : PageSetLSN(page, recptr);
479 : 0 : }
480 : :
481 [ # # ]: 0 : END_CRIT_SECTION();
482 [ - + ]: 5 : }
483 : :
484 : : /*
485 : : * Clean up redirect and placeholder tuples on the given page
486 : : *
487 : : * Redirect tuples can be marked placeholder once they're old enough.
488 : : * Placeholder tuples can be removed if it won't change the offsets of
489 : : * non-placeholder ones.
490 : : *
491 : : * Unlike the routines above, this works on both leaf and inner pages.
492 : : */
493 : : static void
494 : 9289 : vacuumRedirectAndPlaceholder(Relation index, Relation heaprel, Buffer buffer)
495 : : {
496 : 9289 : Page page = BufferGetPage(buffer);
497 : 9289 : SpGistPageOpaque opaque = SpGistPageGetOpaque(page);
498 : 18578 : OffsetNumber i,
499 : 9289 : max = PageGetMaxOffsetNumber(page),
500 : 9289 : firstPlaceholder = InvalidOffsetNumber;
501 : 9289 : bool hasNonPlaceholder = false;
502 : 9289 : bool hasUpdate = false;
503 : 9289 : OffsetNumber itemToPlaceholder[MaxIndexTuplesPerPage];
504 : 9289 : OffsetNumber itemnos[MaxIndexTuplesPerPage];
505 : 9289 : spgxlogVacuumRedirect xlrec;
506 : 9289 : GlobalVisState *vistest;
507 : :
508 [ + - - + : 9289 : xlrec.isCatalogRel = RelationIsAccessibleInLogicalDecoding(heaprel);
# # # # #
# # # # #
# # # # ]
509 : 9289 : xlrec.nToPlaceholder = 0;
510 : 9289 : xlrec.snapshotConflictHorizon = InvalidTransactionId;
511 : :
512 : 9289 : vistest = GlobalVisTestFor(heaprel);
513 : :
514 : 9289 : START_CRIT_SECTION();
515 : :
516 : : /*
517 : : * Scan backwards to convert old redirection tuples to placeholder tuples,
518 : : * and identify location of last non-placeholder tuple while at it.
519 : : */
520 [ + + ]: 2702938 : for (i = max;
521 [ + + ]: 1351469 : i >= FirstOffsetNumber &&
522 [ + + ]: 1342812 : (opaque->nRedirection > 0 || !hasNonPlaceholder);
523 : 1342180 : i--)
524 : : {
525 : 1342180 : SpGistDeadTuple dt;
526 : :
527 : 1342180 : dt = (SpGistDeadTuple) PageGetItem(page, PageGetItemId(page, i));
528 : :
529 : : /*
530 : : * We can convert a REDIRECT to a PLACEHOLDER if there could no longer
531 : : * be any index scans "in flight" to it. Such an index scan would
532 : : * have to be in a transaction whose snapshot sees the REDIRECT's XID
533 : : * as still running, so comparing the XID against global xmin is a
534 : : * conservatively safe test. If the XID is invalid, it must have been
535 : : * inserted by REINDEX CONCURRENTLY, so we can zap it immediately.
536 : : */
537 [ + + + + ]: 1350829 : if (dt->tupstate == SPGIST_REDIRECT &&
538 [ + - ]: 8649 : (!TransactionIdIsValid(dt->xid) ||
539 : 8649 : GlobalVisTestIsRemovableXid(vistest, dt->xid)))
540 : : {
541 : 3 : dt->tupstate = SPGIST_PLACEHOLDER;
542 [ - + ]: 3 : Assert(opaque->nRedirection > 0);
543 : 3 : opaque->nRedirection--;
544 : 3 : opaque->nPlaceholder++;
545 : :
546 : : /* remember newest XID among the removed redirects */
547 [ + + - + ]: 3 : if (!TransactionIdIsValid(xlrec.snapshotConflictHorizon) ||
548 : 1 : TransactionIdPrecedes(xlrec.snapshotConflictHorizon, dt->xid))
549 : 2 : xlrec.snapshotConflictHorizon = dt->xid;
550 : :
551 : 3 : ItemPointerSetInvalid(&dt->pointer);
552 : :
553 : 3 : itemToPlaceholder[xlrec.nToPlaceholder] = i;
554 : 3 : xlrec.nToPlaceholder++;
555 : :
556 : 3 : hasUpdate = true;
557 : 3 : }
558 : :
559 [ + + ]: 1342180 : if (dt->tupstate == SPGIST_PLACEHOLDER)
560 : : {
561 [ + + ]: 22544 : if (!hasNonPlaceholder)
562 : 22537 : firstPlaceholder = i;
563 : 22544 : }
564 : : else
565 : : {
566 : 1319636 : hasNonPlaceholder = true;
567 : : }
568 : 1342180 : }
569 : :
570 : : /*
571 : : * Any placeholder tuples at the end of page can safely be removed. We
572 : : * can't remove ones before the last non-placeholder, though, because we
573 : : * can't alter the offset numbers of non-placeholder tuples.
574 : : */
575 [ + + ]: 9289 : if (firstPlaceholder != InvalidOffsetNumber)
576 : : {
577 : : /*
578 : : * We do not store this array to rdata because it's easy to recreate.
579 : : */
580 [ + + ]: 22896 : for (i = firstPlaceholder; i <= max; i++)
581 : 22537 : itemnos[i - firstPlaceholder] = i;
582 : :
583 : 359 : i = max - firstPlaceholder + 1;
584 [ + - ]: 359 : Assert(opaque->nPlaceholder >= i);
585 : 359 : opaque->nPlaceholder -= i;
586 : :
587 : : /* The array is surely sorted, so can use PageIndexMultiDelete */
588 : 359 : PageIndexMultiDelete(page, itemnos, i);
589 : :
590 : 359 : hasUpdate = true;
591 : 359 : }
592 : :
593 : 9289 : xlrec.firstPlaceholder = firstPlaceholder;
594 : :
595 [ + + ]: 9289 : if (hasUpdate)
596 : 359 : MarkBufferDirty(buffer);
597 : :
598 [ + + + - : 9289 : if (hasUpdate && RelationNeedsWAL(index))
+ - + - -
+ ]
599 : : {
600 : 359 : XLogRecPtr recptr;
601 : :
602 : 359 : XLogBeginInsert();
603 : :
604 : 359 : XLogRegisterData(&xlrec, SizeOfSpgxlogVacuumRedirect);
605 : 718 : XLogRegisterData(itemToPlaceholder,
606 : 359 : sizeof(OffsetNumber) * xlrec.nToPlaceholder);
607 : :
608 : 359 : XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
609 : :
610 : 359 : recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_REDIRECT);
611 : :
612 : 359 : PageSetLSN(page, recptr);
613 : 359 : }
614 : :
615 [ + - ]: 9289 : END_CRIT_SECTION();
616 : 9289 : }
617 : :
618 : : /*
619 : : * Process one page during a bulkdelete scan
620 : : */
621 : : static void
622 : 662 : spgvacuumpage(spgBulkDeleteState *bds, Buffer buffer)
623 : : {
624 : 662 : Relation index = bds->info->index;
625 : 662 : BlockNumber blkno = BufferGetBlockNumber(buffer);
626 : 662 : Page page;
627 : :
628 : 662 : LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
629 : 662 : page = BufferGetPage(buffer);
630 : :
631 [ - + ]: 662 : if (PageIsNew(page))
632 : : {
633 : : /*
634 : : * We found an all-zero page, which could happen if the database
635 : : * crashed just after extending the file. Recycle it.
636 : : */
637 : 0 : }
638 [ + + ]: 662 : else if (PageIsEmpty(page))
639 : : {
640 : : /* nothing to do */
641 : 14 : }
642 [ + + ]: 648 : else if (SpGistPageIsLeaf(page))
643 : : {
644 [ + + + + ]: 625 : if (SpGistBlockIsRoot(blkno))
645 : : {
646 : 5 : vacuumLeafRoot(bds, index, buffer);
647 : : /* no need for vacuumRedirectAndPlaceholder */
648 : 5 : }
649 : : else
650 : : {
651 : 620 : vacuumLeafPage(bds, index, buffer, false);
652 : 620 : vacuumRedirectAndPlaceholder(index, bds->info->heaprel, buffer);
653 : : }
654 : 625 : }
655 : : else
656 : : {
657 : : /* inner page */
658 : 23 : vacuumRedirectAndPlaceholder(index, bds->info->heaprel, buffer);
659 : : }
660 : :
661 : : /*
662 : : * The root pages must never be deleted, nor marked as available in FSM,
663 : : * because we don't want them ever returned by a search for a place to put
664 : : * a new tuple. Otherwise, check for empty page, and make sure the FSM
665 : : * knows about it.
666 : : */
667 [ + + + + ]: 662 : if (!SpGistBlockIsRoot(blkno))
668 : : {
669 [ + - + + ]: 640 : if (PageIsNew(page) || PageIsEmpty(page))
670 : : {
671 : 8 : RecordFreeIndexPage(index, blkno);
672 : 8 : bds->stats->pages_deleted++;
673 : 8 : }
674 : : else
675 : : {
676 : 632 : SpGistSetLastUsedPage(index, buffer);
677 : 632 : bds->lastFilledBlock = blkno;
678 : : }
679 : 640 : }
680 : :
681 : 662 : UnlockReleaseBuffer(buffer);
682 : 662 : }
683 : :
684 : : /*
685 : : * Process the pending-TID list between pages of the main scan
686 : : */
687 : : static void
688 : 131 : spgprocesspending(spgBulkDeleteState *bds)
689 : : {
690 : 131 : Relation index = bds->info->index;
691 : 131 : spgVacPendingItem *pitem;
692 : 131 : spgVacPendingItem *nitem;
693 : 131 : BlockNumber blkno;
694 : 131 : Buffer buffer;
695 : 131 : Page page;
696 : :
697 [ + + ]: 17554 : for (pitem = bds->pendingList; pitem != NULL; pitem = pitem->next)
698 : : {
699 [ + + ]: 17423 : if (pitem->done)
700 : 8646 : continue; /* ignore already-done items */
701 : :
702 : : /* call vacuum_delay_point while not holding any buffer lock */
703 : 8777 : vacuum_delay_point(false);
704 : :
705 : : /* examine the referenced page */
706 : 8777 : blkno = ItemPointerGetBlockNumber(&pitem->tid);
707 : 17554 : buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
708 : 8777 : RBM_NORMAL, bds->info->strategy);
709 : 8777 : LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
710 : 8777 : page = BufferGetPage(buffer);
711 : :
712 [ + - - + ]: 8777 : if (PageIsNew(page) || SpGistPageIsDeleted(page))
713 : : {
714 : : /* Probably shouldn't happen, but ignore it */
715 : 0 : }
716 [ + + ]: 8777 : else if (SpGistPageIsLeaf(page))
717 : : {
718 [ + - ]: 8646 : if (SpGistBlockIsRoot(blkno))
719 : : {
720 : : /* this should definitely not happen */
721 [ # # # # ]: 0 : elog(ERROR, "redirection leads to root page of index \"%s\"",
722 : : RelationGetRelationName(index));
723 : 0 : }
724 : :
725 : : /* deal with any deletable tuples */
726 : 8646 : vacuumLeafPage(bds, index, buffer, true);
727 : : /* might as well do this while we are here */
728 : 8646 : vacuumRedirectAndPlaceholder(index, bds->info->heaprel, buffer);
729 : :
730 : 8646 : SpGistSetLastUsedPage(index, buffer);
731 : :
732 : : /*
733 : : * We can mark as done not only this item, but any later ones
734 : : * pointing at the same page, since we vacuumed the whole page.
735 : : */
736 : 8646 : pitem->done = true;
737 [ + + ]: 758227 : for (nitem = pitem->next; nitem != NULL; nitem = nitem->next)
738 : : {
739 [ + + ]: 749581 : if (ItemPointerGetBlockNumber(&nitem->tid) == blkno)
740 : 131 : nitem->done = true;
741 : 749581 : }
742 : 8646 : }
743 : : else
744 : : {
745 : : /*
746 : : * On an inner page, visit the referenced inner tuple and add all
747 : : * its downlinks to the pending list. We might have pending items
748 : : * for more than one inner tuple on the same page (in fact this is
749 : : * pretty likely given the way space allocation works), so get
750 : : * them all while we are here.
751 : : */
752 [ + + ]: 17554 : for (nitem = pitem; nitem != NULL; nitem = nitem->next)
753 : : {
754 [ - + ]: 17423 : if (nitem->done)
755 : 0 : continue;
756 [ + + ]: 17423 : if (ItemPointerGetBlockNumber(&nitem->tid) == blkno)
757 : : {
758 : 8646 : OffsetNumber offset;
759 : 8646 : SpGistInnerTuple innerTuple;
760 : :
761 : 8646 : offset = ItemPointerGetOffsetNumber(&nitem->tid);
762 : 17292 : innerTuple = (SpGistInnerTuple) PageGetItem(page,
763 : 8646 : PageGetItemId(page, offset));
764 [ + - ]: 8646 : if (innerTuple->tupstate == SPGIST_LIVE)
765 : : {
766 : 8646 : SpGistNodeTuple node;
767 : 8646 : int i;
768 : :
769 [ + + ]: 43230 : SGITITERATE(innerTuple, i, node)
770 : : {
771 [ + + ]: 34584 : if (ItemPointerIsValid(&node->t_tid))
772 : 17292 : spgAddPendingTID(bds, &node->t_tid);
773 : 34584 : }
774 : 8646 : }
775 [ # # ]: 0 : else if (innerTuple->tupstate == SPGIST_REDIRECT)
776 : : {
777 : : /* transfer attention to redirect point */
778 : 0 : spgAddPendingTID(bds,
779 : 0 : &((SpGistDeadTuple) innerTuple)->pointer);
780 : 0 : }
781 : : else
782 [ # # # # ]: 0 : elog(ERROR, "unexpected SPGiST tuple state: %d",
783 : : innerTuple->tupstate);
784 : :
785 : 8646 : nitem->done = true;
786 : 8646 : }
787 : 17423 : }
788 : : }
789 : :
790 : 8777 : UnlockReleaseBuffer(buffer);
791 : 8777 : }
792 : :
793 : 131 : spgClearPendingList(bds);
794 : 131 : }
795 : :
796 : : /*
797 : : * Perform a bulkdelete scan
798 : : */
799 : : static void
800 : 11 : spgvacuumscan(spgBulkDeleteState *bds)
801 : : {
802 : 11 : Relation index = bds->info->index;
803 : 11 : bool needLock;
804 : 11 : BlockNumber num_pages;
805 : 11 : BlockRangeReadStreamPrivate p;
806 : 11 : ReadStream *stream = NULL;
807 : :
808 : : /* Finish setting up spgBulkDeleteState */
809 : 11 : initSpGistState(&bds->spgstate, index);
810 : 11 : bds->pendingList = NULL;
811 : 11 : bds->myXmin = GetActiveSnapshot()->xmin;
812 : 11 : bds->lastFilledBlock = SPGIST_LAST_FIXED_BLKNO;
813 : :
814 : : /*
815 : : * Reset counts that will be incremented during the scan; needed in case
816 : : * of multiple scans during a single VACUUM command
817 : : */
818 : 11 : bds->stats->estimated_count = false;
819 : 11 : bds->stats->num_index_tuples = 0;
820 : 11 : bds->stats->pages_deleted = 0;
821 : :
822 : : /* We can skip locking for new or temp relations */
823 [ - + ]: 11 : needLock = !RELATION_IS_LOCAL(index);
824 : 11 : p.current_blocknum = SPGIST_METAPAGE_BLKNO + 1;
825 : :
826 : : /*
827 : : * It is safe to use batchmode as block_range_read_stream_cb takes no
828 : : * locks.
829 : : */
830 : 11 : stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE |
831 : : READ_STREAM_FULL |
832 : : READ_STREAM_USE_BATCHING,
833 : 11 : bds->info->strategy,
834 : 11 : index,
835 : : MAIN_FORKNUM,
836 : : block_range_read_stream_cb,
837 : : &p,
838 : : 0);
839 : :
840 : : /*
841 : : * The outer loop iterates over all index pages except the metapage, in
842 : : * physical order (we hope the kernel will cooperate in providing
843 : : * read-ahead for speed). It is critical that we visit all leaf pages,
844 : : * including ones added after we start the scan, else we might fail to
845 : : * delete some deletable tuples. See more extensive comments about this
846 : : * in btvacuumscan().
847 : : */
848 : 22 : for (;;)
849 : : {
850 : : /* Get the current relation length */
851 [ - + ]: 22 : if (needLock)
852 : 22 : LockRelationForExtension(index, ExclusiveLock);
853 : 22 : num_pages = RelationGetNumberOfBlocks(index);
854 [ - + ]: 22 : if (needLock)
855 : 22 : UnlockRelationForExtension(index, ExclusiveLock);
856 : :
857 : : /* Quit if we've scanned the whole relation */
858 [ + + ]: 22 : if (p.current_blocknum >= num_pages)
859 : 11 : break;
860 : :
861 : 11 : p.last_exclusive = num_pages;
862 : :
863 : : /* Iterate over pages, then loop back to recheck length */
864 : 673 : while (true)
865 : : {
866 : 673 : Buffer buf;
867 : :
868 : : /* call vacuum_delay_point while not holding any buffer lock */
869 : 673 : vacuum_delay_point(false);
870 : :
871 : 673 : buf = read_stream_next_buffer(stream, NULL);
872 : :
873 [ + + ]: 673 : if (!BufferIsValid(buf))
874 : 11 : break;
875 : :
876 : 662 : spgvacuumpage(bds, buf);
877 : :
878 : : /* empty the pending-list after each page */
879 [ + + ]: 662 : if (bds->pendingList != NULL)
880 : 131 : spgprocesspending(bds);
881 [ - + + ]: 673 : }
882 : :
883 : : /*
884 : : * We have to reset the read stream to use it again. After returning
885 : : * InvalidBuffer, the read stream API won't invoke our callback again
886 : : * until the stream has been reset.
887 : : */
888 : 11 : read_stream_reset(stream);
889 : : }
890 : :
891 : 11 : read_stream_end(stream);
892 : :
893 : : /* Propagate local lastUsedPages cache to metablock */
894 : 11 : SpGistUpdateMetaPage(index);
895 : :
896 : : /*
897 : : * If we found any empty pages (and recorded them in the FSM), then
898 : : * forcibly update the upper-level FSM pages to ensure that searchers can
899 : : * find them. It's possible that the pages were also found during
900 : : * previous scans and so this is a waste of time, but it's cheap enough
901 : : * relative to scanning the index that it shouldn't matter much, and
902 : : * making sure that free pages are available sooner not later seems
903 : : * worthwhile.
904 : : *
905 : : * Note that if no empty pages exist, we don't bother vacuuming the FSM at
906 : : * all.
907 : : */
908 [ + + ]: 11 : if (bds->stats->pages_deleted > 0)
909 : 7 : IndexFreeSpaceMapVacuum(index);
910 : :
911 : : /*
912 : : * Truncate index if possible
913 : : *
914 : : * XXX disabled because it's unsafe due to possible concurrent inserts.
915 : : * We'd have to rescan the pages to make sure they're still empty, and it
916 : : * doesn't seem worth it. Note that btree doesn't do this either.
917 : : *
918 : : * Another reason not to truncate is that it could invalidate the cached
919 : : * pages-with-freespace pointers in the metapage and other backends'
920 : : * relation caches, that is leave them pointing to nonexistent pages.
921 : : * Adding RelationGetNumberOfBlocks calls to protect the places that use
922 : : * those pointers would be unduly expensive.
923 : : */
924 : : #ifdef NOT_USED
925 : : if (num_pages > bds->lastFilledBlock + 1)
926 : : {
927 : : BlockNumber lastBlock = num_pages - 1;
928 : :
929 : : num_pages = bds->lastFilledBlock + 1;
930 : : RelationTruncate(index, num_pages);
931 : : bds->stats->pages_removed += lastBlock - bds->lastFilledBlock;
932 : : bds->stats->pages_deleted -= lastBlock - bds->lastFilledBlock;
933 : : }
934 : : #endif
935 : :
936 : : /* Report final stats */
937 : 11 : bds->stats->num_pages = num_pages;
938 : 11 : bds->stats->pages_newly_deleted = bds->stats->pages_deleted;
939 : 11 : bds->stats->pages_free = bds->stats->pages_deleted;
940 : 11 : }
941 : :
942 : : /*
943 : : * Bulk deletion of all index entries pointing to a set of heap tuples.
944 : : * The set of target tuples is specified via a callback routine that tells
945 : : * whether any given heap tuple (identified by ItemPointer) is being deleted.
946 : : *
947 : : * Result: a palloc'd struct containing statistical info for VACUUM displays.
948 : : */
949 : : IndexBulkDeleteResult *
950 : 1 : spgbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
951 : : IndexBulkDeleteCallback callback, void *callback_state)
952 : : {
953 : 1 : spgBulkDeleteState bds;
954 : :
955 : : /* allocate stats if first time through, else re-use existing struct */
956 [ - + ]: 1 : if (stats == NULL)
957 : 1 : stats = palloc0_object(IndexBulkDeleteResult);
958 : 1 : bds.info = info;
959 : 1 : bds.stats = stats;
960 : 1 : bds.callback = callback;
961 : 1 : bds.callback_state = callback_state;
962 : :
963 : 1 : spgvacuumscan(&bds);
964 : :
965 : 2 : return stats;
966 : 1 : }
967 : :
968 : : /* Dummy callback to delete no tuples during spgvacuumcleanup */
969 : : static bool
970 : 1389819 : dummy_callback(ItemPointer itemptr, void *state)
971 : : {
972 : 1389819 : return false;
973 : : }
974 : :
975 : : /*
976 : : * Post-VACUUM cleanup.
977 : : *
978 : : * Result: a palloc'd struct containing statistical info for VACUUM displays.
979 : : */
980 : : IndexBulkDeleteResult *
981 : 11 : spgvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
982 : : {
983 : 11 : spgBulkDeleteState bds;
984 : :
985 : : /* No-op in ANALYZE ONLY mode */
986 [ - + ]: 11 : if (info->analyze_only)
987 : 0 : return stats;
988 : :
989 : : /*
990 : : * We don't need to scan the index if there was a preceding bulkdelete
991 : : * pass. Otherwise, make a pass that won't delete any live tuples, but
992 : : * might still accomplish useful stuff with redirect/placeholder cleanup
993 : : * and/or FSM housekeeping, and in any case will provide stats.
994 : : */
995 [ + + ]: 11 : if (stats == NULL)
996 : : {
997 : 10 : stats = palloc0_object(IndexBulkDeleteResult);
998 : 10 : bds.info = info;
999 : 10 : bds.stats = stats;
1000 : 10 : bds.callback = dummy_callback;
1001 : 10 : bds.callback_state = NULL;
1002 : :
1003 : 10 : spgvacuumscan(&bds);
1004 : 10 : }
1005 : :
1006 : : /*
1007 : : * It's quite possible for us to be fooled by concurrent tuple moves into
1008 : : * double-counting some index tuples, so disbelieve any total that exceeds
1009 : : * the underlying heap's count ... if we know that accurately. Otherwise
1010 : : * this might just make matters worse.
1011 : : */
1012 [ - + ]: 11 : if (!info->estimated_count)
1013 : : {
1014 [ + - ]: 11 : if (stats->num_index_tuples > info->num_heap_tuples)
1015 : 0 : stats->num_index_tuples = info->num_heap_tuples;
1016 : 11 : }
1017 : :
1018 : 11 : return stats;
1019 : 11 : }
|