Branch data Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * vacuumlazy.c
4 : : * Concurrent ("lazy") vacuuming.
5 : : *
6 : : * Heap relations are vacuumed in three main phases. In phase I, vacuum scans
7 : : * relation pages, pruning and freezing tuples and saving dead tuples' TIDs in
8 : : * a TID store. If that TID store fills up or vacuum finishes scanning the
9 : : * relation, it progresses to phase II: index vacuuming. Index vacuuming
10 : : * deletes the dead index entries referenced in the TID store. In phase III,
11 : : * vacuum scans the blocks of the relation referred to by the TIDs in the TID
12 : : * store and reaps the corresponding dead items, freeing that space for future
13 : : * tuples.
14 : : *
15 : : * If there are no indexes or index scanning is disabled, phase II may be
16 : : * skipped. If phase I identified very few dead index entries or if vacuum's
17 : : * failsafe mechanism has triggered (to avoid transaction ID wraparound),
18 : : * vacuum may skip phases II and III.
19 : : *
20 : : * If the TID store fills up in phase I, vacuum suspends phase I and proceeds
21 : : * to phases II and III, cleaning up the dead tuples referenced in the current
22 : : * TID store. This empties the TID store, allowing vacuum to resume phase I.
23 : : *
24 : : * In a way, the phases are more like states in a state machine, but they have
25 : : * been referred to colloquially as phases for so long that they are referred
26 : : * to as such here.
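: : *
: : * As a rough sketch of that cycle (not the literal control flow):
: : *
: : *     while there are heap pages left to scan:
: : *         phase I: prune and freeze the page, saving dead tuples' TIDs
: : *         if the TID store is full:
: : *             phase II: vacuum indexes using the stored TIDs
: : *             phase III: vacuum the heap pages holding those TIDs
: : *             reset the TID store and resume phase I
: : *     run phases II and III one final time (unless skipped, as above)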
27 : : *
28 : : * Manually invoked VACUUMs may scan indexes during phase II in parallel. For
29 : : * more information on this, see the comment at the top of vacuumparallel.c.
30 : : *
31 : : * In between phases, vacuum updates the freespace map (every
32 : : * VACUUM_FSM_EVERY_PAGES).
33 : : *
34 : : * After completing all three phases, vacuum may truncate the relation if it
35 : : * has emptied pages at the end. Finally, vacuum updates relation statistics
36 : : * in pg_class and the cumulative statistics subsystem.
37 : : *
38 : : * Relation Scanning:
39 : : *
40 : : * Vacuum scans the heap relation, starting at the beginning and progressing
41 : : * to the end, skipping pages as permitted by their visibility status, vacuum
42 : : * options, and various other requirements.
43 : : *
44 : : * Vacuums are either aggressive or normal. Aggressive vacuums must scan every
45 : : * unfrozen tuple in order to advance relfrozenxid and avoid transaction ID
46 : : * wraparound. Normal vacuums may scan otherwise skippable pages for one of
47 : : * two reasons:
48 : : *
49 : : * When page skipping is not disabled, a normal vacuum may scan pages that are
50 : : * marked all-visible (and even all-frozen) in the visibility map if the range
51 : : * of skippable pages is below SKIP_PAGES_THRESHOLD. This is primarily for the
52 : : * benefit of kernel readahead (see comment in heap_vac_scan_next_block()).
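: : *
: : * For example, with SKIP_PAGES_THRESHOLD at 32, a run of 20 consecutive
: : * skippable pages is still read, while a run of 40 such pages may be
: : * skipped entirely.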
53 : : *
54 : : * A normal vacuum may also scan skippable pages in an effort to freeze them
55 : : * and decrease the backlog of all-visible but not all-frozen pages that have
56 : : * to be processed by the next aggressive vacuum. These are referred to as
57 : : * eagerly scanned pages. Pages scanned due to SKIP_PAGES_THRESHOLD do not
58 : : * count as eagerly scanned pages.
59 : : *
60 : : * Eagerly scanned pages that are set all-frozen in the VM are successful
61 : : * eager freezes and those not set all-frozen in the VM are failed eager
62 : : * freezes.
63 : : *
64 : : * Because we want to amortize the overhead of freezing pages over multiple
65 : : * vacuums, normal vacuums cap the number of successful eager freezes to
66 : : * MAX_EAGER_FREEZE_SUCCESS_RATE of the number of all-visible but not
67 : : * all-frozen pages at the beginning of the vacuum. Since eagerly frozen pages
68 : : * may be unfrozen before the next aggressive vacuum, capping the number of
69 : : * successful eager freezes also caps the downside of eager freezing:
70 : : * potentially wasted work.
71 : : *
72 : : * Once the success cap has been hit, eager scanning is disabled for the
73 : : * remainder of the vacuum of the relation.
74 : : *
75 : : * Success is capped globally because we don't want to limit our successes if
76 : : * old data happens to be concentrated in a particular part of the table. This
77 : : * is especially likely to happen for append-mostly workloads where the oldest
78 : : * data is at the beginning of the unfrozen portion of the relation.
79 : : *
80 : : * On the assumption that different regions of the table are likely to contain
81 : : * similarly aged data, normal vacuums use a localized eager freeze failure
82 : : * cap. The failure count is reset for each region of the table -- comprised
83 : : * of EAGER_SCAN_REGION_SIZE blocks. In each region, we tolerate
84 : : * vacuum_max_eager_freeze_failure_rate of EAGER_SCAN_REGION_SIZE failures
85 : : * before suspending eager scanning until the end of the region.
86 : : * vacuum_max_eager_freeze_failure_rate is configurable both globally and per
87 : : * table.
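: : *
: : * For example, with EAGER_SCAN_REGION_SIZE at 4096 blocks and a
: : * vacuum_max_eager_freeze_failure_rate of, say, 0.03, roughly
: : * 4096 * 0.03 = 122 eager freeze failures are tolerated per region before
: : * eager scanning is suspended until the next region.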
88 : : *
89 : : * Aggressive vacuums must examine every unfrozen tuple and thus are not
90 : : * subject to any of the limits imposed by the eager scanning algorithm.
91 : : *
92 : : * Once vacuum has decided to scan a given block, it must read the block and
93 : : * obtain a cleanup lock to prune tuples on the page. A non-aggressive vacuum
94 : : * may choose to skip pruning and freezing if it cannot acquire a cleanup lock
95 : : * on the buffer right away. In this case, it may miss cleaning up dead tuples
96 : : * and their associated index entries (though it is free to reap any existing
97 : : * dead items on the page).
98 : : *
99 : : * After pruning and freezing, pages that are newly all-visible and all-frozen
100 : : * are marked as such in the visibility map.
101 : : *
102 : : * Dead TID Storage:
103 : : *
104 : : * The major space usage for vacuuming is storage for the dead tuple IDs that
105 : : * are to be removed from indexes. We want to ensure we can vacuum even the
106 : : * very largest relations with finite memory space usage. To do that, we set
107 : : * upper bounds on the memory that can be used for keeping track of dead TIDs
108 : : * at once.
109 : : *
110 : : * We are willing to use at most maintenance_work_mem (or perhaps
111 : : * autovacuum_work_mem) memory space to keep track of dead TIDs. If the
112 : : * TID store is full, we must call lazy_vacuum to vacuum indexes (and to vacuum
113 : : * the pages that we've pruned). This frees up the memory space dedicated to
114 : : * storing dead TIDs.
115 : : *
116 : : * In practice VACUUM will often complete its initial pass over the target
117 : : * heap relation without ever running out of space to store TIDs. This means
118 : : * that there only needs to be one call to lazy_vacuum, after the initial pass
119 : : * completes.
120 : : *
121 : : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
122 : : * Portions Copyright (c) 1994, Regents of the University of California
123 : : *
124 : : *
125 : : * IDENTIFICATION
126 : : * src/backend/access/heap/vacuumlazy.c
127 : : *
128 : : *-------------------------------------------------------------------------
129 : : */
130 : : #include "postgres.h"
131 : :
132 : : #include "access/genam.h"
133 : : #include "access/heapam.h"
134 : : #include "access/htup_details.h"
135 : : #include "access/multixact.h"
136 : : #include "access/tidstore.h"
137 : : #include "access/transam.h"
138 : : #include "access/visibilitymap.h"
139 : : #include "access/xloginsert.h"
140 : : #include "catalog/storage.h"
141 : : #include "commands/progress.h"
142 : : #include "commands/vacuum.h"
143 : : #include "common/int.h"
144 : : #include "common/pg_prng.h"
145 : : #include "executor/instrument.h"
146 : : #include "miscadmin.h"
147 : : #include "pgstat.h"
148 : : #include "portability/instr_time.h"
149 : : #include "postmaster/autovacuum.h"
150 : : #include "storage/bufmgr.h"
151 : : #include "storage/freespace.h"
152 : : #include "storage/lmgr.h"
153 : : #include "storage/read_stream.h"
154 : : #include "utils/lsyscache.h"
155 : : #include "utils/pg_rusage.h"
156 : : #include "utils/timestamp.h"
157 : :
158 : :
159 : : /*
160 : : * Space/time tradeoff parameters: do these need to be user-tunable?
161 : : *
162 : : * To consider truncating the relation, we want there to be at least
163 : : * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
164 : : * is less) potentially-freeable pages.
165 : : */
166 : : #define REL_TRUNCATE_MINIMUM 1000
167 : : #define REL_TRUNCATE_FRACTION 16
168 : :
169 : : /*
170 : : * Timing parameters for truncate locking heuristics.
171 : : *
172 : : * These were not exposed as user tunable GUC values because it didn't seem
173 : : * that the potential for improvement was great enough to merit the cost of
174 : : * supporting them.
175 : : */
176 : : #define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20 /* ms */
177 : : #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */
178 : : #define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */
179 : :
180 : : /*
181 : : * Threshold that controls whether we bypass index vacuuming and heap
182 : : * vacuuming as an optimization
183 : : */
184 : : #define BYPASS_THRESHOLD_PAGES 0.02 /* i.e. 2% of rel_pages */
185 : :
186 : : /*
187 : : * Perform a failsafe check each time we scan another 4GB of pages.
188 : : * (Note that this is deliberately kept to a power-of-two, usually 2^19.)
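: : * With the default BLCKSZ of 8192, for instance, that works out to
: : * 4GB / 8KB = 524288 blocks.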
189 : : */
190 : : #define FAILSAFE_EVERY_PAGES \
191 : : ((BlockNumber) (((uint64) 4 * 1024 * 1024 * 1024) / BLCKSZ))
192 : :
193 : : /*
194 : : * When a table has no indexes, vacuum the FSM after every 8GB, approximately
195 : : * (it won't be exact because we only vacuum FSM after processing a heap page
196 : : * that has some removable tuples). When there are indexes, this is ignored,
197 : : * and we vacuum FSM after each index/heap cleaning pass.
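: : * (With the default BLCKSZ of 8192, this works out to 1048576 blocks.)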
198 : : */
199 : : #define VACUUM_FSM_EVERY_PAGES \
200 : : ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
201 : :
202 : : /*
203 : : * Before we consider skipping a page that's marked as clean in
204 : : * the visibility map, we must've seen at least this many clean pages.
205 : : */
206 : : #define SKIP_PAGES_THRESHOLD ((BlockNumber) 32)
207 : :
208 : : /*
209 : : * Size of the prefetch window for lazy vacuum backwards truncation scan.
210 : : * Needs to be a power of 2.
211 : : */
212 : : #define PREFETCH_SIZE ((BlockNumber) 32)
213 : :
214 : : /*
215 : : * Macro to check if we are in a parallel vacuum. If true, we are in
216 : : * parallel mode and the DSM segment is initialized.
217 : : */
218 : : #define ParallelVacuumIsActive(vacrel) ((vacrel)->pvs != NULL)
219 : :
220 : : /* Phases of vacuum during which we report error context. */
221 : : typedef enum
222 : : {
223 : : VACUUM_ERRCB_PHASE_UNKNOWN,
224 : : VACUUM_ERRCB_PHASE_SCAN_HEAP,
225 : : VACUUM_ERRCB_PHASE_VACUUM_INDEX,
226 : : VACUUM_ERRCB_PHASE_VACUUM_HEAP,
227 : : VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
228 : : VACUUM_ERRCB_PHASE_TRUNCATE,
229 : : } VacErrPhase;
230 : :
231 : : /*
232 : : * An eager scan of a page that is set all-frozen in the VM is considered
233 : : * "successful". To spread out freezing overhead across multiple normal
234 : : * vacuums, we limit the number of successful eager page freezes. The maximum
235 : : * number of eager page freezes is calculated as a ratio of the all-visible
236 : : * but not all-frozen pages at the beginning of the vacuum.
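: : * For example, if a table enters vacuum with 50000 all-visible pages of
: : * which 10000 are already all-frozen, at most 0.2 * (50000 - 10000) = 8000
: : * successful eager page freezes are allowed before eager scanning is
: : * disabled for the remainder of the vacuum.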
237 : : */
238 : : #define MAX_EAGER_FREEZE_SUCCESS_RATE 0.2
239 : :
240 : : /*
241 : : * On the assumption that different regions of the table tend to have
242 : : * similarly aged data, once vacuum fails to freeze
243 : : * vacuum_max_eager_freeze_failure_rate of the blocks in a region of size
244 : : * EAGER_SCAN_REGION_SIZE, it suspends eager scanning until it has progressed
245 : : * to another region of the table with potentially older data.
246 : : */
247 : : #define EAGER_SCAN_REGION_SIZE 4096
248 : :
249 : : /*
250 : : * heap_vac_scan_next_block() sets these flags to communicate information
251 : : * about the block it read to the caller.
252 : : */
253 : : #define VAC_BLK_WAS_EAGER_SCANNED (1 << 0)
254 : : #define VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM (1 << 1)
255 : :
256 : : typedef struct LVRelState
257 : : {
258 : : /* Target heap relation and its indexes */
259 : : Relation rel;
260 : : Relation *indrels;
261 : : int nindexes;
262 : :
263 : : /* Buffer access strategy and parallel vacuum state */
264 : : BufferAccessStrategy bstrategy;
265 : : ParallelVacuumState *pvs;
266 : :
267 : : /* Aggressive VACUUM? (must set relfrozenxid >= FreezeLimit) */
268 : : bool aggressive;
269 : : /* Use visibility map to skip? (disabled by DISABLE_PAGE_SKIPPING) */
270 : : bool skipwithvm;
271 : : /* Consider index vacuuming bypass optimization? */
272 : : bool consider_bypass_optimization;
273 : :
274 : : /* Doing index vacuuming, index cleanup, rel truncation? */
275 : : bool do_index_vacuuming;
276 : : bool do_index_cleanup;
277 : : bool do_rel_truncate;
278 : :
279 : : /* VACUUM operation's cutoffs for freezing and pruning */
280 : : struct VacuumCutoffs cutoffs;
281 : : GlobalVisState *vistest;
282 : : /* Tracks oldest extant XID/MXID for setting relfrozenxid/relminmxid */
283 : : TransactionId NewRelfrozenXid;
284 : : MultiXactId NewRelminMxid;
285 : : bool skippedallvis;
286 : :
287 : : /* Error reporting state */
288 : : char *dbname;
289 : : char *relnamespace;
290 : : char *relname;
291 : : char *indname; /* Current index name */
292 : : BlockNumber blkno; /* used only for heap operations */
293 : : OffsetNumber offnum; /* used only for heap operations */
294 : : VacErrPhase phase;
295 : : bool verbose; /* VACUUM VERBOSE? */
296 : :
297 : : /*
298 : : * dead_items stores TIDs whose index tuples are deleted by index
299 : : * vacuuming. Each TID points to an LP_DEAD line pointer from a heap page
300 : : * that has been processed by lazy_scan_prune. Also needed by
301 : : * lazy_vacuum_heap_rel, which marks the same LP_DEAD line pointers as
302 : : * LP_UNUSED during second heap pass.
303 : : *
304 : : * Both dead_items and dead_items_info are allocated in shared memory in
305 : : * parallel vacuum cases.
306 : : */
307 : : TidStore *dead_items; /* TIDs whose index tuples we'll delete */
308 : : VacDeadItemsInfo *dead_items_info;
309 : :
310 : : BlockNumber rel_pages; /* total number of pages */
311 : : BlockNumber scanned_pages; /* # pages examined (not skipped via VM) */
312 : :
313 : : /*
314 : : * Count of all-visible blocks eagerly scanned (for logging only). This
315 : : * does not include skippable blocks scanned due to SKIP_PAGES_THRESHOLD.
316 : : */
317 : : BlockNumber eager_scanned_pages;
318 : :
319 : : BlockNumber removed_pages; /* # pages removed by relation truncation */
320 : : BlockNumber new_frozen_tuple_pages; /* # pages with newly frozen tuples */
321 : :
322 : : /* # pages newly set all-visible in the VM */
323 : : BlockNumber vm_new_visible_pages;
324 : :
325 : : /*
326 : : * # pages newly set all-visible and all-frozen in the VM. This is a
327 : : * subset of vm_new_visible_pages. That is, vm_new_visible_pages includes
328 : : * all pages set all-visible, but vm_new_visible_frozen_pages includes
329 : : * only those which were also set all-frozen.
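: : * For example, a page newly set both all-visible and all-frozen counts
: : * toward both of these counters, while a page that was already all-visible
: : * and is newly set all-frozen counts only toward vm_new_frozen_pages below.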
330 : : */
331 : : BlockNumber vm_new_visible_frozen_pages;
332 : :
333 : : /* # all-visible pages newly set all-frozen in the VM */
334 : : BlockNumber vm_new_frozen_pages;
335 : :
336 : : BlockNumber lpdead_item_pages; /* # pages with LP_DEAD items */
337 : : BlockNumber missed_dead_pages; /* # pages with missed dead tuples */
338 : : BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
339 : :
340 : : /* Statistics output by us, for table */
341 : : double new_rel_tuples; /* new estimated total # of tuples */
342 : : double new_live_tuples; /* new estimated total # of live tuples */
343 : : /* Statistics output by index AMs */
344 : : IndexBulkDeleteResult **indstats;
345 : :
346 : : /* Instrumentation counters */
347 : : int num_index_scans;
348 : : int num_dead_items_resets;
349 : : Size total_dead_items_bytes;
350 : : /* Counters that follow are only for scanned_pages */
351 : : int64 tuples_deleted; /* # deleted from table */
352 : : int64 tuples_frozen; /* # newly frozen */
353 : : int64 lpdead_items; /* # deleted from indexes */
354 : : int64 live_tuples; /* # live tuples remaining */
355 : : int64 recently_dead_tuples; /* # dead, but not yet removable */
356 : : int64 missed_dead_tuples; /* # removable, but not removed */
357 : :
358 : : /* State maintained by heap_vac_scan_next_block() */
359 : : BlockNumber current_block; /* last block returned */
360 : : BlockNumber next_unskippable_block; /* next unskippable block */
361 : : bool next_unskippable_allvis; /* its visibility status */
362 : : bool next_unskippable_eager_scanned; /* if it was eagerly scanned */
363 : : Buffer next_unskippable_vmbuffer; /* buffer containing its VM bit */
364 : :
365 : : /* State related to managing eager scanning of all-visible pages */
366 : :
367 : : /*
368 : : * A normal vacuum that has failed to freeze too many eagerly scanned
369 : : * blocks in a region suspends eager scanning.
370 : : * next_eager_scan_region_start is the block number of the first block
371 : : * eligible for resumed eager scanning.
372 : : *
373 : : * When eager scanning is permanently disabled, either initially
374 : : * (including for aggressive vacuum) or due to hitting the success cap,
375 : : * this is set to InvalidBlockNumber.
376 : : */
377 : : BlockNumber next_eager_scan_region_start;
378 : :
379 : : /*
380 : : * The remaining number of blocks a normal vacuum will consider eager
381 : : * scanning when it is successful. When eager scanning is enabled, this is
382 : : * initialized to MAX_EAGER_FREEZE_SUCCESS_RATE of the total number of
383 : : * all-visible but not all-frozen pages. For each eager freeze success,
384 : : * this is decremented. Once it hits 0, eager scanning is permanently
385 : : * disabled. It is initialized to 0 if eager scanning starts out disabled
386 : : * (including for aggressive vacuum).
387 : : */
388 : : BlockNumber eager_scan_remaining_successes;
389 : :
390 : : /*
391 : : * The maximum number of blocks which may be eagerly scanned and not
392 : : * frozen before eager scanning is temporarily suspended. This is
393 : : * configurable both globally, via the
394 : : * vacuum_max_eager_freeze_failure_rate GUC, and per table, with a table
395 : : * storage parameter of the same name. It is calculated as
396 : : * vacuum_max_eager_freeze_failure_rate of EAGER_SCAN_REGION_SIZE blocks.
397 : : * It is 0 when eager scanning is disabled.
398 : : */
399 : : BlockNumber eager_scan_max_fails_per_region;
400 : :
401 : : /*
402 : : * The number of eagerly scanned blocks vacuum may still fail to freeze
403 : : * (due to age) in the current eager scan region. Vacuum resets it to
404 : : * eager_scan_max_fails_per_region each time it enters a new region of the
405 : : * relation. If eager_scan_remaining_fails hits 0, eager scanning is
406 : : * suspended until the next region. It is also 0 if eager scanning has
407 : : * been permanently disabled.
408 : : */
409 : : BlockNumber eager_scan_remaining_fails;
410 : : } LVRelState;
411 : :
412 : :
413 : : /* Struct for saving and restoring vacuum error information. */
414 : : typedef struct LVSavedErrInfo
415 : : {
416 : : BlockNumber blkno;
417 : : OffsetNumber offnum;
418 : : VacErrPhase phase;
419 : : } LVSavedErrInfo;
420 : :
421 : :
422 : : /* non-export function prototypes */
423 : : static void lazy_scan_heap(LVRelState *vacrel);
424 : : static void heap_vacuum_eager_scan_setup(LVRelState *vacrel,
425 : : const VacuumParams params);
426 : : static BlockNumber heap_vac_scan_next_block(ReadStream *stream,
427 : : void *callback_private_data,
428 : : void *per_buffer_data);
429 : : static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis);
430 : : static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
431 : : BlockNumber blkno, Page page,
432 : : bool sharelock, Buffer vmbuffer);
433 : : static int lazy_scan_prune(LVRelState *vacrel, Buffer buf,
434 : : BlockNumber blkno, Page page,
435 : : Buffer vmbuffer, bool all_visible_according_to_vm,
436 : : bool *has_lpdead_items, bool *vm_page_frozen);
437 : : static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf,
438 : : BlockNumber blkno, Page page,
439 : : bool *has_lpdead_items);
440 : : static void lazy_vacuum(LVRelState *vacrel);
441 : : static bool lazy_vacuum_all_indexes(LVRelState *vacrel);
442 : : static void lazy_vacuum_heap_rel(LVRelState *vacrel);
443 : : static void lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno,
444 : : Buffer buffer, OffsetNumber *deadoffsets,
445 : : int num_offsets, Buffer vmbuffer);
446 : : static bool lazy_check_wraparound_failsafe(LVRelState *vacrel);
447 : : static void lazy_cleanup_all_indexes(LVRelState *vacrel);
448 : : static IndexBulkDeleteResult *lazy_vacuum_one_index(Relation indrel,
449 : : IndexBulkDeleteResult *istat,
450 : : double reltuples,
451 : : LVRelState *vacrel);
452 : : static IndexBulkDeleteResult *lazy_cleanup_one_index(Relation indrel,
453 : : IndexBulkDeleteResult *istat,
454 : : double reltuples,
455 : : bool estimated_count,
456 : : LVRelState *vacrel);
457 : : static bool should_attempt_truncation(LVRelState *vacrel);
458 : : static void lazy_truncate_heap(LVRelState *vacrel);
459 : : static BlockNumber count_nondeletable_pages(LVRelState *vacrel,
460 : : bool *lock_waiter_detected);
461 : : static void dead_items_alloc(LVRelState *vacrel, int nworkers);
462 : : static void dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
463 : : int num_offsets);
464 : : static void dead_items_reset(LVRelState *vacrel);
465 : : static void dead_items_cleanup(LVRelState *vacrel);
466 : :
467 : : #ifdef USE_ASSERT_CHECKING
468 : : static bool heap_page_is_all_visible(Relation rel, Buffer buf,
469 : : TransactionId OldestXmin,
470 : : bool *all_frozen,
471 : : TransactionId *visibility_cutoff_xid,
472 : : OffsetNumber *logging_offnum);
473 : : #endif
474 : : static bool heap_page_would_be_all_visible(Relation rel, Buffer buf,
475 : : TransactionId OldestXmin,
476 : : OffsetNumber *deadoffsets,
477 : : int ndeadoffsets,
478 : : bool *all_frozen,
479 : : TransactionId *visibility_cutoff_xid,
480 : : OffsetNumber *logging_offnum);
481 : : static void update_relstats_all_indexes(LVRelState *vacrel);
482 : : static void vacuum_error_callback(void *arg);
483 : : static void update_vacuum_error_info(LVRelState *vacrel,
484 : : LVSavedErrInfo *saved_vacrel,
485 : : int phase, BlockNumber blkno,
486 : : OffsetNumber offnum);
487 : : static void restore_vacuum_error_info(LVRelState *vacrel,
488 : : const LVSavedErrInfo *saved_vacrel);
489 : :
490 : :
491 : :
492 : : /*
493 : : * Helper to set up the eager scanning state for vacuuming a single relation.
494 : : * Initializes the eager scan management related members of the LVRelState.
495 : : *
496 : : * Caller provides whether or not an aggressive vacuum is required due to
497 : : * vacuum options or for relfrozenxid/relminmxid advancement.
498 : : */
499 : : static void
500 : 645 : heap_vacuum_eager_scan_setup(LVRelState *vacrel, const VacuumParams params)
501 : : {
502 : 645 : uint32 randseed;
503 : 645 : BlockNumber allvisible;
504 : 645 : BlockNumber allfrozen;
505 : 645 : float first_region_ratio;
506 : 645 : bool oldest_unfrozen_before_cutoff = false;
507 : :
508 : : /*
509 : : * Initialize eager scan management fields to their disabled values.
510 : : * Aggressive vacuums, normal vacuums of small tables, and normal vacuums
511 : : * of tables without sufficiently old tuples disable eager scanning.
512 : : */
513 : 645 : vacrel->next_eager_scan_region_start = InvalidBlockNumber;
514 : 645 : vacrel->eager_scan_max_fails_per_region = 0;
515 : 645 : vacrel->eager_scan_remaining_fails = 0;
516 : 645 : vacrel->eager_scan_remaining_successes = 0;
517 : :
518 : : /* If eager scanning is explicitly disabled, just return. */
519 [ - + ]: 645 : if (params.max_eager_freeze_failure_rate == 0)
520 : 0 : return;
521 : :
522 : : /*
523 : : * The caller will have determined whether or not an aggressive vacuum is
524 : : * required by either the vacuum parameters or the relative age of the
525 : : * oldest unfrozen transaction IDs. An aggressive vacuum must scan every
526 : : * all-visible page to safely advance the relfrozenxid and/or relminmxid,
527 : : * so scans of all-visible pages are not considered eager.
528 : : */
529 [ + + ]: 645 : if (vacrel->aggressive)
530 : 135 : return;
531 : :
532 : : /*
533 : : * Aggressively vacuuming a small relation shouldn't take long, so it
534 : : * isn't worth amortizing. We use two times the region size as the size
535 : : * cutoff because the eager scan start block is a random spot somewhere in
536 : : * the first region, making the second region the first to be eager
537 : : * scanned normally.
538 : : */
539 [ + - ]: 510 : if (vacrel->rel_pages < 2 * EAGER_SCAN_REGION_SIZE)
540 : 510 : return;
541 : :
542 : : /*
543 : : * We only want to enable eager scanning if we are likely to be able to
544 : : * freeze some of the pages in the relation.
545 : : *
546 : : * Tuples with XIDs older than OldestXmin or MXIDs older than OldestMxact
547 : : * are technically freezable, but we won't freeze them unless the criteria
548 : : * for opportunistic freezing are met. Only tuples with XIDs/MXIDs older
549 : : * than the FreezeLimit/MultiXactCutoff are frozen in the common case.
550 : : *
551 : : * So, as a heuristic, we wait until the FreezeLimit has advanced past the
552 : : * relfrozenxid or the MultiXactCutoff has advanced past the relminmxid to
553 : : * enable eager scanning.
554 : : */
555 [ # # # # ]: 0 : if (TransactionIdIsNormal(vacrel->cutoffs.relfrozenxid) &&
556 : 0 : TransactionIdPrecedes(vacrel->cutoffs.relfrozenxid,
557 : 0 : vacrel->cutoffs.FreezeLimit))
558 : 0 : oldest_unfrozen_before_cutoff = true;
559 : :
560 [ # # ]: 0 : if (!oldest_unfrozen_before_cutoff &&
561 [ # # # # ]: 0 : MultiXactIdIsValid(vacrel->cutoffs.relminmxid) &&
562 : 0 : MultiXactIdPrecedes(vacrel->cutoffs.relminmxid,
563 : 0 : vacrel->cutoffs.MultiXactCutoff))
564 : 0 : oldest_unfrozen_before_cutoff = true;
565 : :
566 [ # # ]: 0 : if (!oldest_unfrozen_before_cutoff)
567 : 0 : return;
568 : :
569 : : /* We have met the criteria to eagerly scan some pages. */
570 : :
571 : : /*
572 : : * Our success cap is MAX_EAGER_FREEZE_SUCCESS_RATE of the number of
573 : : * all-visible but not all-frozen blocks in the relation.
574 : : */
575 : 0 : visibilitymap_count(vacrel->rel, &allvisible, &allfrozen);
576 : :
577 : 0 : vacrel->eager_scan_remaining_successes =
578 : 0 : (BlockNumber) (MAX_EAGER_FREEZE_SUCCESS_RATE *
579 : 0 : (allvisible - allfrozen));
580 : :
581 : : /* If every all-visible page is frozen, eager scanning is disabled. */
582 [ # # ]: 0 : if (vacrel->eager_scan_remaining_successes == 0)
583 : 0 : return;
584 : :
585 : : /*
586 : : * Now calculate the bounds of the first eager scan region. Its end block
587 : : * will be a random spot somewhere in the first EAGER_SCAN_REGION_SIZE
588 : : * blocks. This affects the bounds of all subsequent regions and avoids
589 : : * eager scanning and failing to freeze the same blocks each vacuum of the
590 : : * relation.
591 : : */
592 : 0 : randseed = pg_prng_uint32(&pg_global_prng_state);
593 : :
594 : 0 : vacrel->next_eager_scan_region_start = randseed % EAGER_SCAN_REGION_SIZE;
595 : :
596 [ # # ]: 0 : Assert(params.max_eager_freeze_failure_rate > 0 &&
597 : : params.max_eager_freeze_failure_rate <= 1);
598 : :
599 : 0 : vacrel->eager_scan_max_fails_per_region =
600 : 0 : params.max_eager_freeze_failure_rate *
601 : : EAGER_SCAN_REGION_SIZE;
602 : :
603 : : /*
604 : : * The first region will be smaller than subsequent regions. As such,
605 : : * adjust the eager freeze failures tolerated for this region.
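: : * For instance, if the random region start lands at block 1024,
: : * first_region_ratio is 1 - 1024/4096 = 0.75, so the first region
: : * tolerates 75% of eager_scan_max_fails_per_region failures.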
606 : : */
607 : 0 : first_region_ratio = 1 - (float) vacrel->next_eager_scan_region_start /
608 : : EAGER_SCAN_REGION_SIZE;
609 : :
610 : 0 : vacrel->eager_scan_remaining_fails =
611 : 0 : vacrel->eager_scan_max_fails_per_region *
612 : 0 : first_region_ratio;
613 [ - + ]: 645 : }
614 : :
615 : : /*
616 : : * heap_vacuum_rel() -- perform VACUUM for one heap relation
617 : : *
618 : : * This routine sets things up for and then calls lazy_scan_heap, where
619 : : * almost all work actually takes place. Finalizes everything after call
620 : : * returns by managing relation truncation and updating rel's pg_class
621 : : * entry. (Also updates pg_class entries for any indexes that need it.)
622 : : *
623 : : * At entry, we have already established a transaction and opened
624 : : * and locked the relation.
625 : : */
626 : : void
627 : 645 : heap_vacuum_rel(Relation rel, const VacuumParams params,
628 : : BufferAccessStrategy bstrategy)
629 : : {
630 : 645 : LVRelState *vacrel;
631 : 645 : bool verbose,
632 : : instrument,
633 : : skipwithvm,
634 : : frozenxid_updated,
635 : : minmulti_updated;
636 : 645 : BlockNumber orig_rel_pages,
637 : : new_rel_pages,
638 : : new_rel_allvisible,
639 : : new_rel_allfrozen;
640 : 645 : PGRUsage ru0;
641 : 645 : TimestampTz starttime = 0;
642 : 645 : PgStat_Counter startreadtime = 0,
643 : 645 : startwritetime = 0;
644 : 645 : WalUsage startwalusage = pgWalUsage;
645 : 645 : BufferUsage startbufferusage = pgBufferUsage;
646 : 645 : ErrorContextCallback errcallback;
647 : 645 : char **indnames = NULL;
648 : 645 : Size dead_items_max_bytes = 0;
649 : :
650 : 645 : verbose = (params.options & VACOPT_VERBOSE) != 0;
651 [ - + + - ]: 645 : instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
652 : 0 : params.log_vacuum_min_duration >= 0));
653 [ + - ]: 645 : if (instrument)
654 : : {
655 : 0 : pg_rusage_init(&ru0);
656 [ # # ]: 0 : if (track_io_timing)
657 : : {
658 : 0 : startreadtime = pgStatBlockReadTime;
659 : 0 : startwritetime = pgStatBlockWriteTime;
660 : 0 : }
661 : 0 : }
662 : :
663 : : /* Used for instrumentation and stats report */
664 : 645 : starttime = GetCurrentTimestamp();
665 : :
666 : 645 : pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM,
667 : 645 : RelationGetRelid(rel));
668 [ - + ]: 645 : if (AmAutoVacuumWorkerProcess())
669 : 0 : pgstat_progress_update_param(PROGRESS_VACUUM_STARTED_BY,
670 : 0 : params.is_wraparound
671 : : ? PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM_WRAPAROUND
672 : : : PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM);
673 : : else
674 : 645 : pgstat_progress_update_param(PROGRESS_VACUUM_STARTED_BY,
675 : : PROGRESS_VACUUM_STARTED_BY_MANUAL);
676 : :
677 : : /*
678 : : * Setup error traceback support for ereport() first. The idea is to set
679 : : * up an error context callback to display additional information on any
680 : : * error during a vacuum. During different phases of vacuum, we update
681 : : * the state so that the error context callback always displays current
682 : : * information.
683 : : *
684 : : * Copy the names of heap rel into local memory for error reporting
685 : : * purposes, too. It isn't always safe to assume that we can get the name
686 : : * of each rel. It's convenient for code in lazy_scan_heap to always use
687 : : * these temp copies.
688 : : */
689 : 645 : vacrel = palloc0_object(LVRelState);
690 : 645 : vacrel->dbname = get_database_name(MyDatabaseId);
691 : 645 : vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
692 : 645 : vacrel->relname = pstrdup(RelationGetRelationName(rel));
693 : 645 : vacrel->indname = NULL;
694 : 645 : vacrel->phase = VACUUM_ERRCB_PHASE_UNKNOWN;
695 : 645 : vacrel->verbose = verbose;
696 : 645 : errcallback.callback = vacuum_error_callback;
697 : 645 : errcallback.arg = vacrel;
698 : 645 : errcallback.previous = error_context_stack;
699 : 645 : error_context_stack = &errcallback;
700 : :
701 : : /* Set up high level stuff about rel and its indexes */
702 : 645 : vacrel->rel = rel;
703 : 1290 : vac_open_indexes(vacrel->rel, RowExclusiveLock, &vacrel->nindexes,
704 : 645 : &vacrel->indrels);
705 : 645 : vacrel->bstrategy = bstrategy;
706 [ - + # # ]: 645 : if (instrument && vacrel->nindexes > 0)
707 : : {
708 : : /* Copy index names used by instrumentation (not error reporting) */
709 : 0 : indnames = palloc_array(char *, vacrel->nindexes);
710 [ # # ]: 0 : for (int i = 0; i < vacrel->nindexes; i++)
711 : 0 : indnames[i] = pstrdup(RelationGetRelationName(vacrel->indrels[i]));
712 : 0 : }
713 : :
714 : : /*
715 : : * The index_cleanup param either disables index vacuuming and cleanup or
716 : : * forces it to go ahead when we would otherwise apply the index bypass
717 : : * optimization. The default is 'auto', which leaves the final decision
718 : : * up to lazy_vacuum().
719 : : *
720 : : * The truncate param allows user to avoid attempting relation truncation,
721 : : * though it can't force truncation to happen.
722 : : */
723 [ + - ]: 645 : Assert(params.index_cleanup != VACOPTVALUE_UNSPECIFIED);
724 [ + - ]: 645 : Assert(params.truncate != VACOPTVALUE_UNSPECIFIED &&
725 : : params.truncate != VACOPTVALUE_AUTO);
726 : :
727 : : /*
728 : : * While VacuumFailSafeActive is reset to false before calling this, we
729 : : * still need to reset it here due to recursive calls.
730 : : */
731 : 645 : VacuumFailsafeActive = false;
732 : 645 : vacrel->consider_bypass_optimization = true;
733 : 645 : vacrel->do_index_vacuuming = true;
734 : 645 : vacrel->do_index_cleanup = true;
735 : 645 : vacrel->do_rel_truncate = (params.truncate != VACOPTVALUE_DISABLED);
736 [ + + ]: 645 : if (params.index_cleanup == VACOPTVALUE_DISABLED)
737 : : {
738 : : /* Force disable index vacuuming up-front */
739 : 7 : vacrel->do_index_vacuuming = false;
740 : 7 : vacrel->do_index_cleanup = false;
741 : 7 : }
742 [ + + ]: 638 : else if (params.index_cleanup == VACOPTVALUE_ENABLED)
743 : : {
744 : : /* Force index vacuuming. Note that failsafe can still bypass. */
745 : 4 : vacrel->consider_bypass_optimization = false;
746 : 4 : }
747 : : else
748 : : {
749 : : /* Default/auto, make all decisions dynamically */
750 [ + - ]: 634 : Assert(params.index_cleanup == VACOPTVALUE_AUTO);
751 : : }
752 : :
753 : : /* Initialize page counters explicitly (be tidy) */
754 : 645 : vacrel->scanned_pages = 0;
755 : 645 : vacrel->eager_scanned_pages = 0;
756 : 645 : vacrel->removed_pages = 0;
757 : 645 : vacrel->new_frozen_tuple_pages = 0;
758 : 645 : vacrel->lpdead_item_pages = 0;
759 : 645 : vacrel->missed_dead_pages = 0;
760 : 645 : vacrel->nonempty_pages = 0;
761 : : /* dead_items_alloc allocates vacrel->dead_items later on */
762 : :
763 : : /* Allocate/initialize output statistics state */
764 : 645 : vacrel->new_rel_tuples = 0;
765 : 645 : vacrel->new_live_tuples = 0;
766 : 645 : vacrel->indstats = (IndexBulkDeleteResult **)
767 : 645 : palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
768 : :
769 : : /* Initialize remaining counters (be tidy) */
770 : 645 : vacrel->num_index_scans = 0;
771 : 645 : vacrel->num_dead_items_resets = 0;
772 : 645 : vacrel->total_dead_items_bytes = 0;
773 : 645 : vacrel->tuples_deleted = 0;
774 : 645 : vacrel->tuples_frozen = 0;
775 : 645 : vacrel->lpdead_items = 0;
776 : 645 : vacrel->live_tuples = 0;
777 : 645 : vacrel->recently_dead_tuples = 0;
778 : 645 : vacrel->missed_dead_tuples = 0;
779 : :
780 : 645 : vacrel->vm_new_visible_pages = 0;
781 : 645 : vacrel->vm_new_visible_frozen_pages = 0;
782 : 645 : vacrel->vm_new_frozen_pages = 0;
783 : :
784 : : /*
785 : : * Get cutoffs that determine which deleted tuples are considered DEAD,
786 : : * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
787 : : * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
788 : : * happen in this order to ensure that the OldestXmin cutoff field works
789 : : * as an upper bound on the XIDs stored in the pages we'll actually scan
790 : : * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
791 : : *
792 : : * Next acquire vistest, a related cutoff that's used in pruning. We use
793 : : * vistest in combination with OldestXmin to ensure that
794 : : * heap_page_prune_and_freeze() always removes any deleted tuple whose
795 : : * xmax is < OldestXmin. lazy_scan_prune must never become confused about
796 : : * whether a tuple should be frozen or removed. (In the future we might
797 : : * want to teach lazy_scan_prune to recompute vistest from time to time,
798 : : * to increase the number of dead tuples it can prune away.)
799 : : */
800 : 645 : vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
801 : 645 : vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
802 : 645 : vacrel->vistest = GlobalVisTestFor(rel);
803 : :
804 : : /* Initialize state used to track oldest extant XID/MXID */
805 : 645 : vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
806 : 645 : vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
807 : :
808 : : /*
809 : : * Initialize state related to tracking all-visible page skipping. This is
810 : : * very important to determine whether or not it is safe to advance the
811 : : * relfrozenxid/relminmxid.
812 : : */
813 : 645 : vacrel->skippedallvis = false;
814 : 645 : skipwithvm = true;
815 [ + + ]: 645 : if (params.options & VACOPT_DISABLE_PAGE_SKIPPING)
816 : : {
817 : : /*
818 : : * Force aggressive mode, and disable skipping blocks using the
819 : : * visibility map (even those set all-frozen)
820 : : */
821 : 15 : vacrel->aggressive = true;
822 : 15 : skipwithvm = false;
823 : 15 : }
824 : :
825 : 645 : vacrel->skipwithvm = skipwithvm;
826 : :
827 : : /*
828 : : * Set up eager scan tracking state. This must happen after determining
829 : : * whether or not the vacuum must be aggressive, because only normal
830 : : * vacuums use the eager scan algorithm.
831 : : */
832 : 645 : heap_vacuum_eager_scan_setup(vacrel, params);
833 : :
834 : : /* Report the vacuum mode: 'normal' or 'aggressive' */
835 : 645 : pgstat_progress_update_param(PROGRESS_VACUUM_MODE,
836 : 645 : vacrel->aggressive
837 : : ? PROGRESS_VACUUM_MODE_AGGRESSIVE
838 : : : PROGRESS_VACUUM_MODE_NORMAL);
839 : :
840 [ + - ]: 645 : if (verbose)
841 : : {
842 [ # # ]: 0 : if (vacrel->aggressive)
843 [ # # # # ]: 0 : ereport(INFO,
844 : : (errmsg("aggressively vacuuming \"%s.%s.%s\"",
845 : : vacrel->dbname, vacrel->relnamespace,
846 : : vacrel->relname)));
847 : : else
848 [ # # # # ]: 0 : ereport(INFO,
849 : : (errmsg("vacuuming \"%s.%s.%s\"",
850 : : vacrel->dbname, vacrel->relnamespace,
851 : : vacrel->relname)));
852 : 0 : }
853 : :
854 : : /*
855 : : * Allocate dead_items memory using dead_items_alloc. This handles
856 : : * parallel VACUUM initialization as part of allocating shared memory
857 : : * space used for dead_items. (But do a failsafe precheck first, to
858 : : * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
859 : : * is already dangerously old.)
860 : : */
861 : 645 : lazy_check_wraparound_failsafe(vacrel);
862 : 645 : dead_items_alloc(vacrel, params.nworkers);
863 : :
864 : : /*
865 : : * Call lazy_scan_heap to perform all required heap pruning, index
866 : : * vacuuming, and heap vacuuming (plus related processing)
867 : : */
868 : 645 : lazy_scan_heap(vacrel);
869 : :
870 : : /*
871 : : * Save dead items max_bytes and update the memory usage statistics before
872 : : * cleanup, since they are freed in parallel vacuum cases during
873 : : * dead_items_cleanup().
874 : : */
875 : 645 : dead_items_max_bytes = vacrel->dead_items_info->max_bytes;
876 : 645 : vacrel->total_dead_items_bytes += TidStoreMemoryUsage(vacrel->dead_items);
877 : :
878 : : /*
879 : : * Free resources managed by dead_items_alloc. This ends parallel mode in
880 : : * passing when necessary.
881 : : */
882 : 645 : dead_items_cleanup(vacrel);
883 [ + - ]: 645 : Assert(!IsInParallelMode());
884 : :
885 : : /*
886 : : * Update pg_class entries for each of rel's indexes where appropriate.
887 : : *
888 : : * Unlike the later update to rel's pg_class entry, this is not critical.
889 : : * It only maintains relpages/reltuples statistics used by the planner.
890 : : */
891 [ + + ]: 645 : if (vacrel->do_index_cleanup)
892 : 638 : update_relstats_all_indexes(vacrel);
893 : :
894 : : /* Done with rel's indexes */
895 : 645 : vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
896 : :
897 : : /* Optionally truncate rel */
898 [ + + ]: 645 : if (should_attempt_truncation(vacrel))
899 : 33 : lazy_truncate_heap(vacrel);
900 : :
901 : : /* Pop the error context stack */
902 : 645 : error_context_stack = errcallback.previous;
903 : :
904 : : /* Report that we are now doing final cleanup */
905 : 645 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
906 : : PROGRESS_VACUUM_PHASE_FINAL_CLEANUP);
907 : :
908 : : /*
909 : : * Prepare to update rel's pg_class entry.
910 : : *
911 : : * Aggressive VACUUMs must always be able to advance relfrozenxid to a
912 : : * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
913 : : * Non-aggressive VACUUMs may advance them by any amount, or not at all.
914 : : */
915 [ + + - + : 645 : Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
+ - ]
916 : : TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit :
917 : : vacrel->cutoffs.relfrozenxid,
918 : : vacrel->NewRelfrozenXid));
919 [ + - # # : 645 : Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
# # ]
920 : : MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff :
921 : : vacrel->cutoffs.relminmxid,
922 : : vacrel->NewRelminMxid));
923 [ + + ]: 645 : if (vacrel->skippedallvis)
924 : : {
925 : : /*
926 : : * Must keep original relfrozenxid in a non-aggressive VACUUM that
927 : : * chose to skip an all-visible page range. The state that tracks new
928 : : * values will have missed unfrozen XIDs from the pages we skipped.
929 : : */
930 [ + - ]: 4 : Assert(!vacrel->aggressive);
931 : 4 : vacrel->NewRelfrozenXid = InvalidTransactionId;
932 : 4 : vacrel->NewRelminMxid = InvalidMultiXactId;
933 : 4 : }
934 : :
935 : : /*
936 : : * For safety, clamp relallvisible to be not more than what we're setting
937 : : * pg_class.relpages to
938 : : */
939 : 645 : new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
940 : 645 : visibilitymap_count(rel, &new_rel_allvisible, &new_rel_allfrozen);
941 [ + - ]: 645 : if (new_rel_allvisible > new_rel_pages)
942 : 0 : new_rel_allvisible = new_rel_pages;
943 : :
944 : : /*
945 : : * An all-frozen block _must_ be all-visible. As such, clamp the count of
946 : : * all-frozen blocks to the count of all-visible blocks. This matches the
947 : : * clamping of relallvisible above.
948 : : */
949 [ + - ]: 645 : if (new_rel_allfrozen > new_rel_allvisible)
950 : 0 : new_rel_allfrozen = new_rel_allvisible;
951 : :
952 : : /*
953 : : * Now actually update rel's pg_class entry.
954 : : *
955 : : * In principle new_live_tuples could be -1 indicating that we (still)
956 : : * don't know the tuple count. In practice that can't happen, since we
957 : : * scan every page that isn't skipped using the visibility map.
958 : : */
959 : 1290 : vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
960 : 645 : new_rel_allvisible, new_rel_allfrozen,
961 : 645 : vacrel->nindexes > 0,
962 : 645 : vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
963 : : &frozenxid_updated, &minmulti_updated, false);
964 : :
965 : : /*
966 : : * Report results to the cumulative stats system, too.
967 : : *
968 : : * Deliberately avoid telling the stats system about LP_DEAD items that
969 : : * remain in the table due to VACUUM bypassing index and heap vacuuming.
970 : : * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
971 : : * It seems like a good idea to err on the side of not vacuuming again too
972 : : * soon in cases where the failsafe prevented significant amounts of heap
973 : : * vacuuming.
974 : : */
975 : 1290 : pgstat_report_vacuum(rel,
976 [ + + ]: 645 : Max(vacrel->new_live_tuples, 0),
977 : 1290 : vacrel->recently_dead_tuples +
978 : 645 : vacrel->missed_dead_tuples,
979 : 645 : starttime);
980 : 645 : pgstat_progress_end_command();
981 : :
982 [ + - ]: 645 : if (instrument)
983 : : {
984 : 0 : TimestampTz endtime = GetCurrentTimestamp();
985 : :
986 [ # # # # : 0 : if (verbose || params.log_vacuum_min_duration == 0 ||
# # ]
987 : 0 : TimestampDifferenceExceeds(starttime, endtime,
988 : 0 : params.log_vacuum_min_duration))
989 : : {
990 : 0 : long secs_dur;
991 : 0 : int usecs_dur;
992 : 0 : WalUsage walusage;
993 : 0 : BufferUsage bufferusage;
994 : 0 : StringInfoData buf;
995 : 0 : char *msgfmt;
996 : 0 : int32 diff;
997 : 0 : double read_rate = 0,
998 : 0 : write_rate = 0;
999 : 0 : int64 total_blks_hit;
1000 : 0 : int64 total_blks_read;
1001 : 0 : int64 total_blks_dirtied;
1002 : :
1003 : 0 : TimestampDifference(starttime, endtime, &secs_dur, &usecs_dur);
1004 : 0 : memset(&walusage, 0, sizeof(WalUsage));
1005 : 0 : WalUsageAccumDiff(&walusage, &pgWalUsage, &startwalusage);
1006 : 0 : memset(&bufferusage, 0, sizeof(BufferUsage));
1007 : 0 : BufferUsageAccumDiff(&bufferusage, &pgBufferUsage, &startbufferusage);
1008 : :
1009 : 0 : total_blks_hit = bufferusage.shared_blks_hit +
1010 : 0 : bufferusage.local_blks_hit;
1011 : 0 : total_blks_read = bufferusage.shared_blks_read +
1012 : 0 : bufferusage.local_blks_read;
1013 : 0 : total_blks_dirtied = bufferusage.shared_blks_dirtied +
1014 : 0 : bufferusage.local_blks_dirtied;
1015 : :
1016 : 0 : initStringInfo(&buf);
1017 [ # # ]: 0 : if (verbose)
1018 : : {
1019 : : /*
1020 : : * Aggressiveness already reported earlier, in dedicated
1021 : : * VACUUM VERBOSE ereport
1022 : : */
1023 [ # # ]: 0 : Assert(!params.is_wraparound);
1024 : 0 : msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
1025 : 0 : }
1026 [ # # ]: 0 : else if (params.is_wraparound)
1027 : : {
1028 : : /*
1029 : : * While it's possible for a VACUUM to be both is_wraparound
1030 : : * and !aggressive, that's just a corner-case -- is_wraparound
1031 : : * implies aggressive. Produce distinct output for the corner
1032 : : * case all the same, just in case.
1033 : : */
1034 [ # # ]: 0 : if (vacrel->aggressive)
1035 : 0 : msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1036 : : else
1037 : 0 : msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1038 : 0 : }
1039 : : else
1040 : : {
1041 [ # # ]: 0 : if (vacrel->aggressive)
1042 : 0 : msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
1043 : : else
1044 : 0 : msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
1045 : : }
1046 : 0 : appendStringInfo(&buf, msgfmt,
1047 : 0 : vacrel->dbname,
1048 : 0 : vacrel->relnamespace,
1049 : 0 : vacrel->relname,
1050 : 0 : vacrel->num_index_scans);
1051 : 0 : appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total), %u eagerly scanned\n"),
1052 : 0 : vacrel->removed_pages,
1053 : 0 : new_rel_pages,
1054 : 0 : vacrel->scanned_pages,
1055 [ # # ]: 0 : orig_rel_pages == 0 ? 100.0 :
1056 : 0 : 100.0 * vacrel->scanned_pages /
1057 : 0 : orig_rel_pages,
1058 : 0 : vacrel->eager_scanned_pages);
1059 : 0 : appendStringInfo(&buf,
1060 : 0 : _("tuples: %" PRId64 " removed, %" PRId64 " remain, %" PRId64 " are dead but not yet removable\n"),
1061 : 0 : vacrel->tuples_deleted,
1062 : 0 : (int64) vacrel->new_rel_tuples,
1063 : 0 : vacrel->recently_dead_tuples);
1064 [ # # ]: 0 : if (vacrel->missed_dead_tuples > 0)
1065 : 0 : appendStringInfo(&buf,
1066 : 0 : _("tuples missed: %" PRId64 " dead from %u pages not removed due to cleanup lock contention\n"),
1067 : 0 : vacrel->missed_dead_tuples,
1068 : 0 : vacrel->missed_dead_pages);
1069 : 0 : diff = (int32) (ReadNextTransactionId() -
1070 : 0 : vacrel->cutoffs.OldestXmin);
1071 : 0 : appendStringInfo(&buf,
1072 : 0 : _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
1073 : 0 : vacrel->cutoffs.OldestXmin, diff);
1074 [ # # ]: 0 : if (frozenxid_updated)
1075 : : {
1076 : 0 : diff = (int32) (vacrel->NewRelfrozenXid -
1077 : 0 : vacrel->cutoffs.relfrozenxid);
1078 : 0 : appendStringInfo(&buf,
1079 : 0 : _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
1080 : 0 : vacrel->NewRelfrozenXid, diff);
1081 : 0 : }
1082 [ # # ]: 0 : if (minmulti_updated)
1083 : : {
1084 : 0 : diff = (int32) (vacrel->NewRelminMxid -
1085 : 0 : vacrel->cutoffs.relminmxid);
1086 : 0 : appendStringInfo(&buf,
1087 : 0 : _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
1088 : 0 : vacrel->NewRelminMxid, diff);
1089 : 0 : }
1090 : 0 : appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %" PRId64 " tuples frozen\n"),
1091 : 0 : vacrel->new_frozen_tuple_pages,
1092 [ # # ]: 0 : orig_rel_pages == 0 ? 100.0 :
1093 : 0 : 100.0 * vacrel->new_frozen_tuple_pages /
1094 : 0 : orig_rel_pages,
1095 : 0 : vacrel->tuples_frozen);
1096 : :
1097 : 0 : appendStringInfo(&buf,
1098 : 0 : _("visibility map: %u pages set all-visible, %u pages set all-frozen (%u were all-visible)\n"),
1099 : 0 : vacrel->vm_new_visible_pages,
1100 : 0 : vacrel->vm_new_visible_frozen_pages +
1101 : 0 : vacrel->vm_new_frozen_pages,
1102 : 0 : vacrel->vm_new_frozen_pages);
1103 [ # # ]: 0 : if (vacrel->do_index_vacuuming)
1104 : : {
1105 [ # # # # ]: 0 : if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
1106 : 0 : appendStringInfoString(&buf, _("index scan not needed: "));
1107 : : else
1108 : 0 : appendStringInfoString(&buf, _("index scan needed: "));
1109 : :
1110 : 0 : msgfmt = _("%u pages from table (%.2f%% of total) had %" PRId64 " dead item identifiers removed\n");
1111 : 0 : }
1112 : : else
1113 : : {
1114 [ # # ]: 0 : if (!VacuumFailsafeActive)
1115 : 0 : appendStringInfoString(&buf, _("index scan bypassed: "));
1116 : : else
1117 : 0 : appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
1118 : :
1119 : 0 : msgfmt = _("%u pages from table (%.2f%% of total) have %" PRId64 " dead item identifiers\n");
1120 : : }
1121 : 0 : appendStringInfo(&buf, msgfmt,
1122 : 0 : vacrel->lpdead_item_pages,
1123 [ # # ]: 0 : orig_rel_pages == 0 ? 100.0 :
1124 : 0 : 100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
1125 : 0 : vacrel->lpdead_items);
1126 [ # # ]: 0 : for (int i = 0; i < vacrel->nindexes; i++)
1127 : : {
1128 : 0 : IndexBulkDeleteResult *istat = vacrel->indstats[i];
1129 : :
1130 [ # # ]: 0 : if (!istat)
1131 : 0 : continue;
1132 : :
1133 : 0 : appendStringInfo(&buf,
1134 : 0 : _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
1135 : 0 : indnames[i],
1136 : 0 : istat->num_pages,
1137 : 0 : istat->pages_newly_deleted,
1138 : 0 : istat->pages_deleted,
1139 : 0 : istat->pages_free);
1140 [ # # # ]: 0 : }
1141 [ # # ]: 0 : if (track_cost_delay_timing)
1142 : : {
1143 : : /*
1144 : : * We bypass the changecount mechanism because this value is
1145 : : * only updated by the calling process. We also rely on the
1146 : : * above call to pgstat_progress_end_command() to not clear
1147 : : * the st_progress_param array.
1148 : : */
1149 : 0 : appendStringInfo(&buf, _("delay time: %.3f ms\n"),
1150 : 0 : (double) MyBEEntry->st_progress_param[PROGRESS_VACUUM_DELAY_TIME] / 1000000.0);
1151 : 0 : }
1152 [ # # ]: 0 : if (track_io_timing)
1153 : : {
1154 : 0 : double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
1155 : 0 : double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
1156 : :
1157 : 0 : appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
1158 : 0 : read_ms, write_ms);
1159 : 0 : }
1160 [ # # # # ]: 0 : if (secs_dur > 0 || usecs_dur > 0)
1161 : : {
1162 : 0 : read_rate = (double) BLCKSZ * total_blks_read /
1163 : 0 : (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1164 : 0 : write_rate = (double) BLCKSZ * total_blks_dirtied /
1165 : 0 : (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1166 : 0 : }
1167 : 0 : appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
1168 : 0 : read_rate, write_rate);
1169 : 0 : appendStringInfo(&buf,
1170 : 0 : _("buffer usage: %" PRId64 " hits, %" PRId64 " reads, %" PRId64 " dirtied\n"),
1171 : 0 : total_blks_hit,
1172 : 0 : total_blks_read,
1173 : 0 : total_blks_dirtied);
1174 : 0 : appendStringInfo(&buf,
1175 : 0 : _("WAL usage: %" PRId64 " records, %" PRId64 " full page images, %" PRIu64 " bytes, %" PRIu64 " full page image bytes, %" PRId64 " buffers full\n"),
1176 : 0 : walusage.wal_records,
1177 : 0 : walusage.wal_fpi,
1178 : 0 : walusage.wal_bytes,
1179 : 0 : walusage.wal_fpi_bytes,
1180 : 0 : walusage.wal_buffers_full);
1181 : :
1182 : : /*
1183 : : * Report the dead items memory usage.
1184 : : *
1185 : : * The num_dead_items_resets counter increases when we reset the
1186 : : * collected dead items, so the counter is non-zero if at least
1187 : : * one dead item is collected, even if index vacuuming is
1188 : : * disabled.
1189 : : */
1190 : 0 : appendStringInfo(&buf,
1191 : 0 : ngettext("memory usage: dead item storage %.2f MB accumulated across %d reset (limit %.2f MB each)\n",
1192 : : "memory usage: dead item storage %.2f MB accumulated across %d resets (limit %.2f MB each)\n",
1193 : 0 : vacrel->num_dead_items_resets),
1194 : 0 : (double) vacrel->total_dead_items_bytes / (1024 * 1024),
1195 : 0 : vacrel->num_dead_items_resets,
1196 : 0 : (double) dead_items_max_bytes / (1024 * 1024));
1197 : 0 : appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
1198 : :
1199 [ # # # # : 0 : ereport(verbose ? INFO : LOG,
# # # # #
# ]
1200 : : (errmsg_internal("%s", buf.data)));
1201 : 0 : pfree(buf.data);
1202 : 0 : }
1203 : 0 : }
1204 : :
1205 : : /* Cleanup index statistics and index names */
1206 [ + + ]: 1287 : for (int i = 0; i < vacrel->nindexes; i++)
1207 : : {
1208 [ + + ]: 642 : if (vacrel->indstats[i])
1209 : 155 : pfree(vacrel->indstats[i]);
1210 : :
1211 [ + - ]: 642 : if (instrument)
1212 : 0 : pfree(indnames[i]);
1213 : 642 : }
1214 : 645 : }
1215 : :
1216 : : /*
1217 : : * lazy_scan_heap() -- workhorse function for VACUUM
1218 : : *
1219 : : * This routine prunes each page in the heap, and considers the need to
1220 : : * freeze remaining tuples with storage (not including pages that can be
1221 : : * skipped using the visibility map). Also performs related maintenance
1222 : : * of the FSM and visibility map. These steps all take place during an
1223 : : * initial pass over the target heap relation.
1224 : : *
1225 : : * Also invokes lazy_vacuum_all_indexes to vacuum indexes, which largely
1226 : : * consists of deleting index tuples that point to LP_DEAD items left in
1227 : : * heap pages following pruning. Earlier initial pass over the heap will
1228 : : * have collected the TIDs whose index tuples need to be removed.
1229 : : *
1230 : : * Finally, invokes lazy_vacuum_heap_rel to vacuum heap pages, which
1231 : : * largely consists of marking LP_DEAD items (from vacrel->dead_items)
1232 : : * as LP_UNUSED. This has to happen in a second, final pass over the
1233 : : * heap, to preserve a basic invariant that all index AMs rely on: no
1234 : : * extant index tuple can ever be allowed to contain a TID that points to
1235 : : * an LP_UNUSED line pointer in the heap. We must disallow premature
1236 : : * recycling of line pointers to avoid index scans that get confused
1237 : : * about which TID points to which tuple immediately after recycling.
1238 : : * (Actually, this isn't a concern when the target heap relation happens to
1239 : : * have no indexes, which allows us to safely apply the one-pass strategy
1240 : : * as an optimization).
1241 : : *
1242 : : * In practice we often have enough space to fit all TIDs, and so won't
1243 : : * need to call lazy_vacuum more than once, after our initial pass over
1244 : : * the heap has totally finished. Otherwise things are slightly more
1245 : : * complicated: our "initial pass" over the heap applies only to those
1246 : : * pages that were pruned before we needed to call lazy_vacuum, and our
1247 : : * "final pass" over the heap only vacuums these same heap pages.
1248 : : * However, we process indexes in full every time lazy_vacuum is called,
1249 : : * which makes index processing very inefficient when memory is in short
1250 : : * supply.
1251 : : */
1252 : : static void
1253 : 645 : lazy_scan_heap(LVRelState *vacrel)
1254 : : {
1255 : 645 : ReadStream *stream;
1256 : 1290 : BlockNumber rel_pages = vacrel->rel_pages,
1257 : 645 : blkno = 0,
1258 : 645 : next_fsm_block_to_vacuum = 0;
1259 : 1290 : BlockNumber orig_eager_scan_success_limit =
1260 : 645 : vacrel->eager_scan_remaining_successes; /* for logging */
1261 : 645 : Buffer vmbuffer = InvalidBuffer;
1262 : 645 : const int initprog_index[] = {
1263 : : PROGRESS_VACUUM_PHASE,
1264 : : PROGRESS_VACUUM_TOTAL_HEAP_BLKS,
1265 : : PROGRESS_VACUUM_MAX_DEAD_TUPLE_BYTES
1266 : : };
1267 : 645 : int64 initprog_val[3];
1268 : :
1269 : : /* Report that we're scanning the heap, advertising total # of blocks */
1270 : 645 : initprog_val[0] = PROGRESS_VACUUM_PHASE_SCAN_HEAP;
1271 : 645 : initprog_val[1] = rel_pages;
1272 : 645 : initprog_val[2] = vacrel->dead_items_info->max_bytes;
1273 : 645 : pgstat_progress_update_multi_param(3, initprog_index, initprog_val);
1274 : :
1275 : : /* Initialize for the first heap_vac_scan_next_block() call */
1276 : 645 : vacrel->current_block = InvalidBlockNumber;
1277 : 645 : vacrel->next_unskippable_block = InvalidBlockNumber;
1278 : 645 : vacrel->next_unskippable_allvis = false;
1279 : 645 : vacrel->next_unskippable_eager_scanned = false;
1280 : 645 : vacrel->next_unskippable_vmbuffer = InvalidBuffer;
1281 : :
1282 : : /*
1283 : : * Set up the read stream for vacuum's first pass through the heap.
1284 : : *
1285 : : * This could be made safe for READ_STREAM_USE_BATCHING, but only with
1286 : : * explicit work in heap_vac_scan_next_block.
1287 : : */
1288 : 645 : stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE,
1289 : 645 : vacrel->bstrategy,
1290 : 645 : vacrel->rel,
1291 : : MAIN_FORKNUM,
1292 : : heap_vac_scan_next_block,
1293 : 645 : vacrel,
1294 : : sizeof(uint8));
1295 : :
1296 : 10332 : while (true)
1297 : : {
1298 : 10332 : Buffer buf;
1299 : 10332 : Page page;
1300 : 10332 : uint8 blk_info = 0;
1301 : 10332 : int ndeleted = 0;
1302 : 10332 : bool has_lpdead_items;
1303 : 10332 : void *per_buffer_data = NULL;
1304 : 10332 : bool vm_page_frozen = false;
1305 : 10332 : bool got_cleanup_lock = false;
1306 : :
1307 : 10332 : vacuum_delay_point(false);
1308 : :
1309 : : /*
1310 : : * Regularly check if wraparound failsafe should trigger.
1311 : : *
1312 : : * There is a similar check inside lazy_vacuum_all_indexes(), but
1313 : : * relfrozenxid might start to look dangerously old before we reach
1314 : : * that point. This check also provides failsafe coverage for the
1315 : : * one-pass strategy, and the two-pass strategy with the index_cleanup
1316 : : * param set to 'off'.
1317 : : */
1318 [ + + + - ]: 10332 : if (vacrel->scanned_pages > 0 &&
1319 : 9687 : vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
1320 : 0 : lazy_check_wraparound_failsafe(vacrel);
1321 : :
1322 : : /*
 1323 : : * Consider if we definitely have enough space to process TIDs on this
 1324 : : * page already. If we are close to overrunning the available space for
 1325 : : * dead_items TIDs, pause and do a cycle of vacuuming before we tackle
 1326 : : * this page. However, let's force at least one page's worth of tuples
 1327 : : * to be stored so as to ensure we do at least some work when the
 1328 : : * configured memory is so low that we run out before storing anything.
1329 : : */
1330 [ + + + + ]: 10332 : if (vacrel->dead_items_info->num_items > 0 &&
1331 : 1246 : TidStoreMemoryUsage(vacrel->dead_items) > vacrel->dead_items_info->max_bytes)
1332 : : {
1333 : : /*
1334 : : * Before beginning index vacuuming, we release any pin we may
1335 : : * hold on the visibility map page. This isn't necessary for
1336 : : * correctness, but we do it anyway to avoid holding the pin
1337 : : * across a lengthy, unrelated operation.
1338 : : */
1339 [ - + ]: 5 : if (BufferIsValid(vmbuffer))
1340 : : {
1341 : 5 : ReleaseBuffer(vmbuffer);
1342 : 5 : vmbuffer = InvalidBuffer;
1343 : 5 : }
1344 : :
1345 : : /* Perform a round of index and heap vacuuming */
1346 : 5 : vacrel->consider_bypass_optimization = false;
1347 : 5 : lazy_vacuum(vacrel);
1348 : :
1349 : : /*
1350 : : * Vacuum the Free Space Map to make newly-freed space visible on
1351 : : * upper-level FSM pages. Note that blkno is the previously
1352 : : * processed block.
1353 : : */
1354 : 10 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
1355 : 5 : blkno + 1);
1356 : 5 : next_fsm_block_to_vacuum = blkno;
1357 : :
1358 : : /* Report that we are once again scanning the heap */
1359 : 5 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
1360 : : PROGRESS_VACUUM_PHASE_SCAN_HEAP);
1361 : 5 : }
1362 : :
1363 : 10332 : buf = read_stream_next_buffer(stream, &per_buffer_data);
1364 : :
1365 : : /* The relation is exhausted. */
1366 [ + + ]: 10332 : if (!BufferIsValid(buf))
1367 : 645 : break;
1368 : :
1369 : 9687 : blk_info = *((uint8 *) per_buffer_data);
1370 : 9687 : CheckBufferIsPinnedOnce(buf);
1371 : 9687 : page = BufferGetPage(buf);
1372 : 9687 : blkno = BufferGetBlockNumber(buf);
1373 : :
1374 : 9687 : vacrel->scanned_pages++;
1375 [ + - ]: 9687 : if (blk_info & VAC_BLK_WAS_EAGER_SCANNED)
1376 : 0 : vacrel->eager_scanned_pages++;
1377 : :
1378 : : /* Report as block scanned, update error traceback information */
1379 : 9687 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
1380 : 19374 : update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP,
1381 : 9687 : blkno, InvalidOffsetNumber);
1382 : :
1383 : : /*
1384 : : * Pin the visibility map page in case we need to mark the page
1385 : : * all-visible. In most cases this will be very cheap, because we'll
1386 : : * already have the correct page pinned anyway.
1387 : : */
1388 : 9687 : visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
1389 : :
1390 : : /*
1391 : : * We need a buffer cleanup lock to prune HOT chains and defragment
1392 : : * the page in lazy_scan_prune. But when it's not possible to acquire
1393 : : * a cleanup lock right away, we may be able to settle for reduced
1394 : : * processing using lazy_scan_noprune.
1395 : : */
1396 : 9687 : got_cleanup_lock = ConditionalLockBufferForCleanup(buf);
1397 : :
1398 [ + - ]: 9687 : if (!got_cleanup_lock)
1399 : 0 : LockBuffer(buf, BUFFER_LOCK_SHARE);
1400 : :
1401 : : /* Check for new or empty pages before lazy_scan_[no]prune call */
1402 [ + + + + ]: 19374 : if (lazy_scan_new_or_empty(vacrel, buf, blkno, page, !got_cleanup_lock,
1403 : 9687 : vmbuffer))
1404 : : {
1405 : : /* Processed as new/empty page (lock and pin released) */
1406 : 344 : continue;
1407 : : }
1408 : :
1409 : : /*
1410 : : * If we didn't get the cleanup lock, we can still collect LP_DEAD
1411 : : * items in the dead_items area for later vacuuming, count live and
1412 : : * recently dead tuples for vacuum logging, and determine if this
 1413 : : * block could later be truncated. If we encounter any XIDs/MXIDs that
 1414 : : * require advancing the relfrozenxid/relminmxid, we'll have to wait
 1415 : : * for a cleanup lock and call lazy_scan_prune().
1416 : : */
1417 [ - + # # ]: 9343 : if (!got_cleanup_lock &&
1418 : 0 : !lazy_scan_noprune(vacrel, buf, blkno, page, &has_lpdead_items))
1419 : : {
1420 : : /*
1421 : : * lazy_scan_noprune could not do all required processing. Wait
1422 : : * for a cleanup lock, and call lazy_scan_prune in the usual way.
1423 : : */
1424 [ # # ]: 0 : Assert(vacrel->aggressive);
1425 : 0 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1426 : 0 : LockBufferForCleanup(buf);
1427 : 0 : got_cleanup_lock = true;
1428 : 0 : }
1429 : :
1430 : : /*
1431 : : * If we have a cleanup lock, we must now prune, freeze, and count
1432 : : * tuples. We may have acquired the cleanup lock originally, or we may
1433 : : * have gone back and acquired it after lazy_scan_noprune() returned
1434 : : * false. Either way, the page hasn't been processed yet.
1435 : : *
1436 : : * Like lazy_scan_noprune(), lazy_scan_prune() will count
1437 : : * recently_dead_tuples and live tuples for vacuum logging, determine
1438 : : * if the block can later be truncated, and accumulate the details of
1439 : : * remaining LP_DEAD line pointers on the page into dead_items. These
1440 : : * dead items include those pruned by lazy_scan_prune() as well as
1441 : : * line pointers previously marked LP_DEAD.
1442 : : */
1443 [ - + ]: 9343 : if (got_cleanup_lock)
1444 : 18686 : ndeleted = lazy_scan_prune(vacrel, buf, blkno, page,
1445 : 9343 : vmbuffer,
1446 : 9343 : blk_info & VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM,
1447 : : &has_lpdead_items, &vm_page_frozen);
1448 : :
1449 : : /*
1450 : : * Count an eagerly scanned page as a failure or a success.
1451 : : *
1452 : : * Only lazy_scan_prune() freezes pages, so if we didn't get the
1453 : : * cleanup lock, we won't have frozen the page. However, we only count
1454 : : * pages that were too new to require freezing as eager freeze
1455 : : * failures.
1456 : : *
1457 : : * We could gather more information from lazy_scan_noprune() about
1458 : : * whether or not there were tuples with XIDs or MXIDs older than the
1459 : : * FreezeLimit or MultiXactCutoff. However, for simplicity, we simply
1460 : : * exclude pages skipped due to cleanup lock contention from eager
1461 : : * freeze algorithm caps.
1462 : : */
1463 [ + - + - ]: 9343 : if (got_cleanup_lock &&
1464 : 9343 : (blk_info & VAC_BLK_WAS_EAGER_SCANNED))
1465 : : {
1466 : : /* Aggressive vacuums do not eager scan. */
1467 [ # # ]: 0 : Assert(!vacrel->aggressive);
1468 : :
1469 [ # # ]: 0 : if (vm_page_frozen)
1470 : : {
1471 [ # # ]: 0 : if (vacrel->eager_scan_remaining_successes > 0)
1472 : 0 : vacrel->eager_scan_remaining_successes--;
1473 : :
1474 [ # # ]: 0 : if (vacrel->eager_scan_remaining_successes == 0)
1475 : : {
1476 : : /*
1477 : : * Report only once that we disabled eager scanning. We
1478 : : * may eagerly read ahead blocks in excess of the success
1479 : : * or failure caps before attempting to freeze them, so we
1480 : : * could reach here even after disabling additional eager
1481 : : * scanning.
1482 : : */
1483 [ # # ]: 0 : if (vacrel->eager_scan_max_fails_per_region > 0)
1484 [ # # # # : 0 : ereport(vacrel->verbose ? INFO : DEBUG2,
# # # # #
# ]
1485 : : (errmsg("disabling eager scanning after freezing %u eagerly scanned blocks of relation \"%s.%s.%s\"",
1486 : : orig_eager_scan_success_limit,
1487 : : vacrel->dbname, vacrel->relnamespace,
1488 : : vacrel->relname)));
1489 : :
1490 : : /*
1491 : : * If we hit our success cap, permanently disable eager
1492 : : * scanning by setting the other eager scan management
1493 : : * fields to their disabled values.
1494 : : */
1495 : 0 : vacrel->eager_scan_remaining_fails = 0;
1496 : 0 : vacrel->next_eager_scan_region_start = InvalidBlockNumber;
1497 : 0 : vacrel->eager_scan_max_fails_per_region = 0;
1498 : 0 : }
1499 : 0 : }
1500 [ # # ]: 0 : else if (vacrel->eager_scan_remaining_fails > 0)
1501 : 0 : vacrel->eager_scan_remaining_fails--;
1502 : 0 : }
1503 : :
1504 : : /*
1505 : : * Now drop the buffer lock and, potentially, update the FSM.
1506 : : *
1507 : : * Our goal is to update the freespace map the last time we touch the
1508 : : * page. If we'll process a block in the second pass, we may free up
1509 : : * additional space on the page, so it is better to update the FSM
1510 : : * after the second pass. If the relation has no indexes, or if index
1511 : : * vacuuming is disabled, there will be no second heap pass; if this
1512 : : * particular page has no dead items, the second heap pass will not
1513 : : * touch this page. So, in those cases, update the FSM now.
1514 : : *
1515 : : * Note: In corner cases, it's possible to miss updating the FSM
1516 : : * entirely. If index vacuuming is currently enabled, we'll skip the
1517 : : * FSM update now. But if failsafe mode is later activated, or there
1518 : : * are so few dead tuples that index vacuuming is bypassed, there will
1519 : : * also be no opportunity to update the FSM later, because we'll never
1520 : : * revisit this page. Since updating the FSM is desirable but not
1521 : : * absolutely required, that's OK.
1522 : : */
1523 : 9343 : if (vacrel->nindexes == 0
1524 [ + + ]: 9343 : || !vacrel->do_index_vacuuming
1525 [ + + + + ]: 7569 : || !has_lpdead_items)
1526 : : {
1527 : 8382 : Size freespace = PageGetHeapFreeSpace(page);
1528 : :
1529 : 8382 : UnlockReleaseBuffer(buf);
1530 : 8382 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1531 : :
1532 : : /*
1533 : : * Periodically perform FSM vacuuming to make newly-freed space
1534 : : * visible on upper FSM pages. This is done after vacuuming if the
1535 : : * table has indexes. There will only be newly-freed space if we
1536 : : * held the cleanup lock and lazy_scan_prune() was called.
1537 : : */
1538 [ + - + + : 8382 : if (got_cleanup_lock && vacrel->nindexes == 0 && ndeleted > 0 &&
+ + + - ]
1539 : 39 : blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
1540 : : {
1541 : 0 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
1542 : 0 : blkno);
1543 : 0 : next_fsm_block_to_vacuum = blkno;
1544 : 0 : }
1545 : 8382 : }
1546 : : else
1547 : 961 : UnlockReleaseBuffer(buf);
1548 [ - + + + ]: 10332 : }
1549 : :
1550 : 645 : vacrel->blkno = InvalidBlockNumber;
1551 [ + + ]: 645 : if (BufferIsValid(vmbuffer))
1552 : 345 : ReleaseBuffer(vmbuffer);
1553 : :
1554 : : /*
1555 : : * Report that everything is now scanned. We never skip scanning the last
1556 : : * block in the relation, so we can pass rel_pages here.
1557 : : */
1558 : 645 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED,
1559 : 645 : rel_pages);
1560 : :
1561 : : /* now we can compute the new value for pg_class.reltuples */
1562 : 1290 : vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
1563 : 645 : vacrel->scanned_pages,
1564 : 645 : vacrel->live_tuples);
1565 : :
1566 : : /*
1567 : : * Also compute the total number of surviving heap entries. In the
1568 : : * (unlikely) scenario that new_live_tuples is -1, take it as zero.
1569 : : */
1570 : 645 : vacrel->new_rel_tuples =
1571 [ + + ]: 645 : Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
1572 : 645 : vacrel->missed_dead_tuples;
1573 : :
1574 : 645 : read_stream_end(stream);
1575 : :
1576 : : /*
1577 : : * Do index vacuuming (call each index's ambulkdelete routine), then do
1578 : : * related heap vacuuming
1579 : : */
1580 [ + + ]: 645 : if (vacrel->dead_items_info->num_items > 0)
1581 : 60 : lazy_vacuum(vacrel);
1582 : :
1583 : : /*
1584 : : * Vacuum the remainder of the Free Space Map. We must do this whether or
1585 : : * not there were indexes, and whether or not we bypassed index vacuuming.
1586 : : * We can pass rel_pages here because we never skip scanning the last
1587 : : * block of the relation.
1588 : : */
1589 [ + + ]: 645 : if (rel_pages > next_fsm_block_to_vacuum)
1590 : 346 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, rel_pages);
1591 : :
1592 : : /* report all blocks vacuumed */
1593 : 645 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, rel_pages);
1594 : :
1595 : : /* Do final index cleanup (call each index's amvacuumcleanup routine) */
1596 [ + + + + ]: 645 : if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
1597 : 454 : lazy_cleanup_all_indexes(vacrel);
1598 : 645 : }
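/*
 * Illustrative sketch, not part of vacuumlazy.c: the scan loop above
 * interleaves two periodic maintenance actions with page processing -- a
 * wraparound-failsafe check every FAILSAFE_EVERY_PAGES scanned pages, and a
 * round of index/heap vacuuming whenever the dead-item store exceeds its
 * memory budget.  The standalone program below mimics only that trigger
 * pattern; the constants, the per-page byte estimate, and the counters are
 * invented for the demo and are not PostgreSQL APIs.
 */
#include <stdio.h>
#include <stddef.h>

#define DEMO_FAILSAFE_EVERY_PAGES   4096            /* assumed interval */
#define DEMO_DEAD_ITEMS_MAX_BYTES   (64 * 1024)     /* assumed memory budget */

int
main(void)
{
    size_t      dead_items_bytes = 0;
    int         failsafe_checks = 0;
    int         vacuum_cycles = 0;

    for (long scanned_pages = 1; scanned_pages <= 20000; scanned_pages++)
    {
        /* periodic failsafe check, like scanned_pages % FAILSAFE_EVERY_PAGES == 0 */
        if (scanned_pages % DEMO_FAILSAFE_EVERY_PAGES == 0)
            failsafe_checks++;

        /* pretend each scanned page adds ~40 bytes of dead-item TIDs */
        dead_items_bytes += 40;

        /*
         * When the accumulated dead items exceed the budget, run one cycle
         * of index + heap vacuuming and empty the store, as the loop above
         * does before tackling the next page.
         */
        if (dead_items_bytes > DEMO_DEAD_ITEMS_MAX_BYTES)
        {
            vacuum_cycles++;
            dead_items_bytes = 0;
        }
    }

    printf("failsafe checks: %d, vacuum cycles: %d\n",
           failsafe_checks, vacuum_cycles);
    return 0;
}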
1599 : :
1600 : : /*
1601 : : * heap_vac_scan_next_block() -- read stream callback to get the next block
1602 : : * for vacuum to process
1603 : : *
1604 : : * Every time lazy_scan_heap() needs a new block to process during its first
1605 : : * phase, it invokes read_stream_next_buffer() with a stream set up to call
1606 : : * heap_vac_scan_next_block() to get the next block.
1607 : : *
1608 : : * heap_vac_scan_next_block() uses the visibility map, vacuum options, and
1609 : : * various thresholds to skip blocks which do not need to be processed and
1610 : : * returns the next block to process or InvalidBlockNumber if there are no
1611 : : * remaining blocks.
1612 : : *
 1613 : : * The visibility status of the next block to process and whether or not it
 1614 : : * was eagerly scanned are set in the per_buffer_data.
1615 : : *
1616 : : * callback_private_data contains a reference to the LVRelState, passed to the
1617 : : * read stream API during stream setup. The LVRelState is an in/out parameter
1618 : : * here (locally named `vacrel`). Vacuum options and information about the
1619 : : * relation are read from it. vacrel->skippedallvis is set if we skip a block
1620 : : * that's all-visible but not all-frozen (to ensure that we don't update
1621 : : * relfrozenxid in that case). vacrel also holds information about the next
1622 : : * unskippable block -- as bookkeeping for this function.
1623 : : */
1624 : : static BlockNumber
1625 : 10332 : heap_vac_scan_next_block(ReadStream *stream,
1626 : : void *callback_private_data,
1627 : : void *per_buffer_data)
1628 : : {
1629 : 10332 : BlockNumber next_block;
1630 : 10332 : LVRelState *vacrel = callback_private_data;
1631 : 10332 : uint8 blk_info = 0;
1632 : :
1633 : : /* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
1634 : 10332 : next_block = vacrel->current_block + 1;
1635 : :
1636 : : /* Have we reached the end of the relation? */
1637 [ + + ]: 10332 : if (next_block >= vacrel->rel_pages)
1638 : : {
1639 [ + + ]: 645 : if (BufferIsValid(vacrel->next_unskippable_vmbuffer))
1640 : : {
1641 : 206 : ReleaseBuffer(vacrel->next_unskippable_vmbuffer);
1642 : 206 : vacrel->next_unskippable_vmbuffer = InvalidBuffer;
1643 : 206 : }
1644 : 645 : return InvalidBlockNumber;
1645 : : }
1646 : :
1647 : : /*
1648 : : * We must be in one of the three following states:
1649 : : */
1650 [ + + + + ]: 9687 : if (next_block > vacrel->next_unskippable_block ||
1651 : 702 : vacrel->next_unskippable_block == InvalidBlockNumber)
1652 : : {
1653 : : /*
1654 : : * 1. We have just processed an unskippable block (or we're at the
1655 : : * beginning of the scan). Find the next unskippable block using the
1656 : : * visibility map.
1657 : : */
1658 : 9331 : bool skipsallvis;
1659 : :
1660 : 9331 : find_next_unskippable_block(vacrel, &skipsallvis);
1661 : :
1662 : : /*
1663 : : * We now know the next block that we must process. It can be the
1664 : : * next block after the one we just processed, or something further
1665 : : * ahead. If it's further ahead, we can jump to it, but we choose to
1666 : : * do so only if we can skip at least SKIP_PAGES_THRESHOLD consecutive
1667 : : * pages. Since we're reading sequentially, the OS should be doing
1668 : : * readahead for us, so there's no gain in skipping a page now and
1669 : : * then. Skipping such a range might even discourage sequential
1670 : : * detection.
1671 : : *
1672 : : * This test also enables more frequent relfrozenxid advancement
1673 : : * during non-aggressive VACUUMs. If the range has any all-visible
1674 : : * pages then skipping makes updating relfrozenxid unsafe, which is a
1675 : : * real downside.
1676 : : */
1677 [ + + ]: 9331 : if (vacrel->next_unskippable_block - next_block >= SKIP_PAGES_THRESHOLD)
1678 : : {
1679 : 8 : next_block = vacrel->next_unskippable_block;
1680 [ + + ]: 8 : if (skipsallvis)
1681 : 4 : vacrel->skippedallvis = true;
1682 : 8 : }
1683 : 9331 : }
1684 : :
1685 : : /* Now we must be in one of the two remaining states: */
1686 [ + + ]: 9687 : if (next_block < vacrel->next_unskippable_block)
1687 : : {
1688 : : /*
1689 : : * 2. We are processing a range of blocks that we could have skipped
1690 : : * but chose not to. We know that they are all-visible in the VM,
1691 : : * otherwise they would've been unskippable.
1692 : : */
1693 : 356 : vacrel->current_block = next_block;
1694 : 356 : blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM;
1695 : 356 : *((uint8 *) per_buffer_data) = blk_info;
1696 : 356 : return vacrel->current_block;
1697 : : }
1698 : : else
1699 : : {
1700 : : /*
1701 : : * 3. We reached the next unskippable block. Process it. On next
1702 : : * iteration, we will be back in state 1.
1703 : : */
1704 [ + - ]: 9331 : Assert(next_block == vacrel->next_unskippable_block);
1705 : :
1706 : 9331 : vacrel->current_block = next_block;
1707 [ + + ]: 9331 : if (vacrel->next_unskippable_allvis)
1708 : 66 : blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM;
1709 [ + - ]: 9331 : if (vacrel->next_unskippable_eager_scanned)
1710 : 0 : blk_info |= VAC_BLK_WAS_EAGER_SCANNED;
1711 : 9331 : *((uint8 *) per_buffer_data) = blk_info;
1712 : 9331 : return vacrel->current_block;
1713 : : }
1714 : 10332 : }
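/*
 * Illustrative sketch, not part of vacuumlazy.c: the callback above packs
 * per-block facts into the single uint8 of per_buffer_data using the
 * VAC_BLK_* bit flags, and lazy_scan_heap() unpacks them after
 * read_stream_next_buffer().  The flag names and values below are
 * assumptions made for this standalone demo, not the server's definitions.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define DEMO_BLK_ALL_VISIBLE_ACCORDING_TO_VM    (1 << 0)    /* assumed */
#define DEMO_BLK_WAS_EAGER_SCANNED              (1 << 1)    /* assumed */

static uint8_t
pack_blk_info(bool all_visible, bool eager_scanned)
{
    uint8_t     info = 0;

    if (all_visible)
        info |= DEMO_BLK_ALL_VISIBLE_ACCORDING_TO_VM;
    if (eager_scanned)
        info |= DEMO_BLK_WAS_EAGER_SCANNED;
    return info;
}

int
main(void)
{
    uint8_t     blk_info = pack_blk_info(true, false);

    printf("all-visible: %d, eager-scanned: %d\n",
           (blk_info & DEMO_BLK_ALL_VISIBLE_ACCORDING_TO_VM) != 0,
           (blk_info & DEMO_BLK_WAS_EAGER_SCANNED) != 0);
    return 0;
}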
1715 : :
1716 : : /*
1717 : : * Find the next unskippable block in a vacuum scan using the visibility map.
 1718 : : * The next unskippable block and its visibility information are updated in
 1719 : : * vacrel.
1720 : : *
1721 : : * Note: our opinion of which blocks can be skipped can go stale immediately.
1722 : : * It's okay if caller "misses" a page whose all-visible or all-frozen marking
1723 : : * was concurrently cleared, though. All that matters is that caller scan all
1724 : : * pages whose tuples might contain XIDs < OldestXmin, or MXIDs < OldestMxact.
1725 : : * (Actually, non-aggressive VACUUMs can choose to skip all-visible pages with
1726 : : * older XIDs/MXIDs. The *skippedallvis flag will be set here when the choice
1727 : : * to skip such a range is actually made, making everything safe.)
1728 : : */
1729 : : static void
1730 : 9331 : find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
1731 : : {
1732 : 9331 : BlockNumber rel_pages = vacrel->rel_pages;
1733 : 9331 : BlockNumber next_unskippable_block = vacrel->next_unskippable_block + 1;
1734 : 9331 : Buffer next_unskippable_vmbuffer = vacrel->next_unskippable_vmbuffer;
1735 : 9331 : bool next_unskippable_eager_scanned = false;
1736 : 9331 : bool next_unskippable_allvis;
1737 : :
1738 : 9331 : *skipsallvis = false;
1739 : :
1740 : 10629 : for (;; next_unskippable_block++)
1741 : : {
1742 : 21258 : uint8 mapbits = visibilitymap_get_status(vacrel->rel,
1743 : 10629 : next_unskippable_block,
1744 : : &next_unskippable_vmbuffer);
1745 : :
1746 : 10629 : next_unskippable_allvis = (mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0;
1747 : :
1748 : : /*
1749 : : * At the start of each eager scan region, normal vacuums with eager
1750 : : * scanning enabled reset the failure counter, allowing vacuum to
1751 : : * resume eager scanning if it had been suspended in the previous
1752 : : * region.
1753 : : */
1754 [ + - ]: 10629 : if (next_unskippable_block >= vacrel->next_eager_scan_region_start)
1755 : : {
1756 : 0 : vacrel->eager_scan_remaining_fails =
1757 : 0 : vacrel->eager_scan_max_fails_per_region;
1758 : 0 : vacrel->next_eager_scan_region_start += EAGER_SCAN_REGION_SIZE;
1759 : 0 : }
1760 : :
1761 : : /*
1762 : : * A block is unskippable if it is not all visible according to the
1763 : : * visibility map.
1764 : : */
1765 [ + + ]: 10629 : if (!next_unskippable_allvis)
1766 : : {
1767 [ + - ]: 9265 : Assert((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0);
1768 : 9265 : break;
1769 : : }
1770 : :
1771 : : /*
1772 : : * Caller must scan the last page to determine whether it has tuples
1773 : : * (caller must have the opportunity to set vacrel->nonempty_pages).
1774 : : * This rule avoids having lazy_truncate_heap() take access-exclusive
1775 : : * lock on rel to attempt a truncation that fails anyway, just because
1776 : : * there are tuples on the last page (it is likely that there will be
1777 : : * tuples on other nearby pages as well, but those can be skipped).
1778 : : *
1779 : : * Implement this by always treating the last block as unsafe to skip.
1780 : : */
1781 [ + + ]: 1364 : if (next_unskippable_block == rel_pages - 1)
1782 : 65 : break;
1783 : :
1784 : : /* DISABLE_PAGE_SKIPPING makes all skipping unsafe */
1785 [ + - ]: 1299 : if (!vacrel->skipwithvm)
1786 : 0 : break;
1787 : :
1788 : : /*
1789 : : * All-frozen pages cannot contain XIDs < OldestXmin (XIDs that aren't
1790 : : * already frozen by now), so this page can be skipped.
1791 : : */
1792 [ + + ]: 1299 : if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
1793 : 758 : continue;
1794 : :
1795 : : /*
1796 : : * Aggressive vacuums cannot skip any all-visible pages that are not
1797 : : * also all-frozen.
1798 : : */
1799 [ + + ]: 541 : if (vacrel->aggressive)
1800 : 1 : break;
1801 : :
1802 : : /*
1803 : : * Normal vacuums with eager scanning enabled only skip all-visible
1804 : : * but not all-frozen pages if they have hit the failure limit for the
1805 : : * current eager scan region.
1806 : : */
1807 [ - + ]: 540 : if (vacrel->eager_scan_remaining_fails > 0)
1808 : : {
1809 : 0 : next_unskippable_eager_scanned = true;
1810 : 0 : break;
1811 : : }
1812 : :
1813 : : /*
1814 : : * All-visible blocks are safe to skip in a normal vacuum. But
1815 : : * remember that the final range contains such a block for later.
1816 : : */
1817 : 540 : *skipsallvis = true;
1818 [ - + + + ]: 10629 : }
1819 : :
1820 : : /* write the local variables back to vacrel */
1821 : 9331 : vacrel->next_unskippable_block = next_unskippable_block;
1822 : 9331 : vacrel->next_unskippable_allvis = next_unskippable_allvis;
1823 : 9331 : vacrel->next_unskippable_eager_scanned = next_unskippable_eager_scanned;
1824 : 9331 : vacrel->next_unskippable_vmbuffer = next_unskippable_vmbuffer;
1825 : 9331 : }
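/*
 * Illustrative sketch, not part of vacuumlazy.c: a toy model of the skipping
 * policy implemented by heap_vac_scan_next_block() and
 * find_next_unskippable_block().  All-visible blocks are only skipped when
 * the skippable run is at least SKIP_PAGES_THRESHOLD blocks long, and the
 * last block is always scanned.  The threshold value and the toy visibility
 * array below are invented for the demo.
 */
#include <stdio.h>
#include <stdbool.h>

#define DEMO_SKIP_PAGES_THRESHOLD 4     /* assumed; illustrative only */

int
main(void)
{
    /* toy visibility map: true = all-visible */
    bool        allvis[] = {false, true, true, false, true, true, true, true,
                            true, false, true, true, false, true, true, true};
    int         rel_pages = (int) (sizeof(allvis) / sizeof(allvis[0]));
    int         blkno = 0;

    while (blkno < rel_pages)
    {
        /* find the next unskippable block: not all-visible, or the last block */
        int         next_unskippable = blkno;

        while (next_unskippable < rel_pages - 1 && allvis[next_unskippable])
            next_unskippable++;

        /* only jump ahead when the skippable run is long enough */
        if (next_unskippable - blkno >= DEMO_SKIP_PAGES_THRESHOLD)
        {
            printf("skip blocks %d..%d\n", blkno, next_unskippable - 1);
            blkno = next_unskippable;
        }

        /* scan blocks up to and including the unskippable one */
        for (; blkno <= next_unskippable; blkno++)
            printf("scan block %d\n", blkno);
    }

    return 0;
}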
1826 : :
1827 : : /*
1828 : : * lazy_scan_new_or_empty() -- lazy_scan_heap() new/empty page handling.
1829 : : *
1830 : : * Must call here to handle both new and empty pages before calling
1831 : : * lazy_scan_prune or lazy_scan_noprune, since they're not prepared to deal
1832 : : * with new or empty pages.
1833 : : *
1834 : : * It's necessary to consider new pages as a special case, since the rules for
1835 : : * maintaining the visibility map and FSM with empty pages are a little
1836 : : * different (though new pages can be truncated away during rel truncation).
1837 : : *
1838 : : * Empty pages are not really a special case -- they're just heap pages that
1839 : : * have no allocated tuples (including even LP_UNUSED items). You might
1840 : : * wonder why we need to handle them here all the same. It's only necessary
1841 : : * because of a corner-case involving a hard crash during heap relation
1842 : : * extension. If we ever make relation-extension crash safe, then it should
1843 : : * no longer be necessary to deal with empty pages here (or new pages, for
1844 : : * that matter).
1845 : : *
 1846 : : * Caller must hold at least a shared lock. We might need to escalate the
 1847 : : * lock in that case, so the type of lock the caller holds needs to be
 1848 : : * specified using the 'sharelock' argument.
1849 : : *
 1850 : : * Returns false in the common case where the caller should go on to call
 1851 : : * lazy_scan_prune (or lazy_scan_noprune). Otherwise returns true, indicating
 1852 : : * that lazy_scan_heap is done processing the page, having released the lock
 1853 : : * on the caller's behalf.
1854 : : *
1855 : : * No vm_page_frozen output parameter (like that passed to lazy_scan_prune())
1856 : : * is passed here because neither empty nor new pages can be eagerly frozen.
1857 : : * New pages are never frozen. Empty pages are always set frozen in the VM at
1858 : : * the same time that they are set all-visible, and we don't eagerly scan
1859 : : * frozen pages.
1860 : : */
1861 : : static bool
1862 : 9687 : lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
1863 : : Page page, bool sharelock, Buffer vmbuffer)
1864 : : {
1865 : 9687 : Size freespace;
1866 : :
1867 [ + + ]: 9687 : if (PageIsNew(page))
1868 : : {
1869 : : /*
1870 : : * All-zeroes pages can be left over if either a backend extends the
1871 : : * relation by a single page, but crashes before the newly initialized
1872 : : * page has been written out, or when bulk-extending the relation
1873 : : * (which creates a number of empty pages at the tail end of the
1874 : : * relation), and then enters them into the FSM.
1875 : : *
1876 : : * Note we do not enter the page into the visibilitymap. That has the
1877 : : * downside that we repeatedly visit this page in subsequent vacuums,
1878 : : * but otherwise we'll never discover the space on a promoted standby.
1879 : : * The harm of repeated checking ought to normally not be too bad. The
1880 : : * space usually should be used at some point, otherwise there
1881 : : * wouldn't be any regular vacuums.
1882 : : *
1883 : : * Make sure these pages are in the FSM, to ensure they can be reused.
1884 : : * Do that by testing if there's any space recorded for the page. If
1885 : : * not, enter it. We do so after releasing the lock on the heap page,
1886 : : * the FSM is approximate, after all.
1887 : : */
1888 : 269 : UnlockReleaseBuffer(buf);
1889 : :
1890 [ + + ]: 269 : if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0)
1891 : : {
1892 : 191 : freespace = BLCKSZ - SizeOfPageHeaderData;
1893 : :
1894 : 191 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1895 : 191 : }
1896 : :
1897 : 269 : return true;
1898 : : }
1899 : :
1900 [ + + ]: 9418 : if (PageIsEmpty(page))
1901 : : {
1902 : : /*
1903 : : * It seems likely that caller will always be able to get a cleanup
1904 : : * lock on an empty page. But don't take any chances -- escalate to
1905 : : * an exclusive lock (still don't need a cleanup lock, though).
1906 : : */
1907 [ - + ]: 75 : if (sharelock)
1908 : : {
1909 : 0 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1910 : 0 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
1911 : :
1912 [ # # ]: 0 : if (!PageIsEmpty(page))
1913 : : {
1914 : : /* page isn't new or empty -- keep lock and pin for now */
1915 : 0 : return false;
1916 : : }
1917 : 0 : }
1918 : : else
1919 : : {
1920 : : /* Already have a full cleanup lock (which is more than enough) */
1921 : : }
1922 : :
1923 : : /*
1924 : : * Unlike new pages, empty pages are always set all-visible and
1925 : : * all-frozen.
1926 : : */
1927 [ + - ]: 75 : if (!PageIsAllVisible(page))
1928 : : {
1929 : 0 : START_CRIT_SECTION();
1930 : :
1931 : : /* mark buffer dirty before writing a WAL record */
1932 : 0 : MarkBufferDirty(buf);
1933 : :
1934 : : /*
1935 : : * It's possible that another backend has extended the heap,
1936 : : * initialized the page, and then failed to WAL-log the page due
1937 : : * to an ERROR. Since heap extension is not WAL-logged, recovery
1938 : : * might try to replay our record setting the page all-visible and
1939 : : * find that the page isn't initialized, which will cause a PANIC.
1940 : : * To prevent that, check whether the page has been previously
1941 : : * WAL-logged, and if not, do that now.
1942 : : */
1943 [ # # # # : 0 : if (RelationNeedsWAL(vacrel->rel) &&
# # # # ]
1944 : 0 : !XLogRecPtrIsValid(PageGetLSN(page)))
1945 : 0 : log_newpage_buffer(buf, true);
1946 : :
1947 : 0 : PageSetAllVisible(page);
1948 : 0 : visibilitymap_set(vacrel->rel, blkno, buf,
1949 : : InvalidXLogRecPtr,
1950 : 0 : vmbuffer, InvalidTransactionId,
1951 : : VISIBILITYMAP_ALL_VISIBLE |
1952 : : VISIBILITYMAP_ALL_FROZEN);
1953 [ # # ]: 0 : END_CRIT_SECTION();
1954 : :
1955 : : /* Count the newly all-frozen pages for logging */
1956 : 0 : vacrel->vm_new_visible_pages++;
1957 : 0 : vacrel->vm_new_visible_frozen_pages++;
1958 : 0 : }
1959 : :
1960 : 75 : freespace = PageGetHeapFreeSpace(page);
1961 : 75 : UnlockReleaseBuffer(buf);
1962 : 75 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1963 : 75 : return true;
1964 : : }
1965 : :
1966 : : /* page isn't new or empty -- keep lock and pin */
1967 : 9343 : return false;
1968 : 9687 : }
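/*
 * Illustrative sketch, not part of vacuumlazy.c: for an all-zeroes (new)
 * page the function above records BLCKSZ - SizeOfPageHeaderData as the
 * page's free space, i.e. an entire block minus the page header.  The block
 * and header sizes below are assumptions corresponding to a default build,
 * used only to show the arithmetic.
 */
#include <stdio.h>
#include <stddef.h>

#define DEMO_BLCKSZ                 8192    /* assumed default block size */
#define DEMO_SIZE_OF_PAGE_HEADER    24      /* assumed page header size */

int
main(void)
{
    size_t      freespace = DEMO_BLCKSZ - DEMO_SIZE_OF_PAGE_HEADER;

    printf("recorded free space for a new page: %zu bytes\n", freespace);
    return 0;
}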
1969 : :
1970 : : /* qsort comparator for sorting OffsetNumbers */
1971 : : static int
1972 : 311843 : cmpOffsetNumbers(const void *a, const void *b)
1973 : : {
1974 : 311843 : return pg_cmp_u16(*(const OffsetNumber *) a, *(const OffsetNumber *) b);
1975 : : }
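/*
 * Illustrative sketch, not part of vacuumlazy.c: lazy_scan_prune() sorts the
 * page's dead offsets with a comparator like cmpOffsetNumbers() before
 * handing them to dead_items_add().  The standalone version below uses plain
 * uint16_t in place of OffsetNumber and a hand-written comparison in place
 * of pg_cmp_u16(); both substitutions are assumptions for the demo.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

static int
cmp_u16(const void *a, const void *b)
{
    uint16_t    lhs = *(const uint16_t *) a;
    uint16_t    rhs = *(const uint16_t *) b;

    /* return <0, 0, >0 without risking integer overflow */
    return (lhs > rhs) - (lhs < rhs);
}

int
main(void)
{
    uint16_t    deadoffsets[] = {17, 3, 42, 3, 9};
    size_t      n = sizeof(deadoffsets) / sizeof(deadoffsets[0]);

    qsort(deadoffsets, n, sizeof(uint16_t), cmp_u16);

    for (size_t i = 0; i < n; i++)
        printf("%u ", (unsigned) deadoffsets[i]);
    printf("\n");
    return 0;
}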
1976 : :
1977 : : /*
1978 : : * lazy_scan_prune() -- lazy_scan_heap() pruning and freezing.
1979 : : *
1980 : : * Caller must hold pin and buffer cleanup lock on the buffer.
1981 : : *
1982 : : * vmbuffer is the buffer containing the VM block with visibility information
1983 : : * for the heap block, blkno. all_visible_according_to_vm is the saved
1984 : : * visibility status of the heap block looked up earlier by the caller. We
1985 : : * won't rely entirely on this status, as it may be out of date.
1986 : : *
1987 : : * *has_lpdead_items is set to true or false depending on whether, upon return
1988 : : * from this function, any LP_DEAD items are still present on the page.
1989 : : *
1990 : : * *vm_page_frozen is set to true if the page is newly set all-frozen in the
1991 : : * VM. The caller currently only uses this for determining whether an eagerly
1992 : : * scanned page was successfully set all-frozen.
1993 : : *
1994 : : * Returns the number of tuples deleted from the page during HOT pruning.
1995 : : */
1996 : : static int
1997 : 9343 : lazy_scan_prune(LVRelState *vacrel,
1998 : : Buffer buf,
1999 : : BlockNumber blkno,
2000 : : Page page,
2001 : : Buffer vmbuffer,
2002 : : bool all_visible_according_to_vm,
2003 : : bool *has_lpdead_items,
2004 : : bool *vm_page_frozen)
2005 : : {
2006 : 9343 : Relation rel = vacrel->rel;
2007 : 9343 : PruneFreezeResult presult;
2008 : 46715 : PruneFreezeParams params = {
2009 : 9343 : .relation = rel,
2010 : 9343 : .buffer = buf,
2011 : : .reason = PRUNE_VACUUM_SCAN,
2012 : : .options = HEAP_PAGE_PRUNE_FREEZE,
2013 : 9343 : .vistest = vacrel->vistest,
2014 : 9343 : .cutoffs = &vacrel->cutoffs,
2015 : : };
2016 : :
2017 [ + - ]: 9343 : Assert(BufferGetBlockNumber(buf) == blkno);
2018 : :
2019 : : /*
2020 : : * Prune all HOT-update chains and potentially freeze tuples on this page.
2021 : : *
2022 : : * If the relation has no indexes, we can immediately mark would-be dead
2023 : : * items LP_UNUSED.
2024 : : *
2025 : : * The number of tuples removed from the page is returned in
2026 : : * presult.ndeleted. It should not be confused with presult.lpdead_items;
2027 : : * presult.lpdead_items's final value can be thought of as the number of
2028 : : * tuples that were deleted from indexes.
2029 : : *
2030 : : * We will update the VM after collecting LP_DEAD items and freezing
2031 : : * tuples. Pruning will have determined whether or not the page is
2032 : : * all-visible.
2033 : : */
2034 [ + + ]: 9343 : if (vacrel->nindexes == 0)
2035 : 1774 : params.options |= HEAP_PAGE_PRUNE_MARK_UNUSED_NOW;
2036 : :
2037 : 9343 : heap_page_prune_and_freeze(¶ms,
2038 : : &presult,
2039 : 9343 : &vacrel->offnum,
2040 : 9343 : &vacrel->NewRelfrozenXid, &vacrel->NewRelminMxid);
2041 : :
2042 [ + - ]: 9343 : Assert(MultiXactIdIsValid(vacrel->NewRelminMxid));
2043 [ + - ]: 9343 : Assert(TransactionIdIsValid(vacrel->NewRelfrozenXid));
2044 : :
2045 [ + + ]: 9343 : if (presult.nfrozen > 0)
2046 : : {
2047 : : /*
2048 : : * We don't increment the new_frozen_tuple_pages instrumentation
2049 : : * counter when nfrozen == 0, since it only counts pages with newly
2050 : : * frozen tuples (don't confuse that with pages newly set all-frozen
2051 : : * in VM).
2052 : : */
2053 : 1921 : vacrel->new_frozen_tuple_pages++;
2054 : 1921 : }
2055 : :
2056 : : /*
2057 : : * VACUUM will call heap_page_is_all_visible() during the second pass over
2058 : : * the heap to determine all_visible and all_frozen for the page -- this
2059 : : * is a specialized version of the logic from this function. Now that
2060 : : * we've finished pruning and freezing, make sure that we're in total
2061 : : * agreement with heap_page_is_all_visible() using an assertion.
2062 : : */
2063 : : #ifdef USE_ASSERT_CHECKING
2064 [ + + ]: 9343 : if (presult.all_visible)
2065 : : {
2066 : 6477 : TransactionId debug_cutoff;
2067 : 6477 : bool debug_all_frozen;
2068 : :
2069 [ + - ]: 6477 : Assert(presult.lpdead_items == 0);
2070 : :
2071 [ + - ]: 6477 : Assert(heap_page_is_all_visible(vacrel->rel, buf,
2072 : : vacrel->cutoffs.OldestXmin, &debug_all_frozen,
2073 : : &debug_cutoff, &vacrel->offnum));
2074 : :
2075 [ + - ]: 6477 : Assert(presult.all_frozen == debug_all_frozen);
2076 : :
2077 [ + + + - ]: 6477 : Assert(!TransactionIdIsValid(debug_cutoff) ||
2078 : : debug_cutoff == presult.vm_conflict_horizon);
2079 : 6477 : }
2080 : : #endif
2081 : :
2082 : : /*
2083 : : * Now save details of the LP_DEAD items from the page in vacrel
2084 : : */
2085 [ + + ]: 9343 : if (presult.lpdead_items > 0)
2086 : : {
2087 : 973 : vacrel->lpdead_item_pages++;
2088 : :
2089 : : /*
2090 : : * deadoffsets are collected incrementally in
2091 : : * heap_page_prune_and_freeze() as each dead line pointer is recorded,
2092 : : * with an indeterminate order, but dead_items_add requires them to be
2093 : : * sorted.
2094 : : */
2095 : 973 : qsort(presult.deadoffsets, presult.lpdead_items, sizeof(OffsetNumber),
2096 : : cmpOffsetNumbers);
2097 : :
2098 : 973 : dead_items_add(vacrel, blkno, presult.deadoffsets, presult.lpdead_items);
2099 : 973 : }
2100 : :
2101 : : /* Finally, add page-local counts to whole-VACUUM counts */
2102 : 9343 : vacrel->tuples_deleted += presult.ndeleted;
2103 : 9343 : vacrel->tuples_frozen += presult.nfrozen;
2104 : 9343 : vacrel->lpdead_items += presult.lpdead_items;
2105 : 9343 : vacrel->live_tuples += presult.live_tuples;
2106 : 9343 : vacrel->recently_dead_tuples += presult.recently_dead_tuples;
2107 : :
2108 : : /* Can't truncate this page */
2109 [ + + ]: 9343 : if (presult.hastup)
2110 : 8655 : vacrel->nonempty_pages = blkno + 1;
2111 : :
2112 : : /* Did we find LP_DEAD items? */
2113 : 9343 : *has_lpdead_items = (presult.lpdead_items > 0);
2114 : :
2115 [ + + + - ]: 9343 : Assert(!presult.all_visible || !(*has_lpdead_items));
2116 [ + + + - ]: 9343 : Assert(!presult.all_frozen || presult.all_visible);
2117 : :
2118 : : /*
2119 : : * Handle setting visibility map bit based on information from the VM (as
2120 : : * of last heap_vac_scan_next_block() call), and from all_visible and
2121 : : * all_frozen variables
2122 : : */
2123 [ + + + + ]: 9343 : if (!all_visible_according_to_vm && presult.all_visible)
2124 : : {
2125 : 6130 : uint8 old_vmbits;
2126 : 6130 : uint8 flags = VISIBILITYMAP_ALL_VISIBLE;
2127 : :
2128 [ + + ]: 6130 : if (presult.all_frozen)
2129 : : {
2130 [ + - ]: 2077 : Assert(!TransactionIdIsValid(presult.vm_conflict_horizon));
2131 : 2077 : flags |= VISIBILITYMAP_ALL_FROZEN;
2132 : 2077 : }
2133 : :
2134 : : /*
2135 : : * It should never be the case that the visibility map page is set
2136 : : * while the page-level bit is clear, but the reverse is allowed (if
2137 : : * checksums are not enabled). Regardless, set both bits so that we
2138 : : * get back in sync.
2139 : : *
2140 : : * NB: If the heap page is all-visible but the VM bit is not set, we
2141 : : * don't need to dirty the heap page. However, if checksums are
2142 : : * enabled, we do need to make sure that the heap page is dirtied
2143 : : * before passing it to visibilitymap_set(), because it may be logged.
2144 : : * Given that this situation should only happen in rare cases after a
2145 : : * crash, it is not worth optimizing.
2146 : : */
2147 : 6130 : PageSetAllVisible(page);
2148 : 6130 : MarkBufferDirty(buf);
2149 : 12260 : old_vmbits = visibilitymap_set(vacrel->rel, blkno, buf,
2150 : : InvalidXLogRecPtr,
2151 : 6130 : vmbuffer, presult.vm_conflict_horizon,
2152 : 6130 : flags);
2153 : :
2154 : : /*
2155 : : * If the page wasn't already set all-visible and/or all-frozen in the
2156 : : * VM, count it as newly set for logging.
2157 : : */
2158 [ + - ]: 6130 : if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
2159 : : {
2160 : 6130 : vacrel->vm_new_visible_pages++;
2161 [ + + ]: 6130 : if (presult.all_frozen)
2162 : : {
2163 : 2077 : vacrel->vm_new_visible_frozen_pages++;
2164 : 2077 : *vm_page_frozen = true;
2165 : 2077 : }
2166 : 6130 : }
2167 [ # # # # ]: 0 : else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 &&
2168 : 0 : presult.all_frozen)
2169 : : {
2170 : 0 : vacrel->vm_new_frozen_pages++;
2171 : 0 : *vm_page_frozen = true;
2172 : 0 : }
2173 : 6130 : }
2174 : :
2175 : : /*
2176 : : * As of PostgreSQL 9.2, the visibility map bit should never be set if the
2177 : : * page-level bit is clear. However, it's possible that the bit got
2178 : : * cleared after heap_vac_scan_next_block() was called, so we must recheck
2179 : : * with buffer lock before concluding that the VM is corrupt.
2180 : : */
2181 [ + + - + : 3213 : else if (all_visible_according_to_vm && !PageIsAllVisible(page) &&
# # ]
2182 : 0 : visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer) != 0)
2183 : : {
2184 [ # # # # ]: 0 : ereport(WARNING,
2185 : : (errcode(ERRCODE_DATA_CORRUPTED),
2186 : : errmsg("page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
2187 : : vacrel->relname, blkno)));
2188 : :
2189 : 0 : visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
2190 : : VISIBILITYMAP_VALID_BITS);
2191 : 0 : }
2192 : :
2193 : : /*
2194 : : * It's possible for the value returned by
2195 : : * GetOldestNonRemovableTransactionId() to move backwards, so it's not
2196 : : * wrong for us to see tuples that appear to not be visible to everyone
2197 : : * yet, while PD_ALL_VISIBLE is already set. The real safe xmin value
2198 : : * never moves backwards, but GetOldestNonRemovableTransactionId() is
2199 : : * conservative and sometimes returns a value that's unnecessarily small,
2200 : : * so if we see that contradiction it just means that the tuples that we
2201 : : * think are not visible to everyone yet actually are, and the
2202 : : * PD_ALL_VISIBLE flag is correct.
2203 : : *
2204 : : * There should never be LP_DEAD items on a page with PD_ALL_VISIBLE set,
2205 : : * however.
2206 : : */
2207 [ + + + - ]: 3213 : else if (presult.lpdead_items > 0 && PageIsAllVisible(page))
2208 : : {
2209 [ # # # # ]: 0 : ereport(WARNING,
2210 : : (errcode(ERRCODE_DATA_CORRUPTED),
2211 : : errmsg("page containing LP_DEAD items is marked as all-visible in relation \"%s\" page %u",
2212 : : vacrel->relname, blkno)));
2213 : :
2214 : 0 : PageClearAllVisible(page);
2215 : 0 : MarkBufferDirty(buf);
2216 : 0 : visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
2217 : : VISIBILITYMAP_VALID_BITS);
2218 : 0 : }
2219 : :
2220 : : /*
2221 : : * If the all-visible page is all-frozen but not marked as such yet, mark
2222 : : * it as all-frozen.
2223 : : */
2224 [ + + + + : 3213 : else if (all_visible_according_to_vm && presult.all_frozen &&
+ + ]
2225 : 249 : !VM_ALL_FROZEN(vacrel->rel, blkno, &vmbuffer))
2226 : : {
2227 : 3 : uint8 old_vmbits;
2228 : :
2229 : : /*
2230 : : * Avoid relying on all_visible_according_to_vm as a proxy for the
2231 : : * page-level PD_ALL_VISIBLE bit being set, since it might have become
2232 : : * stale -- even when all_visible is set
2233 : : */
2234 [ + - ]: 3 : if (!PageIsAllVisible(page))
2235 : : {
2236 : 0 : PageSetAllVisible(page);
2237 : 0 : MarkBufferDirty(buf);
2238 : 0 : }
2239 : :
2240 : : /*
2241 : : * Set the page all-frozen (and all-visible) in the VM.
2242 : : *
2243 : : * We can pass InvalidTransactionId as our cutoff_xid, since a
2244 : : * snapshotConflictHorizon sufficient to make everything safe for REDO
2245 : : * was logged when the page's tuples were frozen.
2246 : : */
2247 [ + - ]: 3 : Assert(!TransactionIdIsValid(presult.vm_conflict_horizon));
2248 : 6 : old_vmbits = visibilitymap_set(vacrel->rel, blkno, buf,
2249 : : InvalidXLogRecPtr,
2250 : 3 : vmbuffer, InvalidTransactionId,
2251 : : VISIBILITYMAP_ALL_VISIBLE |
2252 : : VISIBILITYMAP_ALL_FROZEN);
2253 : :
2254 : : /*
2255 : : * The page was likely already set all-visible in the VM. However,
2256 : : * there is a small chance that it was modified sometime between
2257 : : * setting all_visible_according_to_vm and checking the visibility
2258 : : * during pruning. Check the return value of old_vmbits anyway to
2259 : : * ensure the visibility map counters used for logging are accurate.
2260 : : */
2261 [ - + ]: 3 : if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
2262 : : {
2263 : 0 : vacrel->vm_new_visible_pages++;
2264 : 0 : vacrel->vm_new_visible_frozen_pages++;
2265 : 0 : *vm_page_frozen = true;
2266 : 0 : }
2267 : :
2268 : : /*
2269 : : * We already checked that the page was not set all-frozen in the VM
2270 : : * above, so we don't need to test the value of old_vmbits.
2271 : : */
2272 : : else
2273 : : {
2274 : 3 : vacrel->vm_new_frozen_pages++;
2275 : 3 : *vm_page_frozen = true;
2276 : : }
2277 : 3 : }
2278 : :
2279 : 18686 : return presult.ndeleted;
2280 : 9343 : }
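/*
 * Illustrative sketch, not part of vacuumlazy.c: after pruning, the VM
 * update above sets the all-visible bit, and additionally the all-frozen
 * bit when every tuple on the page is frozen (all-frozen implies
 * all-visible).  The toy function below reproduces that flag selection with
 * stand-in flag names and values; they are assumptions for the demo, not
 * the server's VISIBILITYMAP_* definitions.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define DEMO_VM_ALL_VISIBLE  0x01   /* assumed value */
#define DEMO_VM_ALL_FROZEN   0x02   /* assumed value */

static uint8_t
vm_flags_after_prune(bool all_visible, bool all_frozen)
{
    uint8_t     flags = 0;

    if (!all_visible)
        return 0;               /* nothing to set in the VM */

    flags |= DEMO_VM_ALL_VISIBLE;
    if (all_frozen)
        flags |= DEMO_VM_ALL_FROZEN;    /* only valid together with all-visible */
    return flags;
}

int
main(void)
{
    printf("visible+frozen -> 0x%02x\n", vm_flags_after_prune(true, true));
    printf("visible only   -> 0x%02x\n", vm_flags_after_prune(true, false));
    printf("not visible    -> 0x%02x\n", vm_flags_after_prune(false, false));
    return 0;
}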
2281 : :
2282 : : /*
2283 : : * lazy_scan_noprune() -- lazy_scan_prune() without pruning or freezing
2284 : : *
2285 : : * Caller need only hold a pin and share lock on the buffer, unlike
2286 : : * lazy_scan_prune, which requires a full cleanup lock. While pruning isn't
2287 : : * performed here, it's quite possible that an earlier opportunistic pruning
2288 : : * operation left LP_DEAD items behind. We'll at least collect any such items
2289 : : * in dead_items for removal from indexes.
2290 : : *
2291 : : * For aggressive VACUUM callers, we may return false to indicate that a full
2292 : : * cleanup lock is required for processing by lazy_scan_prune. This is only
2293 : : * necessary when the aggressive VACUUM needs to freeze some tuple XIDs from
2294 : : * one or more tuples on the page. We always return true for non-aggressive
2295 : : * callers.
2296 : : *
2297 : : * If this function returns true, *has_lpdead_items gets set to true or false
2298 : : * depending on whether, upon return from this function, any LP_DEAD items are
2299 : : * present on the page. If this function returns false, *has_lpdead_items
2300 : : * is not updated.
2301 : : */
2302 : : static bool
2303 : 0 : lazy_scan_noprune(LVRelState *vacrel,
2304 : : Buffer buf,
2305 : : BlockNumber blkno,
2306 : : Page page,
2307 : : bool *has_lpdead_items)
2308 : : {
2309 : 0 : OffsetNumber offnum,
2310 : : maxoff;
2311 : 0 : int lpdead_items,
2312 : : live_tuples,
2313 : : recently_dead_tuples,
2314 : : missed_dead_tuples;
2315 : 0 : bool hastup;
2316 : 0 : HeapTupleHeader tupleheader;
2317 : 0 : TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
2318 : 0 : MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid;
2319 : 0 : OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
2320 : :
2321 [ # # ]: 0 : Assert(BufferGetBlockNumber(buf) == blkno);
2322 : :
2323 : 0 : hastup = false; /* for now */
2324 : :
2325 : 0 : lpdead_items = 0;
2326 : 0 : live_tuples = 0;
2327 : 0 : recently_dead_tuples = 0;
2328 : 0 : missed_dead_tuples = 0;
2329 : :
2330 : 0 : maxoff = PageGetMaxOffsetNumber(page);
2331 [ # # ]: 0 : for (offnum = FirstOffsetNumber;
2332 : 0 : offnum <= maxoff;
2333 : 0 : offnum = OffsetNumberNext(offnum))
2334 : : {
2335 : 0 : ItemId itemid;
2336 : 0 : HeapTupleData tuple;
2337 : :
2338 : 0 : vacrel->offnum = offnum;
2339 : 0 : itemid = PageGetItemId(page, offnum);
2340 : :
2341 [ # # ]: 0 : if (!ItemIdIsUsed(itemid))
2342 : 0 : continue;
2343 : :
2344 [ # # ]: 0 : if (ItemIdIsRedirected(itemid))
2345 : : {
2346 : 0 : hastup = true;
2347 : 0 : continue;
2348 : : }
2349 : :
2350 [ # # ]: 0 : if (ItemIdIsDead(itemid))
2351 : : {
2352 : : /*
2353 : : * Deliberately don't set hastup=true here. See same point in
2354 : : * lazy_scan_prune for an explanation.
2355 : : */
2356 : 0 : deadoffsets[lpdead_items++] = offnum;
2357 : 0 : continue;
2358 : : }
2359 : :
2360 : 0 : hastup = true; /* page prevents rel truncation */
2361 : 0 : tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
2362 [ # # ]: 0 : if (heap_tuple_should_freeze(tupleheader, &vacrel->cutoffs,
2363 : : &NoFreezePageRelfrozenXid,
2364 : : &NoFreezePageRelminMxid))
2365 : : {
2366 : : /* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */
2367 [ # # ]: 0 : if (vacrel->aggressive)
2368 : : {
2369 : : /*
2370 : : * Aggressive VACUUMs must always be able to advance rel's
2371 : : * relfrozenxid to a value >= FreezeLimit (and be able to
2372 : : * advance rel's relminmxid to a value >= MultiXactCutoff).
2373 : : * The ongoing aggressive VACUUM won't be able to do that
2374 : : * unless it can freeze an XID (or MXID) from this tuple now.
2375 : : *
2376 : : * The only safe option is to have caller perform processing
2377 : : * of this page using lazy_scan_prune. Caller might have to
2378 : : * wait a while for a cleanup lock, but it can't be helped.
2379 : : */
2380 : 0 : vacrel->offnum = InvalidOffsetNumber;
2381 : 0 : return false;
2382 : : }
2383 : :
2384 : : /*
2385 : : * Non-aggressive VACUUMs are under no obligation to advance
2386 : : * relfrozenxid (even by one XID). We can be much laxer here.
2387 : : *
2388 : : * Currently we always just accept an older final relfrozenxid
2389 : : * and/or relminmxid value. We never make caller wait or work a
2390 : : * little harder, even when it likely makes sense to do so.
2391 : : */
2392 : 0 : }
2393 : :
2394 : 0 : ItemPointerSet(&(tuple.t_self), blkno, offnum);
2395 : 0 : tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2396 : 0 : tuple.t_len = ItemIdGetLength(itemid);
2397 : 0 : tuple.t_tableOid = RelationGetRelid(vacrel->rel);
2398 : :
2399 [ # # # # : 0 : switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
# # # # #
# ]
2400 : 0 : buf))
2401 : : {
2402 : : case HEAPTUPLE_DELETE_IN_PROGRESS:
2403 : : case HEAPTUPLE_LIVE:
2404 : :
2405 : : /*
2406 : : * Count both cases as live, just like lazy_scan_prune
2407 : : */
2408 : 0 : live_tuples++;
2409 : :
2410 : 0 : break;
2411 : : case HEAPTUPLE_DEAD:
2412 : :
2413 : : /*
2414 : : * There is some useful work for pruning to do, that won't be
2415 : : * done due to failure to get a cleanup lock.
2416 : : */
2417 : 0 : missed_dead_tuples++;
2418 : 0 : break;
2419 : : case HEAPTUPLE_RECENTLY_DEAD:
2420 : :
2421 : : /*
2422 : : * Count in recently_dead_tuples, just like lazy_scan_prune
2423 : : */
2424 : 0 : recently_dead_tuples++;
2425 : 0 : break;
2426 : : case HEAPTUPLE_INSERT_IN_PROGRESS:
2427 : :
2428 : : /*
2429 : : * Do not count these rows as live, just like lazy_scan_prune
2430 : : */
2431 : : break;
2432 : : default:
2433 [ # # # # ]: 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
2434 : 0 : break;
2435 : : }
2436 [ # # # ]: 0 : }
2437 : :
2438 : 0 : vacrel->offnum = InvalidOffsetNumber;
2439 : :
2440 : : /*
2441 : : * By here we know for sure that caller can put off freezing and pruning
2442 : : * this particular page until the next VACUUM. Remember its details now.
2443 : : * (lazy_scan_prune expects a clean slate, so we have to do this last.)
2444 : : */
2445 : 0 : vacrel->NewRelfrozenXid = NoFreezePageRelfrozenXid;
2446 : 0 : vacrel->NewRelminMxid = NoFreezePageRelminMxid;
2447 : :
2448 : : /* Save any LP_DEAD items found on the page in dead_items */
2449 [ # # ]: 0 : if (vacrel->nindexes == 0)
2450 : : {
2451 : : /* Using one-pass strategy (since table has no indexes) */
2452 [ # # ]: 0 : if (lpdead_items > 0)
2453 : : {
2454 : : /*
 2455 : : * Perfunctory handling for the corner case where a single-pass
 2456 : : * strategy VACUUM cannot get a cleanup lock, and it turns out
 2457 : : * that there are one or more LP_DEAD items: just count the LP_DEAD
2458 : : * items as missed_dead_tuples instead. (This is a bit dishonest,
2459 : : * but it beats having to maintain specialized heap vacuuming code
2460 : : * forever, for vanishingly little benefit.)
2461 : : */
2462 : 0 : hastup = true;
2463 : 0 : missed_dead_tuples += lpdead_items;
2464 : 0 : }
2465 : 0 : }
2466 [ # # ]: 0 : else if (lpdead_items > 0)
2467 : : {
2468 : : /*
2469 : : * Page has LP_DEAD items, and so any references/TIDs that remain in
2470 : : * indexes will be deleted during index vacuuming (and then marked
2471 : : * LP_UNUSED in the heap)
2472 : : */
2473 : 0 : vacrel->lpdead_item_pages++;
2474 : :
2475 : 0 : dead_items_add(vacrel, blkno, deadoffsets, lpdead_items);
2476 : :
2477 : 0 : vacrel->lpdead_items += lpdead_items;
2478 : 0 : }
2479 : :
2480 : : /*
2481 : : * Finally, add relevant page-local counts to whole-VACUUM counts
2482 : : */
2483 : 0 : vacrel->live_tuples += live_tuples;
2484 : 0 : vacrel->recently_dead_tuples += recently_dead_tuples;
2485 : 0 : vacrel->missed_dead_tuples += missed_dead_tuples;
2486 [ # # ]: 0 : if (missed_dead_tuples > 0)
2487 : 0 : vacrel->missed_dead_pages++;
2488 : :
2489 : : /* Can't truncate this page */
2490 [ # # ]: 0 : if (hastup)
2491 : 0 : vacrel->nonempty_pages = blkno + 1;
2492 : :
2493 : : /* Did we find LP_DEAD items? */
2494 : 0 : *has_lpdead_items = (lpdead_items > 0);
2495 : :
2496 : : /* Caller won't need to call lazy_scan_prune with same page */
2497 : 0 : return true;
2498 : 0 : }
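/*
 * Illustrative sketch, not part of vacuumlazy.c: lazy_scan_noprune() tallies
 * each item on the page into a few buckets (LP_DEAD offsets, live, recently
 * dead, or "missed dead" tuples) instead of pruning.  The toy classifier
 * below shows the same bucketing over a made-up item-state enum; the enum
 * values and the input array are invented for the demo.
 */
#include <stdio.h>
#include <stddef.h>

typedef enum demo_item_state
{
    DEMO_ITEM_UNUSED,
    DEMO_ITEM_LP_DEAD,
    DEMO_ITEM_LIVE,
    DEMO_ITEM_RECENTLY_DEAD,
    DEMO_ITEM_DEAD_BUT_UNPRUNED     /* counted as "missed dead" */
} demo_item_state;

int
main(void)
{
    demo_item_state page[] = {
        DEMO_ITEM_LIVE, DEMO_ITEM_LP_DEAD, DEMO_ITEM_UNUSED,
        DEMO_ITEM_RECENTLY_DEAD, DEMO_ITEM_LIVE, DEMO_ITEM_DEAD_BUT_UNPRUNED
    };
    int         lpdead_items = 0;
    int         live_tuples = 0;
    int         recently_dead_tuples = 0;
    int         missed_dead_tuples = 0;

    for (size_t i = 0; i < sizeof(page) / sizeof(page[0]); i++)
    {
        switch (page[i])
        {
            case DEMO_ITEM_LP_DEAD:
                lpdead_items++;
                break;
            case DEMO_ITEM_LIVE:
                live_tuples++;
                break;
            case DEMO_ITEM_RECENTLY_DEAD:
                recently_dead_tuples++;
                break;
            case DEMO_ITEM_DEAD_BUT_UNPRUNED:
                missed_dead_tuples++;
                break;
            case DEMO_ITEM_UNUSED:
                break;
        }
    }

    printf("lpdead=%d live=%d recently_dead=%d missed_dead=%d\n",
           lpdead_items, live_tuples, recently_dead_tuples,
           missed_dead_tuples);
    return 0;
}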
2499 : :
2500 : : /*
2501 : : * Main entry point for index vacuuming and heap vacuuming.
2502 : : *
2503 : : * Removes items collected in dead_items from table's indexes, then marks the
2504 : : * same items LP_UNUSED in the heap. See the comments above lazy_scan_heap
2505 : : * for full details.
2506 : : *
2507 : : * Also empties dead_items, freeing up space for later TIDs.
2508 : : *
2509 : : * We may choose to bypass index vacuuming at this point, though only when the
2510 : : * ongoing VACUUM operation will definitely only have one index scan/round of
2511 : : * index vacuuming.
2512 : : */
2513 : : static void
2514 : 65 : lazy_vacuum(LVRelState *vacrel)
2515 : : {
2516 : 65 : bool bypass;
2517 : :
2518 : : /* Should not end up here with no indexes */
2519 [ + - ]: 65 : Assert(vacrel->nindexes > 0);
2520 [ + - ]: 65 : Assert(vacrel->lpdead_item_pages > 0);
2521 : :
2522 [ + + ]: 65 : if (!vacrel->do_index_vacuuming)
2523 : : {
2524 [ + - ]: 1 : Assert(!vacrel->do_index_cleanup);
2525 : 1 : dead_items_reset(vacrel);
2526 : 1 : return;
2527 : : }
2528 : :
2529 : : /*
2530 : : * Consider bypassing index vacuuming (and heap vacuuming) entirely.
2531 : : *
2532 : : * We currently only do this in cases where the number of LP_DEAD items
2533 : : * for the entire VACUUM operation is close to zero. This avoids sharp
2534 : : * discontinuities in the duration and overhead of successive VACUUM
2535 : : * operations that run against the same table with a fixed workload.
2536 : : * Ideally, successive VACUUM operations will behave as if there are
2537 : : * exactly zero LP_DEAD items in cases where there are close to zero.
2538 : : *
2539 : : * This is likely to be helpful with a table that is continually affected
2540 : : * by UPDATEs that can mostly apply the HOT optimization, but occasionally
2541 : : * have small aberrations that lead to just a few heap pages retaining
2542 : : * only one or two LP_DEAD items. This is pretty common; even when the
2543 : : * DBA goes out of their way to make UPDATEs use HOT, it is practically
2544 : : * impossible to predict whether HOT will be applied in 100% of cases.
2545 : : * It's far easier to ensure that 99%+ of all UPDATEs against a table use
2546 : : * HOT through careful tuning.
2547 : : */
2548 : 64 : bypass = false;
2549 [ + + - + ]: 64 : if (vacrel->consider_bypass_optimization && vacrel->rel_pages > 0)
2550 : : {
2551 : 56 : BlockNumber threshold;
2552 : :
2553 [ + - ]: 56 : Assert(vacrel->num_index_scans == 0);
2554 [ + - ]: 56 : Assert(vacrel->lpdead_items == vacrel->dead_items_info->num_items);
2555 [ + - ]: 56 : Assert(vacrel->do_index_vacuuming);
2556 [ + - ]: 56 : Assert(vacrel->do_index_cleanup);
2557 : :
2558 : : /*
2559 : : * This crossover point at which we'll start to do index vacuuming is
2560 : : * expressed as a percentage of the total number of heap pages in the
2561 : : * table that are known to have at least one LP_DEAD item. This is
2562 : : * much more important than the total number of LP_DEAD items, since
2563 : : * it's a proxy for the number of heap pages whose visibility map bits
2564 : : * cannot be set on account of bypassing index and heap vacuuming.
2565 : : *
2566 : : * We apply one further precautionary test: the space currently used
2567 : : * to store the TIDs (TIDs that now all point to LP_DEAD items) must
2568 : : * not exceed 32MB. This limits the risk that we will bypass index
2569 : : * vacuuming again and again until eventually there is a VACUUM whose
2570 : : * dead_items space is not CPU cache resident.
2571 : : *
2572 : : * We don't take any special steps to remember the LP_DEAD items (such
2573 : : * as counting them in our final update to the stats system) when the
2574 : : * optimization is applied. Though the accounting used in analyze.c's
2575 : : * acquire_sample_rows() will recognize the same LP_DEAD items as dead
2576 : : * rows in its own stats report, that's okay. The discrepancy should
2577 : : * be negligible. If this optimization is ever expanded to cover more
2578 : : * cases then this may need to be reconsidered.
2579 : : */
2580 : 56 : threshold = (double) vacrel->rel_pages * BYPASS_THRESHOLD_PAGES;
2581 [ + - ]: 56 : bypass = (vacrel->lpdead_item_pages < threshold &&
2582 : 0 : TidStoreMemoryUsage(vacrel->dead_items) < 32 * 1024 * 1024);
2583 : 56 : }
2584 : :
2585 [ - + ]: 64 : if (bypass)
2586 : : {
2587 : : /*
2588 : : * There are almost zero TIDs. Behave as if there were precisely
2589 : : * zero: bypass index vacuuming, but do index cleanup.
2590 : : *
2591 : : * We expect that the ongoing VACUUM operation will finish very
2592 : : * quickly, so there is no point in considering speeding up as a
2593 : : * failsafe against wraparound failure. (Index cleanup is expected to
2594 : : * finish very quickly in cases where there were no ambulkdelete()
2595 : : * calls.)
2596 : : */
2597 : 0 : vacrel->do_index_vacuuming = false;
2598 : 0 : }
2599 [ + - ]: 64 : else if (lazy_vacuum_all_indexes(vacrel))
2600 : : {
2601 : : /*
2602 : : * We successfully completed a round of index vacuuming. Do related
2603 : : * heap vacuuming now.
2604 : : */
2605 : 64 : lazy_vacuum_heap_rel(vacrel);
2606 : 64 : }
2607 : : else
2608 : : {
2609 : : /*
2610 : : * Failsafe case.
2611 : : *
2612 : : * We attempted index vacuuming, but didn't finish a full round/full
2613 : : * index scan. This happens when relfrozenxid or relminmxid is too
2614 : : * far in the past.
2615 : : *
2616 : : * From this point on the VACUUM operation will do no further index
2617 : : * vacuuming or heap vacuuming. This VACUUM operation won't end up
2618 : : * back here again.
2619 : : */
2620 [ # # ]: 0 : Assert(VacuumFailsafeActive);
2621 : : }
2622 : :
2623 : : /*
2624 : : * Forget the LP_DEAD items that we just vacuumed (or just decided to not
2625 : : * vacuum)
2626 : : */
2627 : 64 : dead_items_reset(vacrel);
2628 [ - + ]: 65 : }
2629 : :
2630 : : /*
2631 : : * lazy_vacuum_all_indexes() -- Main entry for index vacuuming
2632 : : *
2633 : : * Returns true in the common case when all indexes were successfully
2634 : : * vacuumed. Returns false in rare cases where we determined that the ongoing
2635 : : * VACUUM operation is at risk of taking too long to finish, leading to
2636 : : * wraparound failure.
2637 : : */
2638 : : static bool
2639 : 64 : lazy_vacuum_all_indexes(LVRelState *vacrel)
2640 : : {
2641 : 64 : bool allindexes = true;
2642 : 64 : double old_live_tuples = vacrel->rel->rd_rel->reltuples;
2643 : 64 : const int progress_start_index[] = {
2644 : : PROGRESS_VACUUM_PHASE,
2645 : : PROGRESS_VACUUM_INDEXES_TOTAL
2646 : : };
2647 : 64 : const int progress_end_index[] = {
2648 : : PROGRESS_VACUUM_INDEXES_TOTAL,
2649 : : PROGRESS_VACUUM_INDEXES_PROCESSED,
2650 : : PROGRESS_VACUUM_NUM_INDEX_VACUUMS
2651 : : };
2652 : 64 : int64 progress_start_val[2];
2653 : 64 : int64 progress_end_val[3];
2654 : :
2655 [ + - ]: 64 : Assert(vacrel->nindexes > 0);
2656 [ + - ]: 64 : Assert(vacrel->do_index_vacuuming);
2657 [ + - ]: 64 : Assert(vacrel->do_index_cleanup);
2658 : :
2659 : : /* Precheck for XID wraparound emergencies */
2660 [ - + ]: 64 : if (lazy_check_wraparound_failsafe(vacrel))
2661 : : {
2662 : : /* Wraparound emergency -- don't even start an index scan */
2663 : 0 : return false;
2664 : : }
2665 : :
2666 : : /*
2667 : : * Report that we are now vacuuming indexes and the number of indexes to
2668 : : * vacuum.
2669 : : */
2670 : 64 : progress_start_val[0] = PROGRESS_VACUUM_PHASE_VACUUM_INDEX;
2671 : 64 : progress_start_val[1] = vacrel->nindexes;
2672 : 64 : pgstat_progress_update_multi_param(2, progress_start_index, progress_start_val);
2673 : :
2674 [ + + ]: 64 : if (!ParallelVacuumIsActive(vacrel))
2675 : : {
2676 [ + + ]: 164 : for (int idx = 0; idx < vacrel->nindexes; idx++)
2677 : : {
2678 : 107 : Relation indrel = vacrel->indrels[idx];
2679 : 107 : IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2680 : :
2681 : 214 : vacrel->indstats[idx] = lazy_vacuum_one_index(indrel, istat,
2682 : 107 : old_live_tuples,
2683 : 107 : vacrel);
2684 : :
2685 : : /* Report the number of indexes vacuumed */
2686 : 107 : pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_PROCESSED,
2687 : 107 : idx + 1);
2688 : :
2689 [ - + ]: 107 : if (lazy_check_wraparound_failsafe(vacrel))
2690 : : {
2691 : : /* Wraparound emergency -- end current index scan */
2692 : 0 : allindexes = false;
2693 : 0 : break;
2694 : : }
2695 [ - + ]: 107 : }
2696 : 57 : }
2697 : : else
2698 : : {
2699 : : /* Outsource everything to parallel variant */
2700 : 14 : parallel_vacuum_bulkdel_all_indexes(vacrel->pvs, old_live_tuples,
2701 : 7 : vacrel->num_index_scans);
2702 : :
2703 : : /*
2704 : : * Do a postcheck to consider applying wraparound failsafe now. Note
2705 : : * that parallel VACUUM only gets the precheck and this postcheck.
2706 : : */
2707 [ + - ]: 7 : if (lazy_check_wraparound_failsafe(vacrel))
2708 : 0 : allindexes = false;
2709 : : }
2710 : :
2711 : : /*
2712 : : * We delete all LP_DEAD items from the first heap pass in all indexes on
2713 : : * each call here (except calls where we choose to do the failsafe). This
2714 : : * makes the next call to lazy_vacuum_heap_rel() safe (except in the event
2715 : : * of the failsafe triggering, which prevents the next call from taking
2716 : : * place).
2717 : : */
2718 [ + + + - ]: 64 : Assert(vacrel->num_index_scans > 0 ||
2719 : : vacrel->dead_items_info->num_items == vacrel->lpdead_items);
2720 [ - + # # ]: 64 : Assert(allindexes || VacuumFailsafeActive);
2721 : :
2722 : : /*
2723 : : * Increase and report the number of index scans. Also, we reset
2724 : : * PROGRESS_VACUUM_INDEXES_TOTAL and PROGRESS_VACUUM_INDEXES_PROCESSED.
2725 : : *
2726 : : * We deliberately include the case where we started a round of bulk
2727 : : * deletes that we weren't able to finish due to the failsafe triggering.
2728 : : */
2729 : 64 : vacrel->num_index_scans++;
2730 : 64 : progress_end_val[0] = 0;
2731 : 64 : progress_end_val[1] = 0;
2732 : 64 : progress_end_val[2] = vacrel->num_index_scans;
2733 : 64 : pgstat_progress_update_multi_param(3, progress_end_index, progress_end_val);
2734 : :
2735 : 64 : return allindexes;
2736 : 64 : }
2737 : :
2738 : : /*
2739 : : * Read stream callback for vacuum's third phase (second pass over the heap).
2740 : : * Gets the next block from the TID store and returns it or InvalidBlockNumber
2741 : : * if there are no further blocks to vacuum.
2742 : : *
2743 : : * NB: Assumed to be safe to use with READ_STREAM_USE_BATCHING.
2744 : : */
2745 : : static BlockNumber
2746 : 1025 : vacuum_reap_lp_read_stream_next(ReadStream *stream,
2747 : : void *callback_private_data,
2748 : : void *per_buffer_data)
2749 : : {
2750 : 1025 : TidStoreIter *iter = callback_private_data;
2751 : 1025 : TidStoreIterResult *iter_result;
2752 : :
2753 : 1025 : iter_result = TidStoreIterateNext(iter);
2754 [ + + ]: 1025 : if (iter_result == NULL)
2755 : 64 : return InvalidBlockNumber;
2756 : :
2757 : : /*
2758 : : * Save the TidStoreIterResult for later, so we can extract the offsets.
2759 : : * It is safe to copy the result, according to TidStoreIterateNext().
2760 : : */
2761 : 961 : memcpy(per_buffer_data, iter_result, sizeof(*iter_result));
2762 : :
2763 : 961 : return iter_result->blkno;
2764 : 1025 : }
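: :
: : /*
: :  * A minimal sketch of a read stream block-number callback, shown for
: :  * illustration only and using the same signature as
: :  * vacuum_reap_lp_read_stream_next() above.  The SeqRange struct and the
: :  * seq_read_stream_next() name are hypothetical, not part of this file.
: :  */
: : #ifdef NOT_USED
: : typedef struct SeqRange
: : {
: : 	BlockNumber next;			/* next block to hand to the read stream */
: : 	BlockNumber end;			/* first block beyond the range */
: : } SeqRange;
: :
: : static BlockNumber
: : seq_read_stream_next(ReadStream *stream,
: : 					 void *callback_private_data,
: : 					 void *per_buffer_data)
: : {
: : 	SeqRange   *range = callback_private_data;
: :
: : 	if (range->next >= range->end)
: : 		return InvalidBlockNumber;	/* no further blocks to read */
: : 	return range->next++;
: : }
: : #endif							/* NOT_USED */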
2765 : :
2766 : : /*
2767 : : * lazy_vacuum_heap_rel() -- second pass over the heap for two pass strategy
2768 : : *
2769 : : * This routine marks LP_DEAD items in vacrel->dead_items as LP_UNUSED. Pages
2770 : : * that never had lazy_scan_prune record LP_DEAD items are not visited at all.
2771 : : *
2772 : : * We may also be able to truncate the line pointer array of the heap pages we
2773 : : * visit. If there is a contiguous group of LP_UNUSED items at the end of the
2774 : : * array, it can be reclaimed as free space. These LP_UNUSED items usually
2775 : : * start out as LP_DEAD items recorded by lazy_scan_prune (we set items from
2776 : : * each page to LP_UNUSED, and then consider if it's possible to truncate the
2777 : : * page's line pointer array).
2778 : : *
2779 : : * Note: the reason for doing this as a second pass is we cannot remove the
2780 : : * tuples until we've removed their index entries, and we want to process
2781 : : * index entry removal in batches as large as possible.
2782 : : */
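: : /*
: :  * As a concrete example of the line pointer truncation mentioned above: if
: :  * a page's highest-numbered line pointers are all LP_UNUSED after this
: :  * pass, PageTruncateLinePointerArray() can shrink the line pointer array,
: :  * lowering pd_lower and returning that space to the page's free space.
: :  */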
2783 : : static void
2784 : 64 : lazy_vacuum_heap_rel(LVRelState *vacrel)
2785 : : {
2786 : 64 : ReadStream *stream;
2787 : 64 : BlockNumber vacuumed_pages = 0;
2788 : 64 : Buffer vmbuffer = InvalidBuffer;
2789 : 64 : LVSavedErrInfo saved_err_info;
2790 : 64 : TidStoreIter *iter;
2791 : :
2792 [ + - ]: 64 : Assert(vacrel->do_index_vacuuming);
2793 [ + - ]: 64 : Assert(vacrel->do_index_cleanup);
2794 [ + - ]: 64 : Assert(vacrel->num_index_scans > 0);
2795 : :
2796 : : /* Report that we are now vacuuming the heap */
2797 : 64 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
2798 : : PROGRESS_VACUUM_PHASE_VACUUM_HEAP);
2799 : :
2800 : : /* Update error traceback information */
2801 : 64 : update_vacuum_error_info(vacrel, &saved_err_info,
2802 : : VACUUM_ERRCB_PHASE_VACUUM_HEAP,
2803 : : InvalidBlockNumber, InvalidOffsetNumber);
2804 : :
2805 : 64 : iter = TidStoreBeginIterate(vacrel->dead_items);
2806 : :
2807 : : /*
2808 : : * Set up the read stream for vacuum's second pass through the heap.
2809 : : *
2810 : : * It is safe to use batchmode, as vacuum_reap_lp_read_stream_next() does
2811 : : * not need to wait for IO and does not perform locking. Once we support
2812 : : * parallelism it should still be fine, as presumably the holder of locks
2813 : : * would never be blocked by IO while holding the lock.
2814 : : */
2815 : 64 : stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE |
2816 : : READ_STREAM_USE_BATCHING,
2817 : 64 : vacrel->bstrategy,
2818 : 64 : vacrel->rel,
2819 : : MAIN_FORKNUM,
2820 : : vacuum_reap_lp_read_stream_next,
2821 : 64 : iter,
2822 : : sizeof(TidStoreIterResult));
2823 : :
2824 : 1025 : while (true)
2825 : : {
2826 : 1025 : BlockNumber blkno;
2827 : 1025 : Buffer buf;
2828 : 1025 : Page page;
2829 : 1025 : TidStoreIterResult *iter_result;
2830 : 1025 : Size freespace;
2831 : 1025 : OffsetNumber offsets[MaxOffsetNumber];
2832 : 1025 : int num_offsets;
2833 : :
2834 : 1025 : vacuum_delay_point(false);
2835 : :
2836 : 1025 : buf = read_stream_next_buffer(stream, (void **) &iter_result);
2837 : :
2838 : : /* The relation is exhausted */
2839 [ + + ]: 1025 : if (!BufferIsValid(buf))
2840 : 64 : break;
2841 : :
2842 : 961 : vacrel->blkno = blkno = BufferGetBlockNumber(buf);
2843 : :
2844 [ + - ]: 961 : Assert(iter_result);
2845 : 961 : num_offsets = TidStoreGetBlockOffsets(iter_result, offsets, lengthof(offsets));
2846 [ - + ]: 961 : Assert(num_offsets <= lengthof(offsets));
2847 : :
2848 : : /*
2849 : : * Pin the visibility map page in case we need to mark the page
2850 : : * all-visible. In most cases this will be very cheap, because we'll
2851 : : * already have the correct page pinned anyway.
2852 : : */
2853 : 961 : visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
2854 : :
2855 : : /* We need a non-cleanup exclusive lock to mark dead_items unused */
2856 : 961 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
2857 : 1922 : lazy_vacuum_heap_page(vacrel, blkno, buf, offsets,
2858 : 961 : num_offsets, vmbuffer);
2859 : :
2860 : : /* Now that we've vacuumed the page, record its available space */
2861 : 961 : page = BufferGetPage(buf);
2862 : 961 : freespace = PageGetHeapFreeSpace(page);
2863 : :
2864 : 961 : UnlockReleaseBuffer(buf);
2865 : 961 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
2866 : 961 : vacuumed_pages++;
2867 [ - + + ]: 1025 : }
2868 : :
2869 : 64 : read_stream_end(stream);
2870 : 64 : TidStoreEndIterate(iter);
2871 : :
2872 : 64 : vacrel->blkno = InvalidBlockNumber;
2873 [ - + ]: 64 : if (BufferIsValid(vmbuffer))
2874 : 64 : ReleaseBuffer(vmbuffer);
2875 : :
2876 : : /*
2877 : : * We set all LP_DEAD items from the first heap pass to LP_UNUSED during
2878 : : * the second heap pass. No more, no less.
2879 : : */
2880 [ + + + - ]: 64 : Assert(vacrel->num_index_scans > 1 ||
2881 : : (vacrel->dead_items_info->num_items == vacrel->lpdead_items &&
2882 : : vacuumed_pages == vacrel->lpdead_item_pages));
2883 : :
2884 [ - + - + ]: 64 : ereport(DEBUG2,
2885 : : (errmsg("table \"%s\": removed %" PRId64 " dead item identifiers in %u pages",
2886 : : vacrel->relname, vacrel->dead_items_info->num_items,
2887 : : vacuumed_pages)));
2888 : :
2889 : : /* Revert to the previous phase information for error traceback */
2890 : 64 : restore_vacuum_error_info(vacrel, &saved_err_info);
2891 : 64 : }
2892 : :
2893 : : /*
2894 : : * lazy_vacuum_heap_page() -- free page's LP_DEAD items listed in the
2895 : : * vacrel->dead_items store.
2896 : : *
2897 : : * Caller must have an exclusive buffer lock on the buffer (though a full
2898 : : * cleanup lock is also acceptable). vmbuffer must be valid and already have
2899 : : * a pin on blkno's visibility map page.
2900 : : */
2901 : : static void
2902 : 961 : lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
2903 : : OffsetNumber *deadoffsets, int num_offsets,
2904 : : Buffer vmbuffer)
2905 : : {
2906 : 961 : Page page = BufferGetPage(buffer);
2907 : 961 : OffsetNumber unused[MaxHeapTuplesPerPage];
2908 : 961 : int nunused = 0;
2909 : 961 : TransactionId visibility_cutoff_xid;
2910 : 961 : TransactionId conflict_xid = InvalidTransactionId;
2911 : 961 : bool all_frozen;
2912 : 961 : LVSavedErrInfo saved_err_info;
2913 : 961 : uint8 vmflags = 0;
2914 : :
2915 [ + - ]: 961 : Assert(vacrel->do_index_vacuuming);
2916 : :
2917 : 961 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
2918 : :
2919 : : /* Update error traceback information */
2920 : 1922 : update_vacuum_error_info(vacrel, &saved_err_info,
2921 : 961 : VACUUM_ERRCB_PHASE_VACUUM_HEAP, blkno,
2922 : : InvalidOffsetNumber);
2923 : :
2924 : : /*
2925 : : * Before marking dead items unused, check whether the page will become
2926 : : * all-visible once that change is applied. This lets us reap the tuples
2927 : : * and mark the page all-visible within the same critical section,
2928 : : * enabling both changes to be emitted in a single WAL record. Since the
2929 : : * visibility checks may perform I/O and allocate memory, they must be
2930 : : * done outside the critical section.
2931 : : */
2932 [ + + + + ]: 1922 : if (heap_page_would_be_all_visible(vacrel->rel, buffer,
2933 : 961 : vacrel->cutoffs.OldestXmin,
2934 : 961 : deadoffsets, num_offsets,
2935 : : &all_frozen, &visibility_cutoff_xid,
2936 : 961 : &vacrel->offnum))
2937 : : {
2938 : 957 : vmflags |= VISIBILITYMAP_ALL_VISIBLE;
2939 [ + + ]: 957 : if (all_frozen)
2940 : : {
2941 : 763 : vmflags |= VISIBILITYMAP_ALL_FROZEN;
2942 [ + - ]: 763 : Assert(!TransactionIdIsValid(visibility_cutoff_xid));
2943 : 763 : }
2944 : :
2945 : : /*
2946 : : * Take the lock on the vmbuffer before entering a critical section.
2947 : : * The heap page lock must also be held while updating the VM to
2948 : : * ensure consistency.
2949 : : */
2950 : 957 : LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE);
2951 : 957 : }
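: :
: : /*
: :  * At this point, an all-visible and all-frozen page has vmflags set to
: :  * VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN, an all-visible page
: :  * that still holds unfrozen tuples has only VISIBILITYMAP_ALL_VISIBLE, and
: :  * a page that failed the check above leaves vmflags at 0.
: :  */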
2952 : :
2953 : 961 : START_CRIT_SECTION();
2954 : :
2955 [ + + ]: 70624 : for (int i = 0; i < num_offsets; i++)
2956 : : {
2957 : 69663 : ItemId itemid;
2958 : 69663 : OffsetNumber toff = deadoffsets[i];
2959 : :
2960 : 69663 : itemid = PageGetItemId(page, toff);
2961 : :
2962 [ + - ]: 69663 : Assert(ItemIdIsDead(itemid) && !ItemIdHasStorage(itemid));
2963 : 69663 : ItemIdSetUnused(itemid);
2964 : 69663 : unused[nunused++] = toff;
2965 : 69663 : }
2966 : :
2967 [ + - ]: 961 : Assert(nunused > 0);
2968 : :
2969 : : /* Attempt to truncate line pointer array now */
2970 : 961 : PageTruncateLinePointerArray(page);
2971 : :
2972 [ + + ]: 961 : if ((vmflags & VISIBILITYMAP_VALID_BITS) != 0)
2973 : : {
2974 : : /*
2975 : : * The page is guaranteed to have had dead line pointers, so we always
2976 : : * set PD_ALL_VISIBLE.
2977 : : */
2978 : 957 : PageSetAllVisible(page);
2979 : 1914 : visibilitymap_set_vmbits(blkno,
2980 : 957 : vmbuffer, vmflags,
2981 : 957 : vacrel->rel->rd_locator);
2982 : 957 : conflict_xid = visibility_cutoff_xid;
2983 : 957 : }
2984 : :
2985 : : /*
2986 : : * Mark buffer dirty before we write WAL.
2987 : : */
2988 : 961 : MarkBufferDirty(buffer);
2989 : :
2990 : : /* XLOG stuff */
2991 [ + + + + + - - + ]: 961 : if (RelationNeedsWAL(vacrel->rel))
2992 : : {
2993 : 1428 : log_heap_prune_and_freeze(vacrel->rel, buffer,
2994 [ + + ]: 714 : vmflags != 0 ? vmbuffer : InvalidBuffer,
2995 : 714 : vmflags,
2996 : 714 : conflict_xid,
2997 : : false, /* no cleanup lock required */
2998 : : PRUNE_VACUUM_CLEANUP,
2999 : : NULL, 0, /* frozen */
3000 : : NULL, 0, /* redirected */
3001 : : NULL, 0, /* dead */
3002 : 714 : unused, nunused);
3003 : 714 : }
3004 : :
3005 [ + - ]: 961 : END_CRIT_SECTION();
3006 : :
3007 [ + + ]: 961 : if ((vmflags & VISIBILITYMAP_ALL_VISIBLE) != 0)
3008 : : {
3009 : : /* Count the newly set VM page for logging */
3010 : 957 : LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
3011 : 957 : vacrel->vm_new_visible_pages++;
3012 [ + + ]: 957 : if (all_frozen)
3013 : 763 : vacrel->vm_new_visible_frozen_pages++;
3014 : 957 : }
3015 : :
3016 : : /* Revert to the previous phase information for error traceback */
3017 : 961 : restore_vacuum_error_info(vacrel, &saved_err_info);
3018 : 961 : }
3019 : :
3020 : : /*
3021 : : * Trigger the failsafe to avoid wraparound failure when the target table's
3022 : : * relfrozenxid and/or relminmxid is dangerously far in the past.
3023 : : * Triggering the failsafe makes the ongoing VACUUM bypass any further index
3024 : : * vacuuming and heap vacuuming. Truncating the heap is also bypassed.
3025 : : *
3026 : : * Any remaining work (work that VACUUM cannot just bypass) is typically sped
3027 : : * up when the failsafe triggers. VACUUM stops applying any cost-based delay
3028 : : * that it started out with.
3029 : : *
3030 : : * Returns true when failsafe has been triggered.
3031 : : */
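: : /*
: :  * As an aside (an assumption of this note; vacuum.c holds the authoritative
: :  * logic): vacuum_xid_failsafe_check() compares the ages of relfrozenxid and
: :  * relminmxid against the vacuum_failsafe_age and
: :  * vacuum_multixact_failsafe_age settings, which default to 1.6 billion.
: :  */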
3032 : : static bool
3033 : 823 : lazy_check_wraparound_failsafe(LVRelState *vacrel)
3034 : : {
3035 : : /* Don't warn more than once per VACUUM */
3036 [ - + ]: 823 : if (VacuumFailsafeActive)
3037 : 0 : return true;
3038 : :
3039 [ - + ]: 823 : if (unlikely(vacuum_xid_failsafe_check(&vacrel->cutoffs)))
3040 : : {
3041 : 0 : const int progress_index[] = {
3042 : : PROGRESS_VACUUM_INDEXES_TOTAL,
3043 : : PROGRESS_VACUUM_INDEXES_PROCESSED,
3044 : : PROGRESS_VACUUM_MODE
3045 : : };
3046 : 0 : int64 progress_val[3] = {0, 0, PROGRESS_VACUUM_MODE_FAILSAFE};
3047 : :
3048 : 0 : VacuumFailsafeActive = true;
3049 : :
3050 : : /*
3051 : : * Abandon use of a buffer access strategy to allow use of all of
3052 : : * shared buffers. We assume the caller who allocated the memory for
3053 : : * the BufferAccessStrategy will free it.
3054 : : */
3055 : 0 : vacrel->bstrategy = NULL;
3056 : :
3057 : : /* Disable index vacuuming, index cleanup, and heap rel truncation */
3058 : 0 : vacrel->do_index_vacuuming = false;
3059 : 0 : vacrel->do_index_cleanup = false;
3060 : 0 : vacrel->do_rel_truncate = false;
3061 : :
3062 : : /* Reset the progress counters and set the failsafe mode */
3063 : 0 : pgstat_progress_update_multi_param(3, progress_index, progress_val);
3064 : :
3065 [ # # # # ]: 0 : ereport(WARNING,
3066 : : (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans",
3067 : : vacrel->dbname, vacrel->relnamespace, vacrel->relname,
3068 : : vacrel->num_index_scans),
3069 : : errdetail("The table's relfrozenxid or relminmxid is too far in the past."),
3070 : : errhint("Consider increasing configuration parameter \"maintenance_work_mem\" or \"autovacuum_work_mem\".\n"
3071 : : "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs.")));
3072 : :
3073 : : /* Stop applying cost limits from this point on */
3074 : 0 : VacuumCostActive = false;
3075 : 0 : VacuumCostBalance = 0;
3076 : :
3077 : 0 : return true;
3078 : 0 : }
3079 : :
3080 : 823 : return false;
3081 : 823 : }
3082 : :
3083 : : /*
3084 : : * lazy_cleanup_all_indexes() -- cleanup all indexes of relation.
3085 : : */
3086 : : static void
3087 : 454 : lazy_cleanup_all_indexes(LVRelState *vacrel)
3088 : : {
3089 : 454 : double reltuples = vacrel->new_rel_tuples;
3090 : 454 : bool estimated_count = vacrel->scanned_pages < vacrel->rel_pages;
3091 : 454 : const int progress_start_index[] = {
3092 : : PROGRESS_VACUUM_PHASE,
3093 : : PROGRESS_VACUUM_INDEXES_TOTAL
3094 : : };
3095 : 454 : const int progress_end_index[] = {
3096 : : PROGRESS_VACUUM_INDEXES_TOTAL,
3097 : : PROGRESS_VACUUM_INDEXES_PROCESSED
3098 : : };
3099 : 454 : int64 progress_start_val[2];
3100 : 454 : int64 progress_end_val[2] = {0, 0};
3101 : :
3102 [ + - ]: 454 : Assert(vacrel->do_index_cleanup);
3103 [ + - ]: 454 : Assert(vacrel->nindexes > 0);
3104 : :
3105 : : /*
3106 : : * Report that we are now cleaning up indexes and the number of indexes to
3107 : : * cleanup.
3108 : : */
3109 : 454 : progress_start_val[0] = PROGRESS_VACUUM_PHASE_INDEX_CLEANUP;
3110 : 454 : progress_start_val[1] = vacrel->nindexes;
3111 : 454 : pgstat_progress_update_multi_param(2, progress_start_index, progress_start_val);
3112 : :
3113 [ + + ]: 454 : if (!ParallelVacuumIsActive(vacrel))
3114 : : {
3115 [ + + ]: 1053 : for (int idx = 0; idx < vacrel->nindexes; idx++)
3116 : : {
3117 : 605 : Relation indrel = vacrel->indrels[idx];
3118 : 605 : IndexBulkDeleteResult *istat = vacrel->indstats[idx];
3119 : :
3120 : 605 : vacrel->indstats[idx] =
3121 : 1210 : lazy_cleanup_one_index(indrel, istat, reltuples,
3122 : 605 : estimated_count, vacrel);
3123 : :
3124 : : /* Report the number of indexes cleaned up */
3125 : 605 : pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_PROCESSED,
3126 : 605 : idx + 1);
3127 : 605 : }
3128 : 448 : }
3129 : : else
3130 : : {
3131 : : /* Outsource everything to parallel variant */
3132 : 12 : parallel_vacuum_cleanup_all_indexes(vacrel->pvs, reltuples,
3133 : 6 : vacrel->num_index_scans,
3134 : 6 : estimated_count);
3135 : : }
3136 : :
3137 : : /* Reset the progress counters */
3138 : 454 : pgstat_progress_update_multi_param(2, progress_end_index, progress_end_val);
3139 : 454 : }
3140 : :
3141 : : /*
3142 : : * lazy_vacuum_one_index() -- vacuum index relation.
3143 : : *
3144 : : * Delete all the index tuples containing a TID collected in
3145 : : * vacrel->dead_items. Also update running statistics. Exact
3146 : : * details depend on index AM's ambulkdelete routine.
3147 : : *
3148 : : * reltuples is the number of heap tuples to be passed to the
3149 : : * bulkdelete callback. It's always assumed to be estimated.
3150 : : * See indexam.sgml for more info.
3151 : : *
3152 : : * Returns bulk delete stats derived from input stats
3153 : : */
3154 : : static IndexBulkDeleteResult *
3155 : 107 : lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat,
3156 : : double reltuples, LVRelState *vacrel)
3157 : : {
3158 : 107 : IndexVacuumInfo ivinfo;
3159 : 107 : LVSavedErrInfo saved_err_info;
3160 : :
3161 : 107 : ivinfo.index = indrel;
3162 : 107 : ivinfo.heaprel = vacrel->rel;
3163 : 107 : ivinfo.analyze_only = false;
3164 : 107 : ivinfo.report_progress = false;
3165 : 107 : ivinfo.estimated_count = true;
3166 : 107 : ivinfo.message_level = DEBUG2;
3167 : 107 : ivinfo.num_heap_tuples = reltuples;
3168 : 107 : ivinfo.strategy = vacrel->bstrategy;
3169 : :
3170 : : /*
3171 : : * Update error traceback information.
3172 : : *
3173 : : * The index name is saved during this phase and restored immediately
3174 : : * after this phase. See vacuum_error_callback.
3175 : : */
3176 [ + - ]: 107 : Assert(vacrel->indname == NULL);
3177 : 107 : vacrel->indname = pstrdup(RelationGetRelationName(indrel));
3178 : 107 : update_vacuum_error_info(vacrel, &saved_err_info,
3179 : : VACUUM_ERRCB_PHASE_VACUUM_INDEX,
3180 : : InvalidBlockNumber, InvalidOffsetNumber);
3181 : :
3182 : : /* Do bulk deletion */
3183 : 214 : istat = vac_bulkdel_one_index(&ivinfo, istat, vacrel->dead_items,
3184 : 107 : vacrel->dead_items_info);
3185 : :
3186 : : /* Revert to the previous phase information for error traceback */
3187 : 107 : restore_vacuum_error_info(vacrel, &saved_err_info);
3188 : 107 : pfree(vacrel->indname);
3189 : 107 : vacrel->indname = NULL;
3190 : :
3191 : 214 : return istat;
3192 : 107 : }
3193 : :
3194 : : /*
3195 : : * lazy_cleanup_one_index() -- do post-vacuum cleanup for index relation.
3196 : : *
3197 : : * Calls index AM's amvacuumcleanup routine. reltuples is the number
3198 : : * of heap tuples and estimated_count is true if reltuples is an
3199 : : * estimated value. See indexam.sgml for more info.
3200 : : *
3201 : : * Returns bulk delete stats derived from input stats
3202 : : */
3203 : : static IndexBulkDeleteResult *
3204 : 605 : lazy_cleanup_one_index(Relation indrel, IndexBulkDeleteResult *istat,
3205 : : double reltuples, bool estimated_count,
3206 : : LVRelState *vacrel)
3207 : : {
3208 : 605 : IndexVacuumInfo ivinfo;
3209 : 605 : LVSavedErrInfo saved_err_info;
3210 : :
3211 : 605 : ivinfo.index = indrel;
3212 : 605 : ivinfo.heaprel = vacrel->rel;
3213 : 605 : ivinfo.analyze_only = false;
3214 : 605 : ivinfo.report_progress = false;
3215 : 605 : ivinfo.estimated_count = estimated_count;
3216 : 605 : ivinfo.message_level = DEBUG2;
3217 : :
3218 : 605 : ivinfo.num_heap_tuples = reltuples;
3219 : 605 : ivinfo.strategy = vacrel->bstrategy;
3220 : :
3221 : : /*
3222 : : * Update error traceback information.
3223 : : *
3224 : : * The index name is saved during this phase and restored immediately
3225 : : * after this phase. See vacuum_error_callback.
3226 : : */
3227 [ + - ]: 605 : Assert(vacrel->indname == NULL);
3228 : 605 : vacrel->indname = pstrdup(RelationGetRelationName(indrel));
3229 : 605 : update_vacuum_error_info(vacrel, &saved_err_info,
3230 : : VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
3231 : : InvalidBlockNumber, InvalidOffsetNumber);
3232 : :
3233 : 605 : istat = vac_cleanup_one_index(&ivinfo, istat);
3234 : :
3235 : : /* Revert to the previous phase information for error traceback */
3236 : 605 : restore_vacuum_error_info(vacrel, &saved_err_info);
3237 : 605 : pfree(vacrel->indname);
3238 : 605 : vacrel->indname = NULL;
3239 : :
3240 : 1210 : return istat;
3241 : 605 : }
3242 : :
3243 : : /*
3244 : : * should_attempt_truncation - should we attempt to truncate the heap?
3245 : : *
3246 : : * Don't even think about it unless we have a shot at releasing a goodly
3247 : : * number of pages. Otherwise, the time taken isn't worth it, mainly because
3248 : : * an AccessExclusive lock must be replayed on any hot standby, where it can
3249 : : * be particularly disruptive.
3250 : : *
3251 : : * Also don't attempt it if wraparound failsafe is in effect. The entire
3252 : : * system might be refusing to allocate new XIDs at this point. The system
3253 : : * definitely won't return to normal unless and until VACUUM actually advances
3254 : : * the oldest relfrozenxid -- which hasn't happened for target rel just yet.
3255 : : * If lazy_truncate_heap attempted to acquire an AccessExclusiveLock to
3256 : : * truncate the table under these circumstances, an XID exhaustion error might
3257 : : * make it impossible for VACUUM to fix the underlying XID exhaustion problem.
3258 : : * There is very little chance of truncation working out when the failsafe is
3259 : : * in effect in any case. lazy_scan_prune makes the optimistic assumption
3260 : : * that any LP_DEAD items it encounters will always be LP_UNUSED by the time
3261 : : * we're called.
3262 : : */
3263 : : static bool
3264 : 645 : should_attempt_truncation(LVRelState *vacrel)
3265 : : {
3266 : 645 : BlockNumber possibly_freeable;
3267 : :
3268 [ + + - + ]: 645 : if (!vacrel->do_rel_truncate || VacuumFailsafeActive)
3269 : 10 : return false;
3270 : :
3271 : 635 : possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;
3272 [ + + + + ]: 670 : if (possibly_freeable > 0 &&
3273 [ + - ]: 35 : (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
3274 : 35 : possibly_freeable >= vacrel->rel_pages / REL_TRUNCATE_FRACTION))
3275 : 33 : return true;
3276 : :
3277 : 602 : return false;
3278 : 645 : }
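: :
: : /*
: :  * Worked example, assuming REL_TRUNCATE_MINIMUM is 1000 and
: :  * REL_TRUNCATE_FRACTION is 16: a 32,000 page table with 500 potentially
: :  * freeable tail pages is left alone (500 < 1000 and 500 < 32000/16 = 2000),
: :  * while a 100 page table with 20 freeable pages triggers a truncation
: :  * attempt, since 20 >= 100/16 = 6.
: :  */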
3279 : :
3280 : : /*
3281 : : * lazy_truncate_heap - try to truncate off any empty pages at the end
3282 : : */
3283 : : static void
3284 : 33 : lazy_truncate_heap(LVRelState *vacrel)
3285 : : {
3286 : 33 : BlockNumber orig_rel_pages = vacrel->rel_pages;
3287 : 33 : BlockNumber new_rel_pages;
3288 : 33 : bool lock_waiter_detected;
3289 : 33 : int lock_retry;
3290 : :
3291 : : /* Report that we are now truncating */
3292 : 33 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
3293 : : PROGRESS_VACUUM_PHASE_TRUNCATE);
3294 : :
3295 : : /* Update error traceback information one last time */
3296 : 66 : update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_TRUNCATE,
3297 : 33 : vacrel->nonempty_pages, InvalidOffsetNumber);
3298 : :
3299 : : /*
3300 : : * Loop until no more truncating can be done.
3301 : : */
3302 : 33 : do
3303 : : {
3304 : : /*
3305 : : * We need full exclusive lock on the relation in order to do
3306 : : * truncation. If we can't get it, give up rather than waiting --- we
3307 : : * don't want to block other backends, and we don't want to deadlock
3308 : : * (which is quite possible considering we already hold a lower-grade
3309 : : * lock).
3310 : : */
3311 : 33 : lock_waiter_detected = false;
3312 : 33 : lock_retry = 0;
3313 : 33 : while (true)
3314 : : {
3315 [ - + ]: 33 : if (ConditionalLockRelation(vacrel->rel, AccessExclusiveLock))
3316 : 33 : break;
3317 : :
3318 : : /*
3319 : : * Check for interrupts while trying to (re-)acquire the exclusive
3320 : : * lock.
3321 : : */
3322 [ # # ]: 0 : CHECK_FOR_INTERRUPTS();
3323 : :
3324 [ # # ]: 0 : if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT /
3325 : : VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL))
3326 : : {
3327 : : /*
3328 : : * We failed to establish the lock in the specified number of
3329 : : * retries. This means we give up truncating.
3330 : : */
3331 [ # # # # : 0 : ereport(vacrel->verbose ? INFO : DEBUG2,
# # # # #
# ]
3332 : : (errmsg("\"%s\": stopping truncate due to conflicting lock request",
3333 : : vacrel->relname)));
3334 : 0 : return;
3335 : : }
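: :
: : /*
: :  * Assuming the usual settings of VACUUM_TRUNCATE_LOCK_TIMEOUT (5000 ms)
: :  * and VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL (50 ms), the retry test above
: :  * allows up to 100 waits, roughly five seconds, before giving up.
: :  */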
3336 : :
3337 : 0 : (void) WaitLatch(MyLatch,
3338 : : WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
3339 : : VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL,
3340 : : WAIT_EVENT_VACUUM_TRUNCATE);
3341 : 0 : ResetLatch(MyLatch);
3342 : : }
3343 : :
3344 : : /*
3345 : : * Now that we have exclusive lock, look to see if the rel has grown
3346 : : * whilst we were vacuuming with non-exclusive lock. If so, give up;
3347 : : * the newly added pages presumably contain non-deletable tuples.
3348 : : */
3349 : 33 : new_rel_pages = RelationGetNumberOfBlocks(vacrel->rel);
3350 [ - + ]: 33 : if (new_rel_pages != orig_rel_pages)
3351 : : {
3352 : : /*
3353 : : * Note: we intentionally don't update vacrel->rel_pages with the
3354 : : * new rel size here. If we did, it would amount to assuming that
3355 : : * the new pages are empty, which is unlikely. Leaving the numbers
3356 : : * alone amounts to assuming that the new pages have the same
3357 : : * tuple density as existing ones, which is less unlikely.
3358 : : */
3359 : 0 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
3360 : 0 : return;
3361 : : }
3362 : :
3363 : : /*
3364 : : * Scan backwards from the end to verify that the end pages actually
3365 : : * contain no tuples. This is *necessary*, not optional, because
3366 : : * other backends could have added tuples to these pages whilst we
3367 : : * were vacuuming.
3368 : : */
3369 : 33 : new_rel_pages = count_nondeletable_pages(vacrel, &lock_waiter_detected);
3370 : 33 : vacrel->blkno = new_rel_pages;
3371 : :
3372 [ - + ]: 33 : if (new_rel_pages >= orig_rel_pages)
3373 : : {
3374 : : /* can't do anything after all */
3375 : 0 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
3376 : 0 : return;
3377 : : }
3378 : :
3379 : : /*
3380 : : * Okay to truncate.
3381 : : */
3382 : 33 : RelationTruncate(vacrel->rel, new_rel_pages);
3383 : :
3384 : : /*
3385 : : * We can release the exclusive lock as soon as we have truncated.
3386 : : * Other backends can't safely access the relation until they have
3387 : : * processed the smgr invalidation that smgrtruncate sent out ... but
3388 : : * that should happen as part of standard invalidation processing once
3389 : : * they acquire lock on the relation.
3390 : : */
3391 : 33 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
3392 : :
3393 : : /*
3394 : : * Update statistics. Here, it *is* correct to adjust rel_pages
3395 : : * without also touching reltuples, since the tuple count wasn't
3396 : : * changed by the truncation.
3397 : : */
3398 : 33 : vacrel->removed_pages += orig_rel_pages - new_rel_pages;
3399 : 33 : vacrel->rel_pages = new_rel_pages;
3400 : :
3401 [ - + # # - + - + # # ]: 33 : ereport(vacrel->verbose ? INFO : DEBUG2,
3402 : : (errmsg("table \"%s\": truncated %u to %u pages",
3403 : : vacrel->relname,
3404 : : orig_rel_pages, new_rel_pages)));
3405 : 33 : orig_rel_pages = new_rel_pages;
3406 [ + - + - ]: 33 : } while (new_rel_pages > vacrel->nonempty_pages && lock_waiter_detected);
3407 [ - + ]: 33 : }
3408 : :
3409 : : /*
3410 : : * Rescan end pages to verify that they are (still) empty of tuples.
3411 : : *
3412 : : * Returns number of nondeletable pages (last nonempty page + 1).
3413 : : */
3414 : : static BlockNumber
3415 : 33 : count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
3416 : : {
3417 : : StaticAssertDecl((PREFETCH_SIZE & (PREFETCH_SIZE - 1)) == 0,
3418 : : "prefetch size must be power of 2");
3419 : :
3420 : 33 : BlockNumber blkno;
3421 : 33 : BlockNumber prefetchedUntil;
3422 : 33 : instr_time starttime;
3423 : :
3424 : : /* Initialize the starttime if we check for conflicting lock requests */
3425 : 33 : INSTR_TIME_SET_CURRENT(starttime);
3426 : :
3427 : : /*
3428 : : * Start checking blocks at what we believe relation end to be and move
3429 : : * backwards. (Strange coding of loop control is needed because blkno is
3430 : : * unsigned.) To make the scan faster, we prefetch a few blocks at a time
3431 : : * in forward direction, so that OS-level readahead can kick in.
3432 : : */
3433 : 33 : blkno = vacrel->rel_pages;
3434 : 33 : prefetchedUntil = InvalidBlockNumber;
3435 [ + + ]: 611 : while (blkno > vacrel->nonempty_pages)
3436 : : {
3437 : 578 : Buffer buf;
3438 : 578 : Page page;
3439 : 578 : OffsetNumber offnum,
3440 : : maxoff;
3441 : 578 : bool hastup;
3442 : :
3443 : : /*
3444 : : * Check if another process requests a lock on our relation. We are
3445 : : * holding an AccessExclusiveLock here, so they will be waiting. We
3446 : : * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
3447 : : * only check if that interval has elapsed once every 32 blocks to
3448 : : * keep the number of system calls and actual shared lock table
3449 : : * lookups to a minimum.
3450 : : */
3451 [ + + ]: 578 : if ((blkno % 32) == 0)
3452 : : {
3453 : 20 : instr_time currenttime;
3454 : 20 : instr_time elapsed;
3455 : :
3456 : 20 : INSTR_TIME_SET_CURRENT(currenttime);
3457 : 20 : elapsed = currenttime;
3458 : 20 : INSTR_TIME_SUBTRACT(elapsed, starttime);
3459 : 20 : if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
3460 [ + - ]: 20 : >= VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL)
3461 : : {
3462 [ # # ]: 0 : if (LockHasWaitersRelation(vacrel->rel, AccessExclusiveLock))
3463 : : {
3464 [ # # # # # # # # # # ]: 0 : ereport(vacrel->verbose ? INFO : DEBUG2,
3465 : : (errmsg("table \"%s\": suspending truncate due to conflicting lock request",
3466 : : vacrel->relname)));
3467 : :
3468 : 0 : *lock_waiter_detected = true;
3469 : 0 : return blkno;
3470 : : }
3471 : 0 : starttime = currenttime;
3472 : 0 : }
3473 [ - + ]: 20 : }
3474 : :
3475 : : /*
3476 : : * We don't insert a vacuum delay point here, because we have an
3477 : : * exclusive lock on the table, which we want to hold for as short a
3478 : : * time as possible. We still need to check for interrupts, however.
3479 : : */
3480 [ - + ]: 578 : CHECK_FOR_INTERRUPTS();
3481 : :
3482 : 578 : blkno--;
3483 : :
3484 : : /* If we haven't prefetched this lot yet, do so now. */
3485 [ + + ]: 578 : if (prefetchedUntil > blkno)
3486 : : {
3487 : 45 : BlockNumber prefetchStart;
3488 : 45 : BlockNumber pblkno;
3489 : :
3490 : 45 : prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
3491 [ + + ]: 849 : for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
3492 : : {
3493 : 804 : PrefetchBuffer(vacrel->rel, MAIN_FORKNUM, pblkno);
3494 [ + - ]: 804 : CHECK_FOR_INTERRUPTS();
3495 : 804 : }
3496 : 45 : prefetchedUntil = prefetchStart;
3497 : 45 : }
3498 : :
3499 : 1156 : buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
3500 : 578 : vacrel->bstrategy);
3501 : :
3502 : : /* In this phase we only need shared access to the buffer */
3503 : 578 : LockBuffer(buf, BUFFER_LOCK_SHARE);
3504 : :
3505 : 578 : page = BufferGetPage(buf);
3506 : :
3507 [ + + + + ]: 578 : if (PageIsNew(page) || PageIsEmpty(page))
3508 : : {
3509 : 245 : UnlockReleaseBuffer(buf);
3510 : 245 : continue;
3511 : : }
3512 : :
3513 : 333 : hastup = false;
3514 : 333 : maxoff = PageGetMaxOffsetNumber(page);
3515 [ + + ]: 666 : for (offnum = FirstOffsetNumber;
3516 : 666 : offnum <= maxoff;
3517 : 333 : offnum = OffsetNumberNext(offnum))
3518 : : {
3519 : 333 : ItemId itemid;
3520 : :
3521 : 333 : itemid = PageGetItemId(page, offnum);
3522 : :
3523 : : /*
3524 : : * Note: any non-unused item should be taken as a reason to keep
3525 : : * this page. Even an LP_DEAD item makes truncation unsafe, since
3526 : : * we must not have cleaned out its index entries.
3527 : : */
3528 [ + - ]: 333 : if (ItemIdIsUsed(itemid))
3529 : : {
3530 : 0 : hastup = true;
3531 : 0 : break; /* can stop scanning */
3532 : : }
3533 [ - - + ]: 333 : } /* scan along page */
3534 : :
3535 : 333 : UnlockReleaseBuffer(buf);
3536 : :
3537 : : /* Done scanning if we found a tuple here */
3538 [ - + ]: 333 : if (hastup)
3539 : 0 : return blkno + 1;
3540 [ + - + ]: 578 : }
3541 : :
3542 : : /*
3543 : : * If we fall out of the loop, all the previously-thought-to-be-empty
3544 : : * pages still are; we need not bother to look at the last known-nonempty
3545 : : * page.
3546 : : */
3547 : 33 : return vacrel->nonempty_pages;
3548 : 33 : }
3549 : :
3550 : : /*
3551 : : * Allocate dead_items and dead_items_info (either using palloc, or in dynamic
3552 : : * shared memory). Sets both in vacrel for caller.
3553 : : *
3554 : : * Also handles parallel initialization as part of allocating dead_items in
3555 : : * DSM when required.
3556 : : */
3557 : : static void
3558 : 645 : dead_items_alloc(LVRelState *vacrel, int nworkers)
3559 : : {
3560 : 645 : VacDeadItemsInfo *dead_items_info;
3561 [ - + # # ]: 645 : int vac_work_mem = AmAutoVacuumWorkerProcess() &&
3562 : 0 : autovacuum_work_mem != -1 ?
3563 : 645 : autovacuum_work_mem : maintenance_work_mem;
3564 : :
3565 : : /*
3566 : : * Initialize state for a parallel vacuum. As of now, only one worker can
3567 : : * be used for an index, so we invoke parallelism only if there are at
3568 : : * least two indexes on a table.
3569 : : */
3570 [ + + + + + + ]: 645 : if (nworkers >= 0 && vacrel->nindexes > 1 && vacrel->do_index_vacuuming)
3571 : : {
3572 : : /*
3573 : : * Since parallel workers cannot access data in temporary tables, we
3574 : : * can't perform parallel vacuum on them.
3575 : : */
3576 [ + + ]: 128 : if (RelationUsesLocalBuffers(vacrel->rel))
3577 : : {
3578 : : /*
3579 : : * Give warning only if the user explicitly tries to perform a
3580 : : * parallel vacuum on the temporary table.
3581 : : */
3582 [ - + ]: 1 : if (nworkers > 0)
3583 [ - + + - ]: 1 : ereport(WARNING,
3584 : : (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
3585 : : vacrel->relname)));
3586 : 1 : }
3587 : : else
3588 : 254 : vacrel->pvs = parallel_vacuum_init(vacrel->rel, vacrel->indrels,
3589 : 127 : vacrel->nindexes, nworkers,
3590 : 127 : vac_work_mem,
3591 : 127 : vacrel->verbose ? INFO : DEBUG2,
3592 : 127 : vacrel->bstrategy);
3593 : :
3594 : : /*
3595 : : * If parallel mode started, dead_items and dead_items_info spaces are
3596 : : * allocated in DSM.
3597 : : */
3598 [ + + ]: 128 : if (ParallelVacuumIsActive(vacrel))
3599 : : {
3600 : 12 : vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs,
3601 : 6 : &vacrel->dead_items_info);
3602 : 6 : return;
3603 : : }
3604 : 122 : }
3605 : :
3606 : : /*
3607 : : * Serial VACUUM case. Allocate both dead_items and dead_items_info
3608 : : * locally.
3609 : : */
3610 : :
3611 : 639 : dead_items_info = palloc_object(VacDeadItemsInfo);
3612 : 639 : dead_items_info->max_bytes = vac_work_mem * (Size) 1024;
3613 : 639 : dead_items_info->num_items = 0;
3614 : 639 : vacrel->dead_items_info = dead_items_info;
3615 : :
3616 : 639 : vacrel->dead_items = TidStoreCreateLocal(dead_items_info->max_bytes, true);
3617 [ - + ]: 645 : }
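: :
: : /*
: :  * Worked example, assuming the defaults of maintenance_work_mem = 65536 kB
: :  * and autovacuum_work_mem = -1: max_bytes comes out to 65536 * 1024 bytes,
: :  * i.e. 64MB, for the local TID store.
: :  */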
3618 : :
3619 : : /*
3620 : : * Add the given block number and offset numbers to dead_items.
3621 : : */
3622 : : static void
3623 : 973 : dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
3624 : : int num_offsets)
3625 : : {
3626 : 973 : const int prog_index[2] = {
3627 : : PROGRESS_VACUUM_NUM_DEAD_ITEM_IDS,
3628 : : PROGRESS_VACUUM_DEAD_TUPLE_BYTES
3629 : : };
3630 : 973 : int64 prog_val[2];
3631 : :
3632 : 973 : TidStoreSetBlockOffsets(vacrel->dead_items, blkno, offsets, num_offsets);
3633 : 973 : vacrel->dead_items_info->num_items += num_offsets;
3634 : :
3635 : : /* update the progress information */
3636 : 973 : prog_val[0] = vacrel->dead_items_info->num_items;
3637 : 973 : prog_val[1] = TidStoreMemoryUsage(vacrel->dead_items);
3638 : 973 : pgstat_progress_update_multi_param(2, prog_index, prog_val);
3639 : 973 : }
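: :
: : /*
: :  * A minimal sketch of the TID store round trip used above -- create, add
: :  * offsets for one block, iterate, and destroy -- composed only from the
: :  * TidStore calls already seen in this file.  The function name is
: :  * hypothetical and the sketch is for illustration only.
: :  */
: : #ifdef NOT_USED
: : static void
: : tid_store_round_trip_example(void)
: : {
: : 	TidStore   *ts = TidStoreCreateLocal(64 * 1024 * 1024, true);
: : 	OffsetNumber offsets[] = {1, 2, 5};
: : 	TidStoreIter *iter;
: : 	TidStoreIterResult *iter_result;
: :
: : 	/* Record three dead item offsets for block 0 */
: : 	TidStoreSetBlockOffsets(ts, (BlockNumber) 0, offsets, lengthof(offsets));
: :
: : 	/* Walk the store block by block and pull the offsets back out */
: : 	iter = TidStoreBeginIterate(ts);
: : 	while ((iter_result = TidStoreIterateNext(iter)) != NULL)
: : 	{
: : 		OffsetNumber found[MaxOffsetNumber];
: : 		int			nfound;
: :
: : 		nfound = TidStoreGetBlockOffsets(iter_result, found, lengthof(found));
: : 		elog(DEBUG2, "block %u holds %d stored offsets",
: : 			 iter_result->blkno, nfound);
: : 	}
: : 	TidStoreEndIterate(iter);
: : 	TidStoreDestroy(ts);
: : }
: : #endif							/* NOT_USED */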
3640 : :
3641 : : /*
3642 : : * Forget all collected dead items.
3643 : : */
3644 : : static void
3645 : 65 : dead_items_reset(LVRelState *vacrel)
3646 : : {
3647 : : /* Update statistics for dead items */
3648 : 65 : vacrel->num_dead_items_resets++;
3649 : 65 : vacrel->total_dead_items_bytes += TidStoreMemoryUsage(vacrel->dead_items);
3650 : :
3651 [ + + ]: 65 : if (ParallelVacuumIsActive(vacrel))
3652 : : {
3653 : 7 : parallel_vacuum_reset_dead_items(vacrel->pvs);
3654 : 14 : vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs,
3655 : 7 : &vacrel->dead_items_info);
3656 : 7 : return;
3657 : : }
3658 : :
3659 : : /* Recreate the tidstore with the same max_bytes limitation */
3660 : 58 : TidStoreDestroy(vacrel->dead_items);
3661 : 58 : vacrel->dead_items = TidStoreCreateLocal(vacrel->dead_items_info->max_bytes, true);
3662 : :
3663 : : /* Reset the counter */
3664 : 58 : vacrel->dead_items_info->num_items = 0;
3665 : 65 : }
3666 : :
3667 : : /*
3668 : : * Perform cleanup for resources allocated in dead_items_alloc
3669 : : */
3670 : : static void
3671 : 645 : dead_items_cleanup(LVRelState *vacrel)
3672 : : {
3673 [ + + ]: 645 : if (!ParallelVacuumIsActive(vacrel))
3674 : : {
3675 : : /* Don't bother with pfree here */
3676 : 639 : return;
3677 : : }
3678 : :
3679 : : /* End parallel mode */
3680 : 6 : parallel_vacuum_end(vacrel->pvs, vacrel->indstats);
3681 : 6 : vacrel->pvs = NULL;
3682 : 645 : }
3683 : :
3684 : : #ifdef USE_ASSERT_CHECKING
3685 : :
3686 : : /*
3687 : : * Wrapper around heap_page_would_be_all_visible() for callers that expect no
3688 : : * LP_DEAD items on the page. Currently used only in assertions, but there is
3689 : : * no reason it could not be used outside of asserts.
3690 : : */
3691 : : static bool
3692 : 6477 : heap_page_is_all_visible(Relation rel, Buffer buf,
3693 : : TransactionId OldestXmin,
3694 : : bool *all_frozen,
3695 : : TransactionId *visibility_cutoff_xid,
3696 : : OffsetNumber *logging_offnum)
3697 : : {
3698 : :
3699 : 12954 : return heap_page_would_be_all_visible(rel, buf,
3700 : 6477 : OldestXmin,
3701 : : NULL, 0,
3702 : 6477 : all_frozen,
3703 : 6477 : visibility_cutoff_xid,
3704 : 6477 : logging_offnum);
3705 : : }
3706 : : #endif
3707 : :
3708 : : /*
3709 : : * Check whether the heap page in buf is all-visible except for the dead
3710 : : * tuples referenced in the deadoffsets array.
3711 : : *
3712 : : * Vacuum uses this to check if a page would become all-visible after reaping
3713 : : * known dead tuples. This function does not remove the dead items.
3714 : : *
3715 : : * This cannot be called in a critical section, as the visibility checks may
3716 : : * perform IO and allocate memory.
3717 : : *
3718 : : * Returns true if the page is all-visible other than the provided
3719 : : * deadoffsets and false otherwise.
3720 : : *
3721 : : * OldestXmin is used to determine visibility.
3722 : : *
3723 : : * Output parameters:
3724 : : *
3725 : : * - *all_frozen: true if every tuple on the page is frozen
3726 : : * - *visibility_cutoff_xid: newest xmin; valid only if page is all-visible
3727 : : * - *logging_offnum: OffsetNumber of current tuple being processed;
3728 : : * used by vacuum's error callback system.
3729 : : *
3730 : : * Callers looking to verify that the page is already all-visible can call
3731 : : * heap_page_is_all_visible().
3732 : : *
3733 : : * This logic is closely related to heap_prune_record_unchanged_lp_normal().
3734 : : * If you modify this function, ensure consistency with that code. An
3735 : : * assertion cross-checks that both remain in agreement. Do not introduce new
3736 : : * side-effects.
3737 : : */
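: : /*
: :  * For example, if a page's LP_DEAD entries sit at offsets 3, 7 and 12, the
: :  * caller must pass deadoffsets = {3, 7, 12} (sorted, without duplicates);
: :  * any LP_DEAD offset not present in the array causes the page to be
: :  * reported as not all-visible.
: :  */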
3738 : : static bool
3739 : 7438 : heap_page_would_be_all_visible(Relation rel, Buffer buf,
3740 : : TransactionId OldestXmin,
3741 : : OffsetNumber *deadoffsets,
3742 : : int ndeadoffsets,
3743 : : bool *all_frozen,
3744 : : TransactionId *visibility_cutoff_xid,
3745 : : OffsetNumber *logging_offnum)
3746 : : {
3747 : 7438 : Page page = BufferGetPage(buf);
3748 : 7438 : BlockNumber blockno = BufferGetBlockNumber(buf);
3749 : 7438 : OffsetNumber offnum,
3750 : : maxoff;
3751 : 7438 : bool all_visible = true;
3752 : 7438 : int matched_dead_count = 0;
3753 : :
3754 : 7438 : *visibility_cutoff_xid = InvalidTransactionId;
3755 : 7438 : *all_frozen = true;
3756 : :
3757 [ + + + - ]: 7438 : Assert(ndeadoffsets == 0 || deadoffsets);
3758 : :
3759 : : #ifdef USE_ASSERT_CHECKING
3760 : : /* Confirm input deadoffsets[] is strictly sorted */
3761 [ + + ]: 7438 : if (ndeadoffsets > 1)
3762 : : {
3763 [ + + ]: 69611 : for (int i = 1; i < ndeadoffsets; i++)
3764 [ + - ]: 68702 : Assert(deadoffsets[i - 1] < deadoffsets[i]);
3765 : 909 : }
3766 : : #endif
3767 : :
3768 : 7438 : maxoff = PageGetMaxOffsetNumber(page);
3769 [ + + ]: 1532956 : for (offnum = FirstOffsetNumber;
3770 [ + + ]: 766478 : offnum <= maxoff && all_visible;
3771 : 759040 : offnum = OffsetNumberNext(offnum))
3772 : : {
3773 : 759040 : ItemId itemid;
3774 : 759040 : HeapTupleData tuple;
3775 : :
3776 : : /*
3777 : : * Set the offset number so that we can display it along with any
3778 : : * error that occurred while processing this tuple.
3779 : : */
3780 : 759040 : *logging_offnum = offnum;
3781 : 759040 : itemid = PageGetItemId(page, offnum);
3782 : :
3783 : : /* Unused or redirect line pointers are of no interest */
3784 [ + + + + ]: 759040 : if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
3785 : 3233 : continue;
3786 : :
3787 : 755807 : ItemPointerSet(&(tuple.t_self), blockno, offnum);
3788 : :
3789 : : /*
3790 : : * Dead line pointers can have index pointers pointing to them, so
3791 : : * they can't be treated as visible.
3792 : : */
3793 [ + + ]: 755807 : if (ItemIdIsDead(itemid))
3794 : : {
3795 [ + - ]: 69309 : if (!deadoffsets ||
3796 [ + - - + ]: 69309 : matched_dead_count >= ndeadoffsets ||
3797 : 69309 : deadoffsets[matched_dead_count] != offnum)
3798 : : {
3799 : 0 : *all_frozen = all_visible = false;
3800 : 0 : break;
3801 : : }
3802 : 69309 : matched_dead_count++;
3803 : 69309 : continue;
3804 : : }
3805 : :
3806 [ + - ]: 686498 : Assert(ItemIdIsNormal(itemid));
3807 : :
3808 : 686498 : tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
3809 : 686498 : tuple.t_len = ItemIdGetLength(itemid);
3810 : 686498 : tuple.t_tableOid = RelationGetRelid(rel);
3811 : :
3812 : : /* Visibility checks may do IO or allocate memory */
3813 [ + - ]: 686498 : Assert(CritSectionCount == 0);
3814 [ - + - ]: 686498 : switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
3815 : : {
3816 : : case HEAPTUPLE_LIVE:
3817 : : {
3818 : 686498 : TransactionId xmin;
3819 : :
3820 : : /* Check comments in lazy_scan_prune. */
3821 [ + - ]: 686498 : if (!HeapTupleHeaderXminCommitted(tuple.t_data))
3822 : : {
3823 : 0 : all_visible = false;
3824 : 0 : *all_frozen = false;
3825 : 0 : break;
3826 : : }
3827 : :
3828 : : /*
3829 : : * The inserter definitely committed. But is it old enough
3830 : : * that everyone sees it as committed?
3831 : : */
3832 : 686498 : xmin = HeapTupleHeaderGetXmin(tuple.t_data);
3833 [ + + ]: 686498 : if (!TransactionIdPrecedes(xmin, OldestXmin))
3834 : : {
3835 : 4 : all_visible = false;
3836 : 4 : *all_frozen = false;
3837 : 4 : break;
3838 : : }
3839 : :
3840 : : /* Track newest xmin on page. */
3841 [ + + + + ]: 686494 : if (TransactionIdFollows(xmin, *visibility_cutoff_xid) &&
3842 : 123750 : TransactionIdIsNormal(xmin))
3843 : 5727 : *visibility_cutoff_xid = xmin;
3844 : :
3845 : : /* Check whether this tuple is already frozen or not */
3846 [ + - + + + + ]: 686494 : if (all_visible && *all_frozen &&
3847 : 122351 : heap_tuple_needs_eventual_freeze(tuple.t_data))
3848 : 4345 : *all_frozen = false;
3849 [ + + ]: 686498 : }
3850 : 686494 : break;
3851 : :
3852 : : case HEAPTUPLE_DEAD:
3853 : : case HEAPTUPLE_RECENTLY_DEAD:
3854 : : case HEAPTUPLE_INSERT_IN_PROGRESS:
3855 : : case HEAPTUPLE_DELETE_IN_PROGRESS:
3856 : : {
3857 : 0 : all_visible = false;
3858 : 0 : *all_frozen = false;
3859 : 0 : break;
3860 : : }
3861 : : default:
3862 [ # # # # ]: 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
3863 : 0 : break;
3864 : : }
3865 [ + - + ]: 759040 : } /* scan along page */
3866 : :
3867 : : /* Clear the offset information once we have processed the given page. */
3868 : 7438 : *logging_offnum = InvalidOffsetNumber;
3869 : :
3870 : 14876 : return all_visible;
3871 : 7438 : }
3872 : :
3873 : : /*
3874 : : * Update index statistics in pg_class if the statistics are accurate.
3875 : : */
3876 : : static void
3877 : 638 : update_relstats_all_indexes(LVRelState *vacrel)
3878 : : {
3879 : 638 : Relation *indrels = vacrel->indrels;
3880 : 638 : int nindexes = vacrel->nindexes;
3881 : 638 : IndexBulkDeleteResult **indstats = vacrel->indstats;
3882 : :
3883 [ + - ]: 638 : Assert(vacrel->do_index_cleanup);
3884 : :
3885 [ + + ]: 1264 : for (int idx = 0; idx < nindexes; idx++)
3886 : : {
3887 : 626 : Relation indrel = indrels[idx];
3888 : 626 : IndexBulkDeleteResult *istat = indstats[idx];
3889 : :
3890 [ + + + + ]: 626 : if (istat == NULL || istat->estimated_count)
3891 : 472 : continue;
3892 : :
3893 : : /* Update index statistics */
3894 : 308 : vac_update_relstats(indrel,
3895 : 154 : istat->num_pages,
3896 : 154 : istat->num_index_tuples,
3897 : : 0, 0,
3898 : : false,
3899 : : InvalidTransactionId,
3900 : : InvalidMultiXactId,
3901 : : NULL, NULL, false);
3902 [ - + + ]: 626 : }
3903 : 638 : }
3904 : :
3905 : : /*
3906 : : * Error context callback for errors occurring during vacuum. The error
3907 : : * context messages for index phases should match the messages set in parallel
3908 : : * vacuum. If you change this function for those phases, change
3909 : : * parallel_vacuum_error_callback() as well.
3910 : : */
3911 : : static void
3912 : 1 : vacuum_error_callback(void *arg)
3913 : : {
3914 : 1 : LVRelState *errinfo = arg;
3915 : :
3916 [ - - - - - - + ]: 1 : switch (errinfo->phase)
3917 : : {
3918 : : case VACUUM_ERRCB_PHASE_SCAN_HEAP:
3919 [ # # ]: 0 : if (BlockNumberIsValid(errinfo->blkno))
3920 : : {
3921 [ # # # # ]: 0 : if (OffsetNumberIsValid(errinfo->offnum))
3922 : 0 : errcontext("while scanning block %u offset %u of relation \"%s.%s\"",
3923 : 0 : errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3924 : : else
3925 : 0 : errcontext("while scanning block %u of relation \"%s.%s\"",
3926 : 0 : errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3927 : 0 : }
3928 : : else
3929 : 0 : errcontext("while scanning relation \"%s.%s\"",
3930 : 0 : errinfo->relnamespace, errinfo->relname);
3931 : 0 : break;
3932 : :
3933 : : case VACUUM_ERRCB_PHASE_VACUUM_HEAP:
3934 [ # # ]: 0 : if (BlockNumberIsValid(errinfo->blkno))
3935 : : {
3936 [ # # # # ]: 0 : if (OffsetNumberIsValid(errinfo->offnum))
3937 : 0 : errcontext("while vacuuming block %u offset %u of relation \"%s.%s\"",
3938 : 0 : errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3939 : : else
3940 : 0 : errcontext("while vacuuming block %u of relation \"%s.%s\"",
3941 : 0 : errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3942 : 0 : }
3943 : : else
3944 : 0 : errcontext("while vacuuming relation \"%s.%s\"",
3945 : 0 : errinfo->relnamespace, errinfo->relname);
3946 : 0 : break;
3947 : :
3948 : : case VACUUM_ERRCB_PHASE_VACUUM_INDEX:
3949 : 0 : errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
3950 : 0 : errinfo->indname, errinfo->relnamespace, errinfo->relname);
3951 : 0 : break;
3952 : :
3953 : : case VACUUM_ERRCB_PHASE_INDEX_CLEANUP:
3954 : 0 : errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
3955 : 0 : errinfo->indname, errinfo->relnamespace, errinfo->relname);
3956 : 0 : break;
3957 : :
3958 : : case VACUUM_ERRCB_PHASE_TRUNCATE:
3959 [ # # ]: 0 : if (BlockNumberIsValid(errinfo->blkno))
3960 : 0 : errcontext("while truncating relation \"%s.%s\" to %u blocks",
3961 : 0 : errinfo->relnamespace, errinfo->relname, errinfo->blkno);
3962 : 0 : break;
3963 : :
3964 : 1 : case VACUUM_ERRCB_PHASE_UNKNOWN:
3965 : : default:
3966 : 1 : return; /* do nothing; the errinfo may not be
3967 : : * initialized */
3968 : : }
3969 [ - + ]: 1 : }
3970 : :
3971 : : /*
3972 : : * Updates the information required for the vacuum error callback. This also
3973 : : * saves the current information so that it can later be restored via restore_vacuum_error_info.
3974 : : */
3975 : : static void
3976 : 11457 : update_vacuum_error_info(LVRelState *vacrel, LVSavedErrInfo *saved_vacrel,
3977 : : int phase, BlockNumber blkno, OffsetNumber offnum)
3978 : : {
3979 [ + + ]: 11457 : if (saved_vacrel)
3980 : : {
3981 : 1737 : saved_vacrel->offnum = vacrel->offnum;
3982 : 1737 : saved_vacrel->blkno = vacrel->blkno;
3983 : 1737 : saved_vacrel->phase = vacrel->phase;
3984 : 1737 : }
3985 : :
3986 : 11457 : vacrel->blkno = blkno;
3987 : 11457 : vacrel->offnum = offnum;
3988 : 11457 : vacrel->phase = phase;
3989 : 11457 : }
3990 : :
3991 : : /*
3992 : : * Restores the vacuum information saved via a prior call to update_vacuum_error_info.
3993 : : */
3994 : : static void
3995 : 1737 : restore_vacuum_error_info(LVRelState *vacrel,
3996 : : const LVSavedErrInfo *saved_vacrel)
3997 : : {
3998 : 1737 : vacrel->blkno = saved_vacrel->blkno;
3999 : 1737 : vacrel->offnum = saved_vacrel->offnum;
4000 : 1737 : vacrel->phase = saved_vacrel->phase;
4001 : 1737 : }
|