/*-------------------------------------------------------------------------
 *
 * heapam_xlog.c
 *    WAL replay logic for heap access method.
 *
 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/backend/access/heap/heapam_xlog.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/bufmask.h"
#include "access/heapam.h"
#include "access/visibilitymap.h"
#include "access/xlog.h"
#include "access/xlogutils.h"
#include "storage/freespace.h"
#include "storage/standby.h"


/*
 * Replay XLOG_HEAP2_PRUNE_* records.
 */
static void
heap_xlog_prune_freeze(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    char       *maindataptr = XLogRecGetData(record);
    xl_heap_prune xlrec;
    Buffer      buffer;
    RelFileLocator rlocator;
    BlockNumber blkno;
    Buffer      vmbuffer = InvalidBuffer;
    uint8       vmflags = 0;
    Size        freespace = 0;

    XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);
    memcpy(&xlrec, maindataptr, SizeOfHeapPrune);
    maindataptr += SizeOfHeapPrune;
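
    /*
     * As a sketch, the record layout consumed here is (see heapam_xlog.h for
     * the authoritative definitions): the fixed-size xl_heap_prune comes
     * first in the main data, followed by an optional unaligned snapshot
     * conflict horizon; the freeze plans and the redirected/dead/unused
     * offset arrays travel as block 0 data and are unpacked below by
     * heap_xlog_deserialize_prune_and_freeze().
     */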

    /*
     * We will take an ordinary exclusive lock or a cleanup lock depending on
     * whether the XLHP_CLEANUP_LOCK flag is set. With an ordinary exclusive
     * lock, we'd better not be doing anything that requires moving existing
     * tuple data.
     */
    Assert((xlrec.flags & XLHP_CLEANUP_LOCK) != 0 ||
           (xlrec.flags & (XLHP_HAS_REDIRECTIONS | XLHP_HAS_DEAD_ITEMS)) == 0);

    if (xlrec.flags & XLHP_VM_ALL_VISIBLE)
    {
        vmflags = VISIBILITYMAP_ALL_VISIBLE;
        if (xlrec.flags & XLHP_VM_ALL_FROZEN)
            vmflags |= VISIBILITYMAP_ALL_FROZEN;
    }
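
    /*
     * If either bit is set, vmflags is applied twice below: PD_ALL_VISIBLE
     * is set on the heap page while we still hold it, and the bits
     * themselves are set in the visibility map page afterwards.
     */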

    /*
     * After xl_heap_prune is the optional snapshot conflict horizon.
     *
     * In Hot Standby mode, we must ensure that there are no running queries
     * which would conflict with the changes in this record. That means we
     * can't replay this record if it removes tuples that are still visible to
     * transactions on the standby, freeze tuples with xids that are still
     * considered running on the standby, or set a page as all-visible in the
     * VM if it isn't all-visible to all transactions on the standby.
     */
    if ((xlrec.flags & XLHP_HAS_CONFLICT_HORIZON) != 0)
    {
        TransactionId snapshot_conflict_horizon;

        /* memcpy() because snapshot_conflict_horizon is stored unaligned */
        memcpy(&snapshot_conflict_horizon, maindataptr, sizeof(TransactionId));
        maindataptr += sizeof(TransactionId);

        if (InHotStandby)
            ResolveRecoveryConflictWithSnapshot(snapshot_conflict_horizon,
                                                (xlrec.flags & XLHP_IS_CATALOG_REL) != 0,
                                                rlocator);
    }

    /*
     * If we have a full-page image of the heap block, restore it and we're
     * done with the heap block.
     */
    if (XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL,
                                      (xlrec.flags & XLHP_CLEANUP_LOCK) != 0,
                                      &buffer) == BLK_NEEDS_REDO)
    {
        Page        page = BufferGetPage(buffer);
        OffsetNumber *redirected;
        OffsetNumber *nowdead;
        OffsetNumber *nowunused;
        int         nredirected;
        int         ndead;
        int         nunused;
        int         nplans;
        Size        datalen;
        xlhp_freeze_plan *plans;
        OffsetNumber *frz_offsets;
        char       *dataptr = XLogRecGetBlockData(record, 0, &datalen);
        bool        do_prune;

        heap_xlog_deserialize_prune_and_freeze(dataptr, xlrec.flags,
                                               &nplans, &plans, &frz_offsets,
                                               &nredirected, &redirected,
                                               &ndead, &nowdead,
                                               &nunused, &nowunused);

        do_prune = nredirected > 0 || ndead > 0 || nunused > 0;

        /* Ensure the record does something */
        Assert(do_prune || nplans > 0 || vmflags & VISIBILITYMAP_VALID_BITS);

        /*
         * Update all line pointers per the record, and repair fragmentation
         * if needed.
         */
        if (do_prune)
            heap_page_prune_execute(buffer,
                                    (xlrec.flags & XLHP_CLEANUP_LOCK) == 0,
                                    redirected, nredirected,
                                    nowdead, ndead,
                                    nowunused, nunused);

        /* Freeze tuples */
        for (int p = 0; p < nplans; p++)
        {
            HeapTupleFreeze frz;

            /*
             * Convert freeze plan representation from WAL record into
             * per-tuple format used by heap_execute_freeze_tuple
             */
            frz.xmax = plans[p].xmax;
            frz.t_infomask2 = plans[p].t_infomask2;
            frz.t_infomask = plans[p].t_infomask;
            frz.frzflags = plans[p].frzflags;
            frz.offset = InvalidOffsetNumber;   /* unused, but be tidy */

            for (int i = 0; i < plans[p].ntuples; i++)
            {
                OffsetNumber offset = *(frz_offsets++);
                ItemId      lp;
                HeapTupleHeader tuple;

                lp = PageGetItemId(page, offset);
                tuple = (HeapTupleHeader) PageGetItem(page, lp);
                heap_execute_freeze_tuple(tuple, &frz);
            }
        }

        /* There should be no more data */
        Assert((char *) frz_offsets == dataptr + datalen);

        /*
         * The critical integrity requirement here is that we must never end
         * up with the visibility map bit set and the page-level
         * PD_ALL_VISIBLE bit unset. If that were to occur, a subsequent page
         * modification would fail to clear the visibility map bit.
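         * For example, a later heap_delete() would then see PD_ALL_VISIBLE
         * clear, skip clearing the visibility map bit, and leave behind a
         * stale all-visible indication for index-only scans to trust.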
         */
        if (vmflags & VISIBILITYMAP_VALID_BITS)
            PageSetAllVisible(page);

        MarkBufferDirty(buffer);

        /*
         * See log_heap_prune_and_freeze() for commentary on when we set the
         * heap page LSN.
         */
        if (do_prune || nplans > 0 ||
            ((vmflags & VISIBILITYMAP_VALID_BITS) && XLogHintBitIsNeeded()))
            PageSetLSN(page, lsn);

        /*
         * Note: we don't worry about updating the page's prunability hints.
         * At worst this will cause an extra prune cycle to occur soon.
         */
    }

    /*
     * If we 1) released any space or line pointers or 2) set PD_ALL_VISIBLE
     * or the VM, update the freespace map.
     *
     * Even when no actual space is freed (when only marking the page
     * all-visible or frozen), we still update the FSM. Because the FSM is
     * unlogged and maintained heuristically, it often becomes stale on
     * standbys. If such a standby is later promoted and runs VACUUM, it will
     * skip recalculating free space for pages that were marked
     * all-visible/all-frozen. FreeSpaceMapVacuum() can then propagate overly
     * optimistic free space values upward, causing future insertions to
     * select pages that turn out to be unusable. In bulk, this can lead to
     * long stalls.
     *
     * To prevent this, always update the FSM even when only marking a page
     * all-visible/all-frozen.
     *
     * Do this regardless of whether a full-page image is logged, since FSM
     * data is not part of the page itself.
     */
    if (BufferIsValid(buffer))
    {
        if ((xlrec.flags & (XLHP_HAS_REDIRECTIONS |
                            XLHP_HAS_DEAD_ITEMS |
                            XLHP_HAS_NOW_UNUSED_ITEMS)) ||
            (vmflags & VISIBILITYMAP_VALID_BITS))
            freespace = PageGetHeapFreeSpace(BufferGetPage(buffer));

        /*
         * We want to avoid holding an exclusive lock on the heap buffer while
         * doing IO (either of the FSM or the VM), so we'll release it now.
         */
        UnlockReleaseBuffer(buffer);
    }

    /*
     * Now read and update the VM block.
     *
     * We must redo changes to the VM even if the heap page was skipped due to
     * LSN interlock. See comment in heap_xlog_multi_insert() for more details
     * on replaying changes to the VM.
     */
    if ((vmflags & VISIBILITYMAP_VALID_BITS) &&
        XLogReadBufferForRedoExtended(record, 1,
                                      RBM_ZERO_ON_ERROR,
                                      false,
                                      &vmbuffer) == BLK_NEEDS_REDO)
    {
        Page        vmpage = BufferGetPage(vmbuffer);

        /* initialize the page if it was read as zeros */
        if (PageIsNew(vmpage))
            PageInit(vmpage, BLCKSZ, 0);

        visibilitymap_set_vmbits(blkno, vmbuffer, vmflags, rlocator);

        Assert(BufferIsDirty(vmbuffer));
        PageSetLSN(vmpage, lsn);
    }

    if (BufferIsValid(vmbuffer))
        UnlockReleaseBuffer(vmbuffer);

    if (freespace > 0)
        XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
}

/*
 * Replay XLOG_HEAP2_VISIBLE records.
 *
 * The critical integrity requirement here is that we must never end up with
 * a situation where the visibility map bit is set, and the page-level
 * PD_ALL_VISIBLE bit is clear. If that were to occur, then a subsequent
 * page modification would fail to clear the visibility map bit.
 */
static void
heap_xlog_visible(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_heap_visible *xlrec = (xl_heap_visible *) XLogRecGetData(record);
    Buffer      vmbuffer = InvalidBuffer;
    Buffer      buffer;
    Page        page;
    RelFileLocator rlocator;
    BlockNumber blkno;
    XLogRedoAction action;

    Assert((xlrec->flags & VISIBILITYMAP_XLOG_VALID_BITS) == xlrec->flags);

    XLogRecGetBlockTag(record, 1, &rlocator, NULL, &blkno);

    /*
     * If there are any Hot Standby transactions running that have an xmin
     * horizon old enough that this page isn't all-visible for them, they
     * might incorrectly decide that an index-only scan can skip a heap fetch.
     *
     * NB: It might be better to throw some kind of "soft" conflict here that
     * forces any index-only scan that is in flight to perform heap fetches,
     * rather than killing the transaction outright.
     */
    if (InHotStandby)
        ResolveRecoveryConflictWithSnapshot(xlrec->snapshotConflictHorizon,
                                            xlrec->flags & VISIBILITYMAP_XLOG_CATALOG_REL,
                                            rlocator);

    /*
     * Read the heap page, if it still exists. If the heap file has been
     * dropped or truncated later in recovery, we don't need to update the
     * page, but we'd better still update the visibility map.
     */
    action = XLogReadBufferForRedo(record, 1, &buffer);
    if (action == BLK_NEEDS_REDO)
    {
        /*
         * We don't bump the LSN of the heap page when setting the visibility
         * map bit (unless checksums or wal_log_hints is enabled, in which
         * case we must). This exposes us to torn page hazards, but since
         * we're not inspecting the existing page contents in any way, we
         * don't care.
         */
        page = BufferGetPage(buffer);

        PageSetAllVisible(page);

        if (XLogHintBitIsNeeded())
            PageSetLSN(page, lsn);

        MarkBufferDirty(buffer);
    }
    else if (action == BLK_RESTORED)
    {
        /*
         * If heap block was backed up, we already restored it and there's
         * nothing more to do. (This can only happen with checksums or
         * wal_log_hints enabled.)
         */
    }

    if (BufferIsValid(buffer))
    {
        Size        space = PageGetFreeSpace(BufferGetPage(buffer));

        UnlockReleaseBuffer(buffer);

        /*
         * Since FSM is not WAL-logged and only updated heuristically, it
         * easily becomes stale in standbys. If the standby is later promoted
         * and runs VACUUM, it will skip updating individual free space
         * figures for pages that became all-visible (or all-frozen, depending
         * on the vacuum mode), which is troublesome when FreeSpaceMapVacuum
         * propagates too optimistic free space values to upper FSM layers;
         * later inserters try to use such pages only to find out that they
         * are unusable. This can cause long stalls when there are many such
         * pages.
         *
         * Forestall those problems by updating FSM's idea about a page that
         * is becoming all-visible or all-frozen.
         *
         * Do this regardless of a full-page image being applied, since the
         * FSM data is not in the page anyway.
         */
        if (xlrec->flags & VISIBILITYMAP_VALID_BITS)
            XLogRecordPageWithFreeSpace(rlocator, blkno, space);
    }

    /*
     * Even if we skipped the heap page update due to the LSN interlock, it's
     * still safe to update the visibility map. Any WAL record that clears
     * the visibility map bit does so before checking the page LSN, so any
     * bits that need to be cleared will still be cleared.
     */
    if (XLogReadBufferForRedoExtended(record, 0, RBM_ZERO_ON_ERROR, false,
                                      &vmbuffer) == BLK_NEEDS_REDO)
    {
        Page        vmpage = BufferGetPage(vmbuffer);
        Relation    reln;
        uint8       vmbits;

        /* initialize the page if it was read as zeros */
        if (PageIsNew(vmpage))
            PageInit(vmpage, BLCKSZ, 0);

        /* remove VISIBILITYMAP_XLOG_* */
        vmbits = xlrec->flags & VISIBILITYMAP_VALID_BITS;

        /*
         * XLogReadBufferForRedoExtended locked the buffer. But
         * visibilitymap_set will handle locking itself.
         */
        LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);

        reln = CreateFakeRelcacheEntry(rlocator);

        visibilitymap_set(reln, blkno, InvalidBuffer, lsn, vmbuffer,
                          xlrec->snapshotConflictHorizon, vmbits);

        ReleaseBuffer(vmbuffer);
        FreeFakeRelcacheEntry(reln);
    }
    else if (BufferIsValid(vmbuffer))
        UnlockReleaseBuffer(vmbuffer);
}

/*
 * Given an "infobits" field from an XLog record, set the correct bits in the
 * given infomask and infomask2 for the tuple touched by the record.
 *
 * (This is the reverse of compute_infobits).
 */
static void
fix_infomask_from_infobits(uint8 infobits, uint16 *infomask, uint16 *infomask2)
{
    *infomask &= ~(HEAP_XMAX_IS_MULTI | HEAP_XMAX_LOCK_ONLY |
                   HEAP_XMAX_KEYSHR_LOCK | HEAP_XMAX_EXCL_LOCK);
    *infomask2 &= ~HEAP_KEYS_UPDATED;

    if (infobits & XLHL_XMAX_IS_MULTI)
        *infomask |= HEAP_XMAX_IS_MULTI;
    if (infobits & XLHL_XMAX_LOCK_ONLY)
        *infomask |= HEAP_XMAX_LOCK_ONLY;
    if (infobits & XLHL_XMAX_EXCL_LOCK)
        *infomask |= HEAP_XMAX_EXCL_LOCK;
    /* note HEAP_XMAX_SHR_LOCK isn't considered here */
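    /*
     * (A shared lock still comes out right: HEAP_XMAX_SHR_LOCK is defined as
     * HEAP_XMAX_EXCL_LOCK | HEAP_XMAX_KEYSHR_LOCK, so setting both of the
     * corresponding XLHL bits reconstructs it.)
     */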
    if (infobits & XLHL_XMAX_KEYSHR_LOCK)
        *infomask |= HEAP_XMAX_KEYSHR_LOCK;

    if (infobits & XLHL_KEYS_UPDATED)
        *infomask2 |= HEAP_KEYS_UPDATED;
}

/*
 * Replay XLOG_HEAP_DELETE records.
 */
static void
heap_xlog_delete(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_heap_delete *xlrec = (xl_heap_delete *) XLogRecGetData(record);
    Buffer      buffer;
    Page        page;
    ItemId      lp;
    HeapTupleHeader htup;
    BlockNumber blkno;
    RelFileLocator target_locator;
    ItemPointerData target_tid;

    XLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);
    ItemPointerSetBlockNumber(&target_tid, blkno);
    ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);

    /*
     * The visibility map may need to be fixed even if the heap page is
     * already up-to-date.
     */
    if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
    {
        Relation    reln = CreateFakeRelcacheEntry(target_locator);
        Buffer      vmbuffer = InvalidBuffer;

        visibilitymap_pin(reln, blkno, &vmbuffer);
        visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
        ReleaseBuffer(vmbuffer);
        FreeFakeRelcacheEntry(reln);
    }

    if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
    {
        page = BufferGetPage(buffer);

        if (xlrec->offnum < 1 || xlrec->offnum > PageGetMaxOffsetNumber(page))
            elog(PANIC, "offnum out of range");
        lp = PageGetItemId(page, xlrec->offnum);
        if (!ItemIdIsNormal(lp))
            elog(PANIC, "invalid lp");

        htup = (HeapTupleHeader) PageGetItem(page, lp);

        htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
        htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
        HeapTupleHeaderClearHotUpdated(htup);
        fix_infomask_from_infobits(xlrec->infobits_set,
                                   &htup->t_infomask, &htup->t_infomask2);
        if (!(xlrec->flags & XLH_DELETE_IS_SUPER))
            HeapTupleHeaderSetXmax(htup, xlrec->xmax);
        else
            HeapTupleHeaderSetXmin(htup, InvalidTransactionId);
        HeapTupleHeaderSetCmax(htup, FirstCommandId, false);

        /* Mark the page as a candidate for pruning */
        PageSetPrunable(page, XLogRecGetXid(record));

        if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
            PageClearAllVisible(page);

        /* Make sure t_ctid is set correctly */
        if (xlrec->flags & XLH_DELETE_IS_PARTITION_MOVE)
            HeapTupleHeaderSetMovedPartitions(htup);
        else
            htup->t_ctid = target_tid;
        PageSetLSN(page, lsn);
        MarkBufferDirty(buffer);
    }
    if (BufferIsValid(buffer))
        UnlockReleaseBuffer(buffer);
}

/*
 * Replay XLOG_HEAP_INSERT records.
 */
static void
heap_xlog_insert(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_heap_insert *xlrec = (xl_heap_insert *) XLogRecGetData(record);
    Buffer      buffer;
    Page        page;
    union
    {
        HeapTupleHeaderData hdr;
        char        data[MaxHeapTupleSize];
    }           tbuf;
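    /* the union guarantees tbuf is suitably aligned for a tuple header */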
    HeapTupleHeader htup;
    xl_heap_header xlhdr;
    uint32      newlen;
    Size        freespace = 0;
    RelFileLocator target_locator;
    BlockNumber blkno;
    ItemPointerData target_tid;
    XLogRedoAction action;

    XLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);
    ItemPointerSetBlockNumber(&target_tid, blkno);
    ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);

    /* No freezing in the heap_insert() code path */
    Assert(!(xlrec->flags & XLH_INSERT_ALL_FROZEN_SET));

    /*
     * The visibility map may need to be fixed even if the heap page is
     * already up-to-date.
     */
    if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
    {
        Relation    reln = CreateFakeRelcacheEntry(target_locator);
        Buffer      vmbuffer = InvalidBuffer;

        visibilitymap_pin(reln, blkno, &vmbuffer);
        visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
        ReleaseBuffer(vmbuffer);
        FreeFakeRelcacheEntry(reln);
    }

    /*
     * If we inserted the first and only tuple on the page, re-initialize the
     * page from scratch.
     */
    if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
    {
        buffer = XLogInitBufferForRedo(record, 0);
        page = BufferGetPage(buffer);
        PageInit(page, BufferGetPageSize(buffer), 0);
        action = BLK_NEEDS_REDO;
    }
    else
        action = XLogReadBufferForRedo(record, 0, &buffer);
    if (action == BLK_NEEDS_REDO)
    {
        Size        datalen;
        char       *data;

        page = BufferGetPage(buffer);

        if (PageGetMaxOffsetNumber(page) + 1 < xlrec->offnum)
            elog(PANIC, "invalid max offset number");

        data = XLogRecGetBlockData(record, 0, &datalen);

        newlen = datalen - SizeOfHeapHeader;
        Assert(datalen > SizeOfHeapHeader && newlen <= MaxHeapTupleSize);
        memcpy(&xlhdr, data, SizeOfHeapHeader);
        data += SizeOfHeapHeader;

        htup = &tbuf.hdr;
        MemSet(htup, 0, SizeofHeapTupleHeader);
        /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
        memcpy((char *) htup + SizeofHeapTupleHeader,
               data,
               newlen);
        newlen += SizeofHeapTupleHeader;
        htup->t_infomask2 = xlhdr.t_infomask2;
        htup->t_infomask = xlhdr.t_infomask;
        htup->t_hoff = xlhdr.t_hoff;
        HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
        HeapTupleHeaderSetCmin(htup, FirstCommandId);
        htup->t_ctid = target_tid;

        if (PageAddItem(page, htup, newlen, xlrec->offnum, true, true) == InvalidOffsetNumber)
            elog(PANIC, "failed to add tuple");

        freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */

        PageSetLSN(page, lsn);

        if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
            PageClearAllVisible(page);

        MarkBufferDirty(buffer);
    }
    if (BufferIsValid(buffer))
        UnlockReleaseBuffer(buffer);

    /*
     * If the page is running low on free space, update the FSM as well.
     * Arbitrarily, our definition of "low" is less than 20%. We can't do much
     * better than that without knowing the fill-factor for the table.
     *
     * XXX: Don't do this if the page was restored from full page image. We
     * don't bother to update the FSM in that case, it doesn't need to be
     * totally accurate anyway.
     */
    if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
        XLogRecordPageWithFreeSpace(target_locator, blkno, freespace);
}

/*
 * Replay XLOG_HEAP2_MULTI_INSERT records.
 */
static void
heap_xlog_multi_insert(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_heap_multi_insert *xlrec;
    RelFileLocator rlocator;
    BlockNumber blkno;
    Buffer      buffer;
    Page        page;
    union
    {
        HeapTupleHeaderData hdr;
        char        data[MaxHeapTupleSize];
    }           tbuf;
    HeapTupleHeader htup;
    uint32      newlen;
    Size        freespace = 0;
    int         i;
    bool        isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0;
    XLogRedoAction action;
    Buffer      vmbuffer = InvalidBuffer;

    /*
     * Insertion doesn't overwrite MVCC data, so no conflict processing is
     * required.
     */
    xlrec = (xl_heap_multi_insert *) XLogRecGetData(record);

    XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);

    /* check that the mutually exclusive flags are not both set */
    Assert(!((xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) &&
             (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)));

    /*
     * The visibility map may need to be fixed even if the heap page is
     * already up-to-date.
     */
    if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
    {
        Relation    reln = CreateFakeRelcacheEntry(rlocator);

        visibilitymap_pin(reln, blkno, &vmbuffer);
        visibilitymap_clear(reln, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS);
        ReleaseBuffer(vmbuffer);
        vmbuffer = InvalidBuffer;
        FreeFakeRelcacheEntry(reln);
    }

    if (isinit)
    {
        buffer = XLogInitBufferForRedo(record, 0);
        page = BufferGetPage(buffer);
        PageInit(page, BufferGetPageSize(buffer), 0);
        action = BLK_NEEDS_REDO;
    }
    else
        action = XLogReadBufferForRedo(record, 0, &buffer);
    if (action == BLK_NEEDS_REDO)
    {
        char       *tupdata;
        char       *endptr;
        Size        len;

        /* Tuples are stored as block data */
        tupdata = XLogRecGetBlockData(record, 0, &len);
        endptr = tupdata + len;

        page = BufferGetPage(buffer);

        for (i = 0; i < xlrec->ntuples; i++)
        {
            OffsetNumber offnum;
            xl_multi_insert_tuple *xlhdr;

            /*
             * If we're reinitializing the page, the tuples are stored in
             * order from FirstOffsetNumber. Otherwise there's an array of
             * offsets in the WAL record, and the tuples come after that.
             */
            if (isinit)
                offnum = FirstOffsetNumber + i;
            else
                offnum = xlrec->offsets[i];
            if (PageGetMaxOffsetNumber(page) + 1 < offnum)
                elog(PANIC, "invalid max offset number");

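            /*
             * Each xl_multi_insert_tuple sub-header is only guaranteed
             * 2-byte alignment within the block data, hence the SHORTALIGN.
             */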
            xlhdr = (xl_multi_insert_tuple *) SHORTALIGN(tupdata);
            tupdata = ((char *) xlhdr) + SizeOfMultiInsertTuple;

            newlen = xlhdr->datalen;
            Assert(newlen <= MaxHeapTupleSize);
            htup = &tbuf.hdr;
            MemSet(htup, 0, SizeofHeapTupleHeader);
            /* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
            memcpy((char *) htup + SizeofHeapTupleHeader,
                   tupdata,
                   newlen);
            tupdata += newlen;

            newlen += SizeofHeapTupleHeader;
            htup->t_infomask2 = xlhdr->t_infomask2;
            htup->t_infomask = xlhdr->t_infomask;
            htup->t_hoff = xlhdr->t_hoff;
            HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
            HeapTupleHeaderSetCmin(htup, FirstCommandId);
            ItemPointerSetBlockNumber(&htup->t_ctid, blkno);
            ItemPointerSetOffsetNumber(&htup->t_ctid, offnum);

            offnum = PageAddItem(page, htup, newlen, offnum, true, true);
            if (offnum == InvalidOffsetNumber)
                elog(PANIC, "failed to add tuple");
        }
        if (tupdata != endptr)
            elog(PANIC, "total tuple length mismatch");

        freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */

        PageSetLSN(page, lsn);

        if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
            PageClearAllVisible(page);

        /* XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible */
        if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
            PageSetAllVisible(page);

        MarkBufferDirty(buffer);
    }
    if (BufferIsValid(buffer))
        UnlockReleaseBuffer(buffer);

    buffer = InvalidBuffer;

    /*
     * Read and update the visibility map (VM) block.
     *
     * We must always redo VM changes, even if the corresponding heap page
     * update was skipped due to the LSN interlock. Each VM block covers
     * multiple heap pages, so later WAL records may update other bits in the
     * same block. If this record includes an FPI (full-page image),
     * subsequent WAL records may depend on it to guard against torn pages.
     *
     * Heap page changes are replayed first to preserve the invariant:
     * PD_ALL_VISIBLE must be set on the heap page if the VM bit is set.
     *
     * Note that we released the heap page lock above. During normal
     * operation, this would be unsafe --- a concurrent modification could
     * clear PD_ALL_VISIBLE while the VM bit remained set, violating the
     * invariant.
     *
     * During recovery, however, no concurrent writers exist. Therefore,
     * updating the VM without holding the heap page lock is safe enough. This
     * same approach is taken when replaying xl_heap_visible records (see
     * heap_xlog_visible()).
     */
    if ((xlrec->flags & XLH_INSERT_ALL_FROZEN_SET) &&
        XLogReadBufferForRedoExtended(record, 1, RBM_ZERO_ON_ERROR, false,
                                      &vmbuffer) == BLK_NEEDS_REDO)
    {
        Page        vmpage = BufferGetPage(vmbuffer);

        /* initialize the page if it was read as zeros */
        if (PageIsNew(vmpage))
            PageInit(vmpage, BLCKSZ, 0);

        visibilitymap_set_vmbits(blkno,
                                 vmbuffer,
                                 VISIBILITYMAP_ALL_VISIBLE |
                                 VISIBILITYMAP_ALL_FROZEN,
                                 rlocator);

        Assert(BufferIsDirty(vmbuffer));
        PageSetLSN(vmpage, lsn);
    }

    if (BufferIsValid(vmbuffer))
        UnlockReleaseBuffer(vmbuffer);

    /*
     * If the page is running low on free space, update the FSM as well.
     * Arbitrarily, our definition of "low" is less than 20%. We can't do much
     * better than that without knowing the fill-factor for the table.
     *
     * XXX: Don't do this if the page was restored from full page image. We
     * don't bother to update the FSM in that case, it doesn't need to be
     * totally accurate anyway.
     */
    if (action == BLK_NEEDS_REDO && freespace < BLCKSZ / 5)
        XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
}

/*
 * Replay XLOG_HEAP_UPDATE and XLOG_HEAP_HOT_UPDATE records.
 */
static void
heap_xlog_update(XLogReaderState *record, bool hot_update)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_heap_update *xlrec = (xl_heap_update *) XLogRecGetData(record);
    RelFileLocator rlocator;
    BlockNumber oldblk;
    BlockNumber newblk;
    ItemPointerData newtid;
    Buffer      obuffer,
                nbuffer;
    Page        page;
    OffsetNumber offnum;
    ItemId      lp;
    HeapTupleData oldtup;
    HeapTupleHeader htup;
    uint16      prefixlen = 0,
                suffixlen = 0;
    char       *newp;
    union
    {
        HeapTupleHeaderData hdr;
        char        data[MaxHeapTupleSize];
    }           tbuf;
    xl_heap_header xlhdr;
    uint32      newlen;
    Size        freespace = 0;
    XLogRedoAction oldaction;
    XLogRedoAction newaction;

    /* initialize to keep the compiler quiet */
    oldtup.t_data = NULL;
    oldtup.t_len = 0;

    XLogRecGetBlockTag(record, 0, &rlocator, NULL, &newblk);
    if (XLogRecGetBlockTagExtended(record, 1, NULL, NULL, &oldblk, NULL))
    {
        /* HOT updates are never done across pages */
        Assert(!hot_update);
    }
    else
        oldblk = newblk;

    ItemPointerSet(&newtid, newblk, xlrec->new_offnum);

    /*
     * The visibility map may need to be fixed even if the heap page is
     * already up-to-date.
     */
    if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
    {
        Relation    reln = CreateFakeRelcacheEntry(rlocator);
        Buffer      vmbuffer = InvalidBuffer;

        visibilitymap_pin(reln, oldblk, &vmbuffer);
        visibilitymap_clear(reln, oldblk, vmbuffer, VISIBILITYMAP_VALID_BITS);
        ReleaseBuffer(vmbuffer);
        FreeFakeRelcacheEntry(reln);
    }

    /*
     * In normal operation, it is important to lock the two pages in
     * page-number order, to avoid possible deadlocks against other update
     * operations going the other way. However, during WAL replay there can
     * be no other update happening, so we don't need to worry about that. But
     * we *do* need to worry that we don't expose an inconsistent state to Hot
     * Standby queries --- so the original page can't be unlocked before we've
     * added the new tuple to the new page.
     */

    /* Deal with old tuple version */
    oldaction = XLogReadBufferForRedo(record, (oldblk == newblk) ? 0 : 1,
                                      &obuffer);
    if (oldaction == BLK_NEEDS_REDO)
    {
        page = BufferGetPage(obuffer);
        offnum = xlrec->old_offnum;
        if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
            elog(PANIC, "offnum out of range");
        lp = PageGetItemId(page, offnum);
        if (!ItemIdIsNormal(lp))
            elog(PANIC, "invalid lp");

        htup = (HeapTupleHeader) PageGetItem(page, lp);

        oldtup.t_data = htup;
        oldtup.t_len = ItemIdGetLength(lp);

        htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
        htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
        if (hot_update)
            HeapTupleHeaderSetHotUpdated(htup);
        else
            HeapTupleHeaderClearHotUpdated(htup);
        fix_infomask_from_infobits(xlrec->old_infobits_set, &htup->t_infomask,
                                   &htup->t_infomask2);
        HeapTupleHeaderSetXmax(htup, xlrec->old_xmax);
        HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
        /* Set forward chain link in t_ctid */
        htup->t_ctid = newtid;

        /* Mark the page as a candidate for pruning */
        PageSetPrunable(page, XLogRecGetXid(record));

        if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
            PageClearAllVisible(page);

        PageSetLSN(page, lsn);
        MarkBufferDirty(obuffer);
    }

    /*
     * Read the page the new tuple goes into, if different from old.
     */
    if (oldblk == newblk)
    {
        nbuffer = obuffer;
        newaction = oldaction;
    }
    else if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
    {
        nbuffer = XLogInitBufferForRedo(record, 0);
        page = BufferGetPage(nbuffer);
        PageInit(page, BufferGetPageSize(nbuffer), 0);
        newaction = BLK_NEEDS_REDO;
    }
    else
        newaction = XLogReadBufferForRedo(record, 0, &nbuffer);

    /*
     * The visibility map may need to be fixed even if the heap page is
     * already up-to-date.
     */
    if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
    {
        Relation    reln = CreateFakeRelcacheEntry(rlocator);
        Buffer      vmbuffer = InvalidBuffer;

        visibilitymap_pin(reln, newblk, &vmbuffer);
        visibilitymap_clear(reln, newblk, vmbuffer, VISIBILITYMAP_VALID_BITS);
        ReleaseBuffer(vmbuffer);
        FreeFakeRelcacheEntry(reln);
    }

    /* Deal with new tuple */
    if (newaction == BLK_NEEDS_REDO)
    {
        char       *recdata;
        char       *recdata_end;
        Size        datalen;
        Size        tuplen;

        recdata = XLogRecGetBlockData(record, 0, &datalen);
        recdata_end = recdata + datalen;

        page = BufferGetPage(nbuffer);

        offnum = xlrec->new_offnum;
        if (PageGetMaxOffsetNumber(page) + 1 < offnum)
            elog(PANIC, "invalid max offset number");

        if (xlrec->flags & XLH_UPDATE_PREFIX_FROM_OLD)
        {
            Assert(newblk == oldblk);
            memcpy(&prefixlen, recdata, sizeof(uint16));
            recdata += sizeof(uint16);
        }
        if (xlrec->flags & XLH_UPDATE_SUFFIX_FROM_OLD)
        {
            Assert(newblk == oldblk);
            memcpy(&suffixlen, recdata, sizeof(uint16));
            recdata += sizeof(uint16);
        }

        memcpy(&xlhdr, recdata, SizeOfHeapHeader);
        recdata += SizeOfHeapHeader;

        tuplen = recdata_end - recdata;
        Assert(tuplen <= MaxHeapTupleSize);

        htup = &tbuf.hdr;
        MemSet(htup, 0, SizeofHeapTupleHeader);

        /*
         * Reconstruct the new tuple using the prefix and/or suffix from the
         * old tuple, and the data stored in the WAL record.
         */
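        /*
         * The assembled layout, as a sketch:
         *
         *   [bitmap + padding (+ oid)]   from the WAL record
         *   [prefix]                     prefixlen bytes from the old tuple
         *   [new data]                   remainder of the WAL tuple data
         *   [suffix]                     suffixlen bytes from the old tuple
         */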
        newp = (char *) htup + SizeofHeapTupleHeader;
        if (prefixlen > 0)
        {
            int         len;

            /* copy bitmap [+ padding] [+ oid] from WAL record */
            len = xlhdr.t_hoff - SizeofHeapTupleHeader;
            memcpy(newp, recdata, len);
            recdata += len;
            newp += len;

            /* copy prefix from old tuple */
            memcpy(newp, (char *) oldtup.t_data + oldtup.t_data->t_hoff, prefixlen);
            newp += prefixlen;

            /* copy new tuple data from WAL record */
            len = tuplen - (xlhdr.t_hoff - SizeofHeapTupleHeader);
            memcpy(newp, recdata, len);
            recdata += len;
            newp += len;
        }
        else
        {
            /*
             * copy bitmap [+ padding] [+ oid] + data from record, all in one
             * go
             */
            memcpy(newp, recdata, tuplen);
            recdata += tuplen;
            newp += tuplen;
        }
        Assert(recdata == recdata_end);

        /* copy suffix from old tuple */
        if (suffixlen > 0)
            memcpy(newp, (char *) oldtup.t_data + oldtup.t_len - suffixlen, suffixlen);

        newlen = SizeofHeapTupleHeader + tuplen + prefixlen + suffixlen;
        htup->t_infomask2 = xlhdr.t_infomask2;
        htup->t_infomask = xlhdr.t_infomask;
        htup->t_hoff = xlhdr.t_hoff;

        HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
        HeapTupleHeaderSetCmin(htup, FirstCommandId);
        HeapTupleHeaderSetXmax(htup, xlrec->new_xmax);
        /* Make sure there is no forward chain link in t_ctid */
        htup->t_ctid = newtid;

        offnum = PageAddItem(page, htup, newlen, offnum, true, true);
        if (offnum == InvalidOffsetNumber)
            elog(PANIC, "failed to add tuple");

        if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
            PageClearAllVisible(page);

        freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */

        PageSetLSN(page, lsn);
        MarkBufferDirty(nbuffer);
    }

    if (BufferIsValid(nbuffer) && nbuffer != obuffer)
        UnlockReleaseBuffer(nbuffer);
    if (BufferIsValid(obuffer))
        UnlockReleaseBuffer(obuffer);

    /*
     * If the new page is running low on free space, update the FSM as well.
     * Arbitrarily, our definition of "low" is less than 20%. We can't do much
     * better than that without knowing the fill-factor for the table.
     *
     * However, don't update the FSM on HOT updates, because after crash
     * recovery, either the old or the new tuple will certainly be dead and
     * prunable. After pruning, the page will have roughly as much free space
     * as it did before the update, assuming the new tuple is about the same
     * size as the old one.
     *
     * XXX: Don't do this if the page was restored from full page image. We
     * don't bother to update the FSM in that case, it doesn't need to be
     * totally accurate anyway.
     */
    if (newaction == BLK_NEEDS_REDO && !hot_update && freespace < BLCKSZ / 5)
        XLogRecordPageWithFreeSpace(rlocator, newblk, freespace);
}

/*
 * Replay XLOG_HEAP_CONFIRM records.
 */
static void
heap_xlog_confirm(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_heap_confirm *xlrec = (xl_heap_confirm *) XLogRecGetData(record);
    Buffer      buffer;
    Page        page;
    OffsetNumber offnum;
    ItemId      lp;
    HeapTupleHeader htup;

    if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
    {
        page = BufferGetPage(buffer);

        offnum = xlrec->offnum;
        if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
            elog(PANIC, "offnum out of range");
        lp = PageGetItemId(page, offnum);
        if (!ItemIdIsNormal(lp))
            elog(PANIC, "invalid lp");

        htup = (HeapTupleHeader) PageGetItem(page, lp);

        /*
         * Confirm tuple as actually inserted
         */
        ItemPointerSet(&htup->t_ctid, BufferGetBlockNumber(buffer), offnum);

        PageSetLSN(page, lsn);
        MarkBufferDirty(buffer);
    }
    if (BufferIsValid(buffer))
        UnlockReleaseBuffer(buffer);
}

/*
 * Replay XLOG_HEAP_LOCK records.
 */
static void
heap_xlog_lock(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_heap_lock *xlrec = (xl_heap_lock *) XLogRecGetData(record);
    Buffer      buffer;
    Page        page;
    OffsetNumber offnum;
    ItemId      lp;
    HeapTupleHeader htup;

    /*
     * The visibility map may need to be fixed even if the heap page is
     * already up-to-date.
     */
    if (xlrec->flags & XLH_LOCK_ALL_FROZEN_CLEARED)
    {
        RelFileLocator rlocator;
        Buffer      vmbuffer = InvalidBuffer;
        BlockNumber block;
        Relation    reln;

        XLogRecGetBlockTag(record, 0, &rlocator, NULL, &block);
        reln = CreateFakeRelcacheEntry(rlocator);

        visibilitymap_pin(reln, block, &vmbuffer);
        visibilitymap_clear(reln, block, vmbuffer, VISIBILITYMAP_ALL_FROZEN);

        ReleaseBuffer(vmbuffer);
        FreeFakeRelcacheEntry(reln);
    }

    if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
    {
        page = BufferGetPage(buffer);

        offnum = xlrec->offnum;
        if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
            elog(PANIC, "offnum out of range");
        lp = PageGetItemId(page, offnum);
        if (!ItemIdIsNormal(lp))
            elog(PANIC, "invalid lp");

        htup = (HeapTupleHeader) PageGetItem(page, lp);

        htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
        htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
        fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
                                   &htup->t_infomask2);

        /*
         * Clear relevant update flags, but only if the modified infomask says
         * there's no update.
         */
        if (HEAP_XMAX_IS_LOCKED_ONLY(htup->t_infomask))
        {
            HeapTupleHeaderClearHotUpdated(htup);
            /* Make sure there is no forward chain link in t_ctid */
            ItemPointerSet(&htup->t_ctid,
                           BufferGetBlockNumber(buffer),
                           offnum);
        }
        HeapTupleHeaderSetXmax(htup, xlrec->xmax);
        HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
        PageSetLSN(page, lsn);
        MarkBufferDirty(buffer);
    }
    if (BufferIsValid(buffer))
        UnlockReleaseBuffer(buffer);
}

/*
 * Replay XLOG_HEAP2_LOCK_UPDATED records.
 */
static void
heap_xlog_lock_updated(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_heap_lock_updated *xlrec;
    Buffer      buffer;
    Page        page;
    OffsetNumber offnum;
    ItemId      lp;
    HeapTupleHeader htup;

    xlrec = (xl_heap_lock_updated *) XLogRecGetData(record);

    /*
     * The visibility map may need to be fixed even if the heap page is
     * already up-to-date.
     */
    if (xlrec->flags & XLH_LOCK_ALL_FROZEN_CLEARED)
    {
        RelFileLocator rlocator;
        Buffer      vmbuffer = InvalidBuffer;
        BlockNumber block;
        Relation    reln;

        XLogRecGetBlockTag(record, 0, &rlocator, NULL, &block);
        reln = CreateFakeRelcacheEntry(rlocator);

        visibilitymap_pin(reln, block, &vmbuffer);
        visibilitymap_clear(reln, block, vmbuffer, VISIBILITYMAP_ALL_FROZEN);

        ReleaseBuffer(vmbuffer);
        FreeFakeRelcacheEntry(reln);
    }

    if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
    {
        page = BufferGetPage(buffer);

        offnum = xlrec->offnum;
        if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
            elog(PANIC, "offnum out of range");
        lp = PageGetItemId(page, offnum);
        if (!ItemIdIsNormal(lp))
            elog(PANIC, "invalid lp");

        htup = (HeapTupleHeader) PageGetItem(page, lp);

        htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
        htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
        fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
                                   &htup->t_infomask2);
        HeapTupleHeaderSetXmax(htup, xlrec->xmax);

        PageSetLSN(page, lsn);
        MarkBufferDirty(buffer);
    }
    if (BufferIsValid(buffer))
        UnlockReleaseBuffer(buffer);
}

/*
 * Replay XLOG_HEAP_INPLACE records.
 */
static void
heap_xlog_inplace(XLogReaderState *record)
{
    XLogRecPtr  lsn = record->EndRecPtr;
    xl_heap_inplace *xlrec = (xl_heap_inplace *) XLogRecGetData(record);
    Buffer      buffer;
    Page        page;
    OffsetNumber offnum;
    ItemId      lp;
    HeapTupleHeader htup;
    uint32      oldlen;
    Size        newlen;

    if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
    {
        char       *newtup = XLogRecGetBlockData(record, 0, &newlen);

        page = BufferGetPage(buffer);

        offnum = xlrec->offnum;
        if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
            elog(PANIC, "offnum out of range");
        lp = PageGetItemId(page, offnum);
        if (!ItemIdIsNormal(lp))
            elog(PANIC, "invalid lp");

        htup = (HeapTupleHeader) PageGetItem(page, lp);

        oldlen = ItemIdGetLength(lp) - htup->t_hoff;
        if (oldlen != newlen)
            elog(PANIC, "wrong tuple length");

        memcpy((char *) htup + htup->t_hoff, newtup, newlen);

        PageSetLSN(page, lsn);
        MarkBufferDirty(buffer);
    }
    if (BufferIsValid(buffer))
        UnlockReleaseBuffer(buffer);

    ProcessCommittedInvalidationMessages(xlrec->msgs,
                                         xlrec->nmsgs,
                                         xlrec->relcacheInitFileInval,
                                         xlrec->dbId,
                                         xlrec->tsId);
}

void
heap_redo(XLogReaderState *record)
{
    uint8       info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;

    /*
     * These operations don't overwrite MVCC data so no conflict processing is
     * required. The ones in heap2 rmgr do.
     */

    switch (info & XLOG_HEAP_OPMASK)
    {
        case XLOG_HEAP_INSERT:
            heap_xlog_insert(record);
            break;
        case XLOG_HEAP_DELETE:
            heap_xlog_delete(record);
            break;
        case XLOG_HEAP_UPDATE:
            heap_xlog_update(record, false);
            break;
        case XLOG_HEAP_TRUNCATE:

            /*
             * TRUNCATE is a no-op because the actions are already logged as
             * SMGR WAL records. The TRUNCATE WAL record exists only for
             * logical decoding.
             */
            break;
        case XLOG_HEAP_HOT_UPDATE:
            heap_xlog_update(record, true);
            break;
        case XLOG_HEAP_CONFIRM:
            heap_xlog_confirm(record);
            break;
        case XLOG_HEAP_LOCK:
            heap_xlog_lock(record);
            break;
        case XLOG_HEAP_INPLACE:
            heap_xlog_inplace(record);
            break;
        default:
            elog(PANIC, "heap_redo: unknown op code %u", info);
    }
}

void
heap2_redo(XLogReaderState *record)
{
    uint8       info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;

    switch (info & XLOG_HEAP_OPMASK)
    {
        case XLOG_HEAP2_PRUNE_ON_ACCESS:
        case XLOG_HEAP2_PRUNE_VACUUM_SCAN:
        case XLOG_HEAP2_PRUNE_VACUUM_CLEANUP:
            heap_xlog_prune_freeze(record);
            break;
        case XLOG_HEAP2_VISIBLE:
            heap_xlog_visible(record);
            break;
        case XLOG_HEAP2_MULTI_INSERT:
            heap_xlog_multi_insert(record);
            break;
        case XLOG_HEAP2_LOCK_UPDATED:
            heap_xlog_lock_updated(record);
            break;
        case XLOG_HEAP2_NEW_CID:

            /*
             * Nothing to do on a real replay, only used during logical
             * decoding.
             */
            break;
        case XLOG_HEAP2_REWRITE:
            heap_xlog_logical_rewrite(record);
            break;
        default:
            elog(PANIC, "heap2_redo: unknown op code %u", info);
    }
}

/*
 * Mask a heap page before performing consistency checks on it.
 */
void
heap_mask(char *pagedata, BlockNumber blkno)
{
    Page        page = (Page) pagedata;
    OffsetNumber off;

    mask_page_lsn_and_checksum(page);

    mask_page_hint_bits(page);
    mask_unused_space(page);

    for (off = 1; off <= PageGetMaxOffsetNumber(page); off++)
    {
        ItemId      iid = PageGetItemId(page, off);
        char       *page_item;

        page_item = (char *) (page + ItemIdGetOffset(iid));

        if (ItemIdIsNormal(iid))
        {
            HeapTupleHeader page_htup = (HeapTupleHeader) page_item;

            /*
             * If xmin of a tuple is not yet frozen, we should ignore
             * differences in hint bits, since they can be set without
             * emitting WAL.
             */
            if (!HeapTupleHeaderXminFrozen(page_htup))
                page_htup->t_infomask &= ~HEAP_XACT_MASK;
            else
            {
                /* Still we need to mask xmax hint bits. */
                page_htup->t_infomask &= ~HEAP_XMAX_INVALID;
                page_htup->t_infomask &= ~HEAP_XMAX_COMMITTED;
            }

            /*
             * During replay, we set Command Id to FirstCommandId. Hence, mask
             * it. See heap_xlog_insert() for details.
             */
            page_htup->t_choice.t_heap.t_field3.t_cid = MASK_MARKER;

            /*
             * For a speculative tuple, heap_insert() does not set ctid in the
             * caller-passed heap tuple itself, leaving the ctid field to
             * contain a speculative token value - a per-backend monotonically
             * increasing identifier. Besides, it does not WAL-log ctid under
             * any circumstances.
             *
             * During redo, heap_xlog_insert() sets t_ctid to current block
             * number and self offset number. It doesn't care about any
             * speculative insertions on the primary. Hence, we set t_ctid to
             * current block number and self offset number to ignore any
             * inconsistency.
             */
            if (HeapTupleHeaderIsSpeculative(page_htup))
                ItemPointerSet(&page_htup->t_ctid, blkno, off);

            /*
             * NB: Not ignoring ctid changes due to the tuple having moved
             * (i.e. HeapTupleHeaderIndicatesMovedPartitions), because that's
             * important information that needs to be in-sync between primary
             * and standby, and thus is WAL logged.
             */
        }

        /*
         * Ignore any padding bytes after the tuple, when the length of the
         * item is not MAXALIGNed.
         */
        if (ItemIdHasStorage(iid))
        {
            int         len = ItemIdGetLength(iid);
            int         padlen = MAXALIGN(len) - len;

            if (padlen > 0)
                memset(page_item + len, MASK_MARKER, padlen);
        }
    }
}