Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * heap_surgery.c
4 : * Functions to perform surgery on the damaged heap table.
5 : *
6 : * Copyright (c) 2020-2026, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * contrib/pg_surgery/heap_surgery.c
10 : *
11 : *-------------------------------------------------------------------------
12 : */
13 : #include "postgres.h"
14 :
15 : #include "access/htup_details.h"
16 : #include "access/relation.h"
17 : #include "access/visibilitymap.h"
18 : #include "access/xloginsert.h"
19 : #include "catalog/pg_am_d.h"
20 : #include "miscadmin.h"
21 : #include "storage/bufmgr.h"
22 : #include "utils/acl.h"
23 : #include "utils/array.h"
24 : #include "utils/rel.h"
25 :
26 0 : PG_MODULE_MAGIC_EXT(
27 : .name = "pg_surgery",
28 : .version = PG_VERSION
29 : );
30 :
/*
 * The operation heap_force_common() is asked to apply to each targeted
 * heap tuple.
 */
typedef enum HeapTupleForceOption
{
	HEAP_FORCE_KILL,			/* mark the tuple's line pointer dead */
	HEAP_FORCE_FREEZE,			/* reset the tuple header to a frozen state */
} HeapTupleForceOption;
37 :
38 0 : PG_FUNCTION_INFO_V1(heap_force_kill);
39 0 : PG_FUNCTION_INFO_V1(heap_force_freeze);
40 :
41 : static int32 tidcmp(const void *a, const void *b);
42 : static Datum heap_force_common(FunctionCallInfo fcinfo,
43 : HeapTupleForceOption heap_force_opt);
44 : static void sanity_check_tid_array(ArrayType *ta, int *ntids);
45 : static BlockNumber find_tids_one_page(ItemPointer tids, int ntids,
46 : OffsetNumber *next_start_ptr);
47 :
48 : /*-------------------------------------------------------------------------
49 : * heap_force_kill()
50 : *
51 : * Force kill the tuple(s) pointed to by the item pointer(s) stored in the
52 : * given TID array.
53 : *
54 : * Usage: SELECT heap_force_kill(regclass, tid[]);
55 : *-------------------------------------------------------------------------
56 : */
57 : Datum
58 0 : heap_force_kill(PG_FUNCTION_ARGS)
59 : {
60 0 : PG_RETURN_DATUM(heap_force_common(fcinfo, HEAP_FORCE_KILL));
61 : }
62 :
63 : /*-------------------------------------------------------------------------
64 : * heap_force_freeze()
65 : *
66 : * Force freeze the tuple(s) pointed to by the item pointer(s) stored in the
67 : * given TID array.
68 : *
69 : * Usage: SELECT heap_force_freeze(regclass, tid[]);
70 : *-------------------------------------------------------------------------
71 : */
72 : Datum
73 0 : heap_force_freeze(PG_FUNCTION_ARGS)
74 : {
75 0 : PG_RETURN_DATUM(heap_force_common(fcinfo, HEAP_FORCE_FREEZE));
76 : }
77 :
78 : /*-------------------------------------------------------------------------
79 : * heap_force_common()
80 : *
81 : * Common code for heap_force_kill and heap_force_freeze
82 : *-------------------------------------------------------------------------
83 : */
84 : static Datum
85 0 : heap_force_common(FunctionCallInfo fcinfo, HeapTupleForceOption heap_force_opt)
86 : {
87 0 : Oid relid = PG_GETARG_OID(0);
88 0 : ArrayType *ta = PG_GETARG_ARRAYTYPE_P_COPY(1);
89 0 : ItemPointer tids;
90 0 : int ntids,
91 : nblocks;
92 0 : Relation rel;
93 0 : OffsetNumber curr_start_ptr,
94 : next_start_ptr;
95 0 : bool include_this_tid[MaxHeapTuplesPerPage];
96 :
97 0 : if (RecoveryInProgress())
98 0 : ereport(ERROR,
99 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
100 : errmsg("recovery is in progress"),
101 : errhint("Heap surgery functions cannot be executed during recovery.")));
102 :
103 : /* Check inputs. */
104 0 : sanity_check_tid_array(ta, &ntids);
105 :
106 0 : rel = relation_open(relid, RowExclusiveLock);
107 :
108 : /*
109 : * Check target relation.
110 : */
111 0 : if (!RELKIND_HAS_TABLE_AM(rel->rd_rel->relkind))
112 0 : ereport(ERROR,
113 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
114 : errmsg("cannot operate on relation \"%s\"",
115 : RelationGetRelationName(rel)),
116 : errdetail_relkind_not_supported(rel->rd_rel->relkind)));
117 :
118 0 : if (rel->rd_rel->relam != HEAP_TABLE_AM_OID)
119 0 : ereport(ERROR,
120 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
121 : errmsg("only heap AM is supported")));
122 :
123 : /* Must be owner of the table or superuser. */
124 0 : if (!object_ownercheck(RelationRelationId, RelationGetRelid(rel), GetUserId()))
125 0 : aclcheck_error(ACLCHECK_NOT_OWNER,
126 0 : get_relkind_objtype(rel->rd_rel->relkind),
127 0 : RelationGetRelationName(rel));
128 :
129 0 : tids = ((ItemPointer) ARR_DATA_PTR(ta));
130 :
131 : /*
132 : * If there is more than one TID in the array, sort them so that we can
133 : * easily fetch all the TIDs belonging to one particular page from the
134 : * array.
135 : */
136 0 : if (ntids > 1)
137 0 : qsort(tids, ntids, sizeof(ItemPointerData), tidcmp);
138 :
139 0 : curr_start_ptr = next_start_ptr = 0;
140 0 : nblocks = RelationGetNumberOfBlocks(rel);
141 :
142 : /*
143 : * Loop, performing the necessary actions for each block.
144 : */
145 0 : while (next_start_ptr != ntids)
146 : {
147 0 : Buffer buf;
148 0 : Buffer vmbuf = InvalidBuffer;
149 0 : Page page;
150 0 : BlockNumber blkno;
151 0 : OffsetNumber curoff;
152 0 : OffsetNumber maxoffset;
153 0 : int i;
154 0 : bool did_modify_page = false;
155 0 : bool did_modify_vm = false;
156 :
157 0 : CHECK_FOR_INTERRUPTS();
158 :
159 : /*
160 : * Find all the TIDs belonging to one particular page starting from
161 : * next_start_ptr and process them one by one.
162 : */
163 0 : blkno = find_tids_one_page(tids, ntids, &next_start_ptr);
164 :
165 : /* Check whether the block number is valid. */
166 0 : if (blkno >= nblocks)
167 : {
168 : /* Update the current_start_ptr before moving to the next page. */
169 0 : curr_start_ptr = next_start_ptr;
170 :
171 0 : ereport(NOTICE,
172 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
173 : errmsg("skipping block %u for relation \"%s\" because the block number is out of range",
174 : blkno, RelationGetRelationName(rel))));
175 0 : continue;
176 : }
177 :
178 0 : buf = ReadBuffer(rel, blkno);
179 0 : LockBufferForCleanup(buf);
180 :
181 0 : page = BufferGetPage(buf);
182 :
183 0 : maxoffset = PageGetMaxOffsetNumber(page);
184 :
185 : /*
186 : * Figure out which TIDs we are going to process and which ones we are
187 : * going to skip.
188 : */
189 0 : memset(include_this_tid, 0, sizeof(include_this_tid));
190 0 : for (i = curr_start_ptr; i < next_start_ptr; i++)
191 : {
192 0 : OffsetNumber offno = ItemPointerGetOffsetNumberNoCheck(&tids[i]);
193 0 : ItemId itemid;
194 :
195 : /* Check whether the offset number is valid. */
196 0 : if (offno == InvalidOffsetNumber || offno > maxoffset)
197 : {
198 0 : ereport(NOTICE,
199 : errmsg("skipping tid (%u, %u) for relation \"%s\" because the item number is out of range",
200 : blkno, offno, RelationGetRelationName(rel)));
201 0 : continue;
202 : }
203 :
204 0 : itemid = PageGetItemId(page, offno);
205 :
206 : /* Only accept an item ID that is used. */
207 0 : if (ItemIdIsRedirected(itemid))
208 : {
209 0 : ereport(NOTICE,
210 : errmsg("skipping tid (%u, %u) for relation \"%s\" because it redirects to item %u",
211 : blkno, offno, RelationGetRelationName(rel),
212 : ItemIdGetRedirect(itemid)));
213 0 : continue;
214 : }
215 0 : else if (ItemIdIsDead(itemid))
216 : {
217 0 : ereport(NOTICE,
218 : (errmsg("skipping tid (%u, %u) for relation \"%s\" because it is marked dead",
219 : blkno, offno, RelationGetRelationName(rel))));
220 0 : continue;
221 : }
222 0 : else if (!ItemIdIsUsed(itemid))
223 : {
224 0 : ereport(NOTICE,
225 : (errmsg("skipping tid (%u, %u) for relation \"%s\" because it is marked unused",
226 : blkno, offno, RelationGetRelationName(rel))));
227 0 : continue;
228 : }
229 :
230 : /* Mark it for processing. */
231 0 : Assert(offno < MaxHeapTuplesPerPage);
232 0 : include_this_tid[offno] = true;
233 0 : }
234 :
235 : /*
236 : * Before entering the critical section, pin the visibility map page
237 : * if it appears to be necessary.
238 : */
239 0 : if (heap_force_opt == HEAP_FORCE_KILL && PageIsAllVisible(page))
240 0 : visibilitymap_pin(rel, blkno, &vmbuf);
241 :
242 : /* No ereport(ERROR) from here until all the changes are logged. */
243 0 : START_CRIT_SECTION();
244 :
245 0 : for (curoff = FirstOffsetNumber; curoff <= maxoffset;
246 0 : curoff = OffsetNumberNext(curoff))
247 : {
248 0 : ItemId itemid;
249 :
250 0 : if (!include_this_tid[curoff])
251 0 : continue;
252 :
253 0 : itemid = PageGetItemId(page, curoff);
254 0 : Assert(ItemIdIsNormal(itemid));
255 :
256 0 : did_modify_page = true;
257 :
258 0 : if (heap_force_opt == HEAP_FORCE_KILL)
259 : {
260 0 : ItemIdSetDead(itemid);
261 :
262 : /*
263 : * If the page is marked all-visible, we must clear
264 : * PD_ALL_VISIBLE flag on the page header and an all-visible
265 : * bit on the visibility map corresponding to the page.
266 : */
267 0 : if (PageIsAllVisible(page))
268 : {
269 0 : PageClearAllVisible(page);
270 0 : visibilitymap_clear(rel, blkno, vmbuf,
271 : VISIBILITYMAP_VALID_BITS);
272 0 : did_modify_vm = true;
273 0 : }
274 0 : }
275 : else
276 : {
277 0 : HeapTupleHeader htup;
278 :
279 0 : Assert(heap_force_opt == HEAP_FORCE_FREEZE);
280 :
281 0 : htup = (HeapTupleHeader) PageGetItem(page, itemid);
282 :
283 : /*
284 : * Reset all visibility-related fields of the tuple. This
285 : * logic should mimic heap_execute_freeze_tuple(), but we
286 : * choose to reset xmin and ctid just to be sure that no
287 : * potentially-garbled data is left behind.
288 : */
289 0 : ItemPointerSet(&htup->t_ctid, blkno, curoff);
290 0 : HeapTupleHeaderSetXmin(htup, FrozenTransactionId);
291 0 : HeapTupleHeaderSetXmax(htup, InvalidTransactionId);
292 0 : if (htup->t_infomask & HEAP_MOVED)
293 : {
294 0 : if (htup->t_infomask & HEAP_MOVED_OFF)
295 0 : HeapTupleHeaderSetXvac(htup, InvalidTransactionId);
296 : else
297 0 : HeapTupleHeaderSetXvac(htup, FrozenTransactionId);
298 0 : }
299 :
300 : /*
301 : * Clear all the visibility-related bits of this tuple and
302 : * mark it as frozen. Also, get rid of HOT_UPDATED and
303 : * KEYS_UPDATES bits.
304 : */
305 0 : htup->t_infomask &= ~HEAP_XACT_MASK;
306 0 : htup->t_infomask |= (HEAP_XMIN_FROZEN | HEAP_XMAX_INVALID);
307 0 : htup->t_infomask2 &= ~HEAP_HOT_UPDATED;
308 0 : htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
309 0 : }
310 0 : }
311 :
312 : /*
313 : * If the page was modified, only then, we mark the buffer dirty or do
314 : * the WAL logging.
315 : */
316 0 : if (did_modify_page)
317 : {
318 : /* Mark buffer dirty before we write WAL. */
319 0 : MarkBufferDirty(buf);
320 :
321 : /* XLOG stuff */
322 0 : if (RelationNeedsWAL(rel))
323 0 : log_newpage_buffer(buf, true);
324 0 : }
325 :
326 : /* WAL log the VM page if it was modified. */
327 0 : if (did_modify_vm && RelationNeedsWAL(rel))
328 0 : log_newpage_buffer(vmbuf, false);
329 :
330 0 : END_CRIT_SECTION();
331 :
332 0 : UnlockReleaseBuffer(buf);
333 :
334 0 : if (vmbuf != InvalidBuffer)
335 0 : ReleaseBuffer(vmbuf);
336 :
337 : /* Update the current_start_ptr before moving to the next page. */
338 0 : curr_start_ptr = next_start_ptr;
339 0 : }
340 :
341 0 : relation_close(rel, RowExclusiveLock);
342 :
343 0 : pfree(ta);
344 :
345 0 : PG_RETURN_VOID();
346 0 : }
347 :
348 : /*-------------------------------------------------------------------------
349 : * tidcmp()
350 : *
351 : * Compare two item pointers, return -1, 0, or +1.
352 : *
353 : * See ItemPointerCompare for details.
354 : * ------------------------------------------------------------------------
355 : */
356 : static int32
357 0 : tidcmp(const void *a, const void *b)
358 : {
359 0 : const ItemPointerData *iptr1 = a;
360 0 : const ItemPointerData *iptr2 = b;
361 :
362 0 : return ItemPointerCompare(iptr1, iptr2);
363 0 : }
364 :
365 : /*-------------------------------------------------------------------------
366 : * sanity_check_tid_array()
367 : *
368 : * Perform sanity checks on the given tid array, and set *ntids to the
369 : * number of items in the array.
370 : * ------------------------------------------------------------------------
371 : */
372 : static void
373 0 : sanity_check_tid_array(ArrayType *ta, int *ntids)
374 : {
375 0 : if (ARR_HASNULL(ta) && array_contains_nulls(ta))
376 0 : ereport(ERROR,
377 : (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
378 : errmsg("array must not contain nulls")));
379 :
380 0 : if (ARR_NDIM(ta) > 1)
381 0 : ereport(ERROR,
382 : (errcode(ERRCODE_DATA_EXCEPTION),
383 : errmsg("argument must be empty or one-dimensional array")));
384 :
385 0 : *ntids = ArrayGetNItems(ARR_NDIM(ta), ARR_DIMS(ta));
386 0 : }
387 :
388 : /*-------------------------------------------------------------------------
389 : * find_tids_one_page()
390 : *
391 : * Find all the tids residing in the same page as tids[next_start_ptr], and
392 : * update next_start_ptr so that it points to the first tid in the next page.
393 : *
394 : * NOTE: The input tids[] array must be sorted.
395 : * ------------------------------------------------------------------------
396 : */
397 : static BlockNumber
398 0 : find_tids_one_page(ItemPointer tids, int ntids, OffsetNumber *next_start_ptr)
399 : {
400 0 : int i;
401 0 : BlockNumber prev_blkno,
402 : blkno;
403 :
404 0 : prev_blkno = blkno = InvalidBlockNumber;
405 :
406 0 : for (i = *next_start_ptr; i < ntids; i++)
407 : {
408 0 : ItemPointerData tid = tids[i];
409 :
410 0 : blkno = ItemPointerGetBlockNumberNoCheck(&tid);
411 :
412 0 : if (i == *next_start_ptr)
413 0 : prev_blkno = blkno;
414 :
415 0 : if (prev_blkno != blkno)
416 0 : break;
417 0 : }
418 :
419 0 : *next_start_ptr = i;
420 0 : return prev_blkno;
421 0 : }
|