LCOV - code coverage report
Current view: top level - contrib/pg_visibility - pg_visibility.c (source / functions) Coverage Total Hit
Test: Code coverage Lines: 0.0 % 418 0
Test Date: 2026-01-26 10:56:24 Functions: 0.0 % 25 0
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * pg_visibility.c
       4              :  *        display visibility map information and page-level visibility bits
       5              :  *
       6              :  * Copyright (c) 2016-2026, PostgreSQL Global Development Group
       7              :  *
       8              :  *        contrib/pg_visibility/pg_visibility.c
       9              :  *-------------------------------------------------------------------------
      10              :  */
      11              : #include "postgres.h"
      12              : 
      13              : #include "access/heapam.h"
      14              : #include "access/htup_details.h"
      15              : #include "access/visibilitymap.h"
      16              : #include "access/xloginsert.h"
      17              : #include "catalog/pg_type.h"
      18              : #include "catalog/storage_xlog.h"
      19              : #include "funcapi.h"
      20              : #include "miscadmin.h"
      21              : #include "storage/bufmgr.h"
      22              : #include "storage/proc.h"
      23              : #include "storage/procarray.h"
      24              : #include "storage/read_stream.h"
      25              : #include "storage/smgr.h"
      26              : #include "utils/rel.h"
      27              : 
      28            0 : PG_MODULE_MAGIC_EXT(            /* module magic block; records the module name and PG_VERSION as its version */
      29              :                                         .name = "pg_visibility",
      30              :                                         .version = PG_VERSION
      31              : );
      32              : 
      33              : typedef struct vbits            /* result set built by collect_visibility_data */
      34              : {
      35              :         BlockNumber next;       /* index of the next block to emit */
      36              :         BlockNumber count;      /* number of entries in bits[]; set to the relation's block count */
      37              :         uint8           bits[FLEXIBLE_ARRAY_MEMBER];    /* per block: bit 0 = VM all-visible, bit 1 = VM all-frozen, bit 2 = page-level all-visible */
      38              : } vbits;
      39              : 
      40              : typedef struct corrupt_items    /* result set iterated by pg_check_frozen / pg_check_visible */
      41              : {
      42              :         BlockNumber next;       /* index into tids[] of the next TID to return */
      43              :         BlockNumber count;      /* iteration stops once next reaches this value */
      44              :         ItemPointer tids;       /* array of TIDs; it is filled in outside this excerpt */
      45              : } corrupt_items;
      46              : 
      47              : /* private state for collect_corrupt_items_read_stream_next_block (that function is not part of this excerpt, so the field roles are not shown here -- TODO confirm against it) */
      48              : struct collect_corrupt_items_read_stream_private
      49              : {
      50              :         bool            all_frozen;
      51              :         bool            all_visible;
      52              :         BlockNumber current_blocknum;
      53              :         BlockNumber last_exclusive;
      54              :         Relation        rel;
      55              :         Buffer          vmbuffer;
      56              : };
      57              : 
      58            0 : PG_FUNCTION_INFO_V1(pg_visibility_map);         /* one V1 info entry per PG_FUNCTION_ARGS entry point defined below */
      59            0 : PG_FUNCTION_INFO_V1(pg_visibility_map_rel);
      60            0 : PG_FUNCTION_INFO_V1(pg_visibility);
      61            0 : PG_FUNCTION_INFO_V1(pg_visibility_rel);
      62            0 : PG_FUNCTION_INFO_V1(pg_visibility_map_summary);
      63            0 : PG_FUNCTION_INFO_V1(pg_check_frozen);
      64            0 : PG_FUNCTION_INFO_V1(pg_check_visible);
      65            0 : PG_FUNCTION_INFO_V1(pg_truncate_visibility_map);
      66              : 
      67              : static TupleDesc pg_visibility_tupdesc(bool include_blkno, bool include_pd);    /* forward declarations of the file-local (static) helpers */
      68              : static vbits *collect_visibility_data(Oid relid, bool include_pd);
      69              : static corrupt_items *collect_corrupt_items(Oid relid, bool all_visible,
      70              :                                                                                         bool all_frozen);
      71              : static void record_corrupt_item(corrupt_items *items, ItemPointer tid);
      72              : static bool tuple_all_visible(HeapTuple tup, TransactionId OldestXmin,
      73              :                                                           Buffer buffer);
      74              : static void check_relation_relkind(Relation rel);
      75              : 
      76              : /*
      77              :  * Visibility map information for a single block of a relation.
      78              :  *
                      :  * Arguments are the relation OID (arg 0) and an int64 block number
                      :  * (arg 1).  The result is a two-column row (all_visible, all_frozen)
                      :  * decoded from the VM status bits of that block.  A block number that
                      :  * is negative or above MaxBlockNumber raises "invalid block number"
                      :  * with ERRCODE_INVALID_PARAMETER_VALUE.
                      :  *
      79              :  * Note: the VM code will silently return zeroes for pages past the end
      80              :  * of the map, so we allow probes up to MaxBlockNumber regardless of the
      81              :  * actual relation size.
      82              :  */
      83              : Datum
      84            0 : pg_visibility_map(PG_FUNCTION_ARGS)
      85              : {
      86            0 :         Oid                     relid = PG_GETARG_OID(0);
      87            0 :         int64           blkno = PG_GETARG_INT64(1);
      88            0 :         int32           mapbits;
      89            0 :         Relation        rel;
      90            0 :         Buffer          vmbuffer = InvalidBuffer;
      91            0 :         TupleDesc       tupdesc;
      92            0 :         Datum           values[2];
      93            0 :         bool            nulls[2] = {0};
      94              : 
      95            0 :         rel = relation_open(relid, AccessShareLock);
      96              : 
      97              :         /* Only some relkinds have a visibility map */
      98            0 :         check_relation_relkind(rel);
      99              : 
     100            0 :         if (blkno < 0 || blkno > MaxBlockNumber)
     101            0 :                 ereport(ERROR,
     102              :                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     103              :                                  errmsg("invalid block number")));
     104              : 
     105            0 :         tupdesc = pg_visibility_tupdesc(false, false);  /* no blkno column, no pd_all_visible column */
     106              : 
     107            0 :         mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
     108            0 :         if (vmbuffer != InvalidBuffer)  /* release the VM buffer if the lookup left one set */
     109            0 :                 ReleaseBuffer(vmbuffer);
     110            0 :         values[0] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0);
     111            0 :         values[1] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0);
     112              : 
     113            0 :         relation_close(rel, AccessShareLock);
     114              : 
     115            0 :         PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
     116            0 : }
     117              : 
     118              : /*
     119              :  * Visibility map information for a single block of a relation, plus the
     120              :  * page-level information for the same block.
                      :  *
                      :  * Arguments are the relation OID (arg 0) and an int64 block number
                      :  * (arg 1).  The result is a three-column row (all_visible, all_frozen,
                      :  * pd_all_visible).  The first two come from the visibility map; the
                      :  * third is PageIsAllVisible() of the heap page, read under a share
                      :  * buffer lock, or false when the block is past the end of the relation.
                      :  * A block number that is negative or above MaxBlockNumber raises
                      :  * "invalid block number".
     121              :  */
     122              : Datum
     123            0 : pg_visibility(PG_FUNCTION_ARGS)
     124              : {
     125            0 :         Oid                     relid = PG_GETARG_OID(0);
     126            0 :         int64           blkno = PG_GETARG_INT64(1);
     127            0 :         int32           mapbits;
     128            0 :         Relation        rel;
     129            0 :         Buffer          vmbuffer = InvalidBuffer;
     130            0 :         Buffer          buffer;
     131            0 :         Page            page;
     132            0 :         TupleDesc       tupdesc;
     133            0 :         Datum           values[3];
     134            0 :         bool            nulls[3] = {0};
     135              : 
     136            0 :         rel = relation_open(relid, AccessShareLock);
     137              : 
     138              :         /* Only some relkinds have a visibility map */
     139            0 :         check_relation_relkind(rel);
     140              : 
     141            0 :         if (blkno < 0 || blkno > MaxBlockNumber)
     142            0 :                 ereport(ERROR,
     143              :                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     144              :                                  errmsg("invalid block number")));
     145              : 
     146            0 :         tupdesc = pg_visibility_tupdesc(false, true);   /* no blkno column, but include pd_all_visible */
     147              : 
     148            0 :         mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
     149            0 :         if (vmbuffer != InvalidBuffer)  /* release the VM buffer if the lookup left one set */
     150            0 :                 ReleaseBuffer(vmbuffer);
     151            0 :         values[0] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0);
     152            0 :         values[1] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0);
     153              : 
     154              :         /* Here we have to explicitly check rel size ... */
     155            0 :         if (blkno < RelationGetNumberOfBlocks(rel))
     156              :         {
     157            0 :                 buffer = ReadBuffer(rel, blkno);
     158            0 :                 LockBuffer(buffer, BUFFER_LOCK_SHARE);
     159              : 
     160            0 :                 page = BufferGetPage(buffer);
     161            0 :                 values[2] = BoolGetDatum(PageIsAllVisible(page));
     162              : 
     163            0 :                 UnlockReleaseBuffer(buffer);
     164            0 :         }
     165              :         else
     166              :         {
     167              :                 /* As with the vismap, silently return 0 for pages past EOF */
     168            0 :                 values[2] = BoolGetDatum(false);
     169              :         }
     170              : 
     171            0 :         relation_close(rel, AccessShareLock);
     172              : 
     173            0 :         PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
     174            0 : }
     175              : 
     176              : /*
     177              :  * Visibility map information for every block in a relation.
                      :  *
                      :  * Set-returning function taking the relation OID (arg 0).  The first
                      :  * call builds the tuple descriptor and gathers the per-block data with
                      :  * collect_visibility_data(), both while the multi-call memory context
                      :  * is current.  Each call then emits one row (blkno, all_visible,
                      :  * all_frozen) and advances the cursor; once every block has been
                      :  * emitted the function reports completion.
     178              :  */
     179              : Datum
     180            0 : pg_visibility_map_rel(PG_FUNCTION_ARGS)
     181              : {
     182            0 :         FuncCallContext *funcctx;
     183            0 :         vbits      *info;
     184              : 
     185            0 :         if (SRF_IS_FIRSTCALL())
     186              :         {
     187            0 :                 Oid                     relid = PG_GETARG_OID(0);
     188            0 :                 MemoryContext oldcontext;
     189              : 
     190            0 :                 funcctx = SRF_FIRSTCALL_INIT();
     191            0 :                 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
     192            0 :                 funcctx->tuple_desc = pg_visibility_tupdesc(true, false);       /* blkno column, no pd_all_visible */
     193              :                 /* collect_visibility_data will verify the relkind */
     194            0 :                 funcctx->user_fctx = collect_visibility_data(relid, false);
     195            0 :                 MemoryContextSwitchTo(oldcontext);
     196            0 :         }
     197              : 
     198            0 :         funcctx = SRF_PERCALL_SETUP();
     199            0 :         info = (vbits *) funcctx->user_fctx;
     200              : 
     201            0 :         if (info->next < info->count)
     202              :         {
     203            0 :                 Datum           values[3];
     204            0 :                 bool            nulls[3] = {0};
     205            0 :                 HeapTuple       tuple;
     206              : 
     207            0 :                 values[0] = Int64GetDatum(info->next);
     208            0 :                 values[1] = BoolGetDatum((info->bits[info->next] & (1 << 0)) != 0);     /* bit 0: VM all-visible */
     209            0 :                 values[2] = BoolGetDatum((info->bits[info->next] & (1 << 1)) != 0);     /* bit 1: VM all-frozen */
     210            0 :                 info->next++;
     211              : 
     212            0 :                 tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
     213            0 :                 SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
     214            0 :         }
     215              : 
     216            0 :         SRF_RETURN_DONE(funcctx);
     217            0 : }
     218              : 
     219              : /*
     220              :  * Visibility map information for every block in a relation, plus the page
     221              :  * level information for each block.
                      :  *
                      :  * Set-returning function taking the relation OID (arg 0).  Works like
                      :  * pg_visibility_map_rel, but asks collect_visibility_data() for the
                      :  * page-level bit as well and emits four-column rows (blkno,
                      :  * all_visible, all_frozen, pd_all_visible).
     222              :  */
     223              : Datum
     224            0 : pg_visibility_rel(PG_FUNCTION_ARGS)
     225              : {
     226            0 :         FuncCallContext *funcctx;
     227            0 :         vbits      *info;
     228              : 
     229            0 :         if (SRF_IS_FIRSTCALL())
     230              :         {
     231            0 :                 Oid                     relid = PG_GETARG_OID(0);
     232            0 :                 MemoryContext oldcontext;
     233              : 
     234            0 :                 funcctx = SRF_FIRSTCALL_INIT();
     235            0 :                 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
     236            0 :                 funcctx->tuple_desc = pg_visibility_tupdesc(true, true);        /* blkno column and pd_all_visible column */
     237              :                 /* collect_visibility_data will verify the relkind */
     238            0 :                 funcctx->user_fctx = collect_visibility_data(relid, true);
     239            0 :                 MemoryContextSwitchTo(oldcontext);
     240            0 :         }
     241              : 
     242            0 :         funcctx = SRF_PERCALL_SETUP();
     243            0 :         info = (vbits *) funcctx->user_fctx;
     244              : 
     245            0 :         if (info->next < info->count)
     246              :         {
     247            0 :                 Datum           values[4];
     248            0 :                 bool            nulls[4] = {0};
     249            0 :                 HeapTuple       tuple;
     250              : 
     251            0 :                 values[0] = Int64GetDatum(info->next);
     252            0 :                 values[1] = BoolGetDatum((info->bits[info->next] & (1 << 0)) != 0);     /* bit 0: VM all-visible */
     253            0 :                 values[2] = BoolGetDatum((info->bits[info->next] & (1 << 1)) != 0);     /* bit 1: VM all-frozen */
     254            0 :                 values[3] = BoolGetDatum((info->bits[info->next] & (1 << 2)) != 0);     /* bit 2: page-level all-visible */
     255            0 :                 info->next++;
     256              : 
     257            0 :                 tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
     258            0 :                 SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
     259            0 :         }
     260              : 
     261            0 :         SRF_RETURN_DONE(funcctx);
     262            0 : }
     263              : 
     264              : /*
     265              :  * Count the number of all-visible and all-frozen pages in the visibility
     266              :  * map for a particular relation.
                      :  *
                      :  * Takes the relation OID (arg 0) and returns a two-column row of int64
                      :  * counts: all-visible pages, then all-frozen pages, as reported by
                      :  * visibilitymap_count().  The row type is taken from the call's
                      :  * declared result type rather than built locally; if that is not a
                      :  * composite type the function raises "return type must be a row type".
     267              :  */
     268              : Datum
     269            0 : pg_visibility_map_summary(PG_FUNCTION_ARGS)
     270              : {
     271            0 :         Oid                     relid = PG_GETARG_OID(0);
     272            0 :         Relation        rel;
     273            0 :         BlockNumber all_visible = 0;
     274            0 :         BlockNumber all_frozen = 0;
     275            0 :         TupleDesc       tupdesc;
     276            0 :         Datum           values[2];
     277            0 :         bool            nulls[2] = {0};
     278              : 
     279            0 :         rel = relation_open(relid, AccessShareLock);
     280              : 
     281              :         /* Only some relkinds have a visibility map */
     282            0 :         check_relation_relkind(rel);
     283              : 
     284            0 :         visibilitymap_count(rel, &all_visible, &all_frozen);
     285              : 
     286            0 :         relation_close(rel, AccessShareLock);
     287              : 
     288            0 :         if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
     289            0 :                 elog(ERROR, "return type must be a row type");
     290              : 
     291            0 :         values[0] = Int64GetDatum((int64) all_visible);
     292            0 :         values[1] = Int64GetDatum((int64) all_frozen);
     293              : 
     294            0 :         PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
     295            0 : }
     296              : 
     297              : /*
     298              :  * Return the TIDs of non-frozen tuples present in pages marked all-frozen
     299              :  * in the visibility map.  We hope no one will ever find any, but there could
     300              :  * be bugs, database corruption, etc.
                      :  *
                      :  * Set-returning function taking the relation OID (arg 0).  The first
                      :  * call runs collect_corrupt_items() with all_visible = false and
                      :  * all_frozen = true while the multi-call memory context is current;
                      :  * each call then returns a pointer to the next entry of the collected
                      :  * TID array until the array is exhausted.
     301              :  */
     302              : Datum
     303            0 : pg_check_frozen(PG_FUNCTION_ARGS)
     304              : {
     305            0 :         FuncCallContext *funcctx;
     306            0 :         corrupt_items *items;
     307              : 
     308            0 :         if (SRF_IS_FIRSTCALL())
     309              :         {
     310            0 :                 Oid                     relid = PG_GETARG_OID(0);
     311            0 :                 MemoryContext oldcontext;
     312              : 
     313            0 :                 funcctx = SRF_FIRSTCALL_INIT();
     314            0 :                 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
     315              :                 /* collect_corrupt_items will verify the relkind */
     316            0 :                 funcctx->user_fctx = collect_corrupt_items(relid, false, true);
     317            0 :                 MemoryContextSwitchTo(oldcontext);
     318            0 :         }
     319              : 
     320            0 :         funcctx = SRF_PERCALL_SETUP();
     321            0 :         items = (corrupt_items *) funcctx->user_fctx;
     322              : 
     323            0 :         if (items->next < items->count)
     324            0 :                 SRF_RETURN_NEXT(funcctx, PointerGetDatum(&items->tids[items->next++]));        /* hand back the current TID, then advance */
     325              : 
     326            0 :         SRF_RETURN_DONE(funcctx);
     327            0 : }
     328              : 
     329              : /*
     330              :  * Return the TIDs of not-all-visible tuples in pages marked all-visible
     331              :  * in the visibility map.  We hope no one will ever find any, but there could
     332              :  * be bugs, database corruption, etc.
                      :  *
                      :  * Set-returning function taking the relation OID (arg 0).  The first
                      :  * call runs collect_corrupt_items() with all_visible = true and
                      :  * all_frozen = false while the multi-call memory context is current;
                      :  * each call then returns a pointer to the next entry of the collected
                      :  * TID array until the array is exhausted.
     333              :  */
     334              : Datum
     335            0 : pg_check_visible(PG_FUNCTION_ARGS)
     336              : {
     337            0 :         FuncCallContext *funcctx;
     338            0 :         corrupt_items *items;
     339              : 
     340            0 :         if (SRF_IS_FIRSTCALL())
     341              :         {
     342            0 :                 Oid                     relid = PG_GETARG_OID(0);
     343            0 :                 MemoryContext oldcontext;
     344              : 
     345            0 :                 funcctx = SRF_FIRSTCALL_INIT();
     346            0 :                 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
     347              :                 /* collect_corrupt_items will verify the relkind */
     348            0 :                 funcctx->user_fctx = collect_corrupt_items(relid, true, false);
     349            0 :                 MemoryContextSwitchTo(oldcontext);
     350            0 :         }
     351              : 
     352            0 :         funcctx = SRF_PERCALL_SETUP();
     353            0 :         items = (corrupt_items *) funcctx->user_fctx;
     354              : 
     355            0 :         if (items->next < items->count)
     356            0 :                 SRF_RETURN_NEXT(funcctx, PointerGetDatum(&items->tids[items->next++]));        /* hand back the current TID, then advance */
     357              : 
     358            0 :         SRF_RETURN_DONE(funcctx);
     359            0 : }
     360              : 
     361              : /*
     362              :  * Remove the visibility map fork for a relation.  If there turn out to be
     363              :  * any bugs in the visibility map code that require rebuilding the VM, this
     364              :  * provides users with a way to do it that is cleaner than shutting down the
     365              :  * server and removing files by hand.
     366              :  *
     367              :  * This is a cut-down version of RelationTruncate.
                      :  *
                      :  * Takes the relation OID (arg 0) and returns void.  The relation is
                      :  * opened with AccessExclusiveLock, which is released again before
                      :  * returning.  When the relation needs WAL, an XLOG_SMGR_TRUNCATE record
                      :  * flagged SMGR_TRUNCATE_VM is inserted and flushed before the fork is
                      :  * physically truncated.
     368              :  */
     369              : Datum
     370            0 : pg_truncate_visibility_map(PG_FUNCTION_ARGS)
     371              : {
     372            0 :         Oid                     relid = PG_GETARG_OID(0);
     373            0 :         Relation        rel;
     374            0 :         ForkNumber      fork;
     375            0 :         BlockNumber block;
     376            0 :         BlockNumber old_block;
     377              : 
     378            0 :         rel = relation_open(relid, AccessExclusiveLock);
     379              : 
     380              :         /* Only some relkinds have a visibility map */
     381            0 :         check_relation_relkind(rel);
     382              : 
     383              :         /* Forcibly reset cached file size */
     384            0 :         RelationGetSmgr(rel)->smgr_cached_nblocks[VISIBILITYMAP_FORKNUM] = InvalidBlockNumber;
     385              : 
     386              :         /* Compute new and old size before entering critical section. */
     387            0 :         fork = VISIBILITYMAP_FORKNUM;
     388            0 :         block = visibilitymap_prepare_truncate(rel, 0);
     389            0 :         old_block = BlockNumberIsValid(block) ? smgrnblocks(RelationGetSmgr(rel), fork) : 0;   /* old size is only looked up when a truncation will happen */
     390              : 
     391              :         /*
     392              :          * WAL-logging, buffer dropping, file truncation must be atomic and all on
     393              :          * one side of a checkpoint.  See RelationTruncate() for discussion.
     394              :          */
     395            0 :         Assert((MyProc->delayChkptFlags & (DELAY_CHKPT_START | DELAY_CHKPT_COMPLETE)) == 0);
     396            0 :         MyProc->delayChkptFlags |= DELAY_CHKPT_START | DELAY_CHKPT_COMPLETE;
     397            0 :         START_CRIT_SECTION();
     398              : 
     399            0 :         if (RelationNeedsWAL(rel))
     400              :         {
     401            0 :                 XLogRecPtr      lsn;
     402            0 :                 xl_smgr_truncate xlrec;
     403              : 
     404            0 :                 xlrec.blkno = 0;
     405            0 :                 xlrec.rlocator = rel->rd_locator;
     406            0 :                 xlrec.flags = SMGR_TRUNCATE_VM;
     407              : 
     408            0 :                 XLogBeginInsert();
     409            0 :                 XLogRegisterData(&xlrec, sizeof(xlrec));
     410              : 
     411            0 :                 lsn = XLogInsert(RM_SMGR_ID,
     412              :                                                  XLOG_SMGR_TRUNCATE | XLR_SPECIAL_REL_UPDATE);
     413            0 :                 XLogFlush(lsn); /* the record is flushed before the file is touched below */
     414            0 :         }
     415              : 
     416            0 :         if (BlockNumberIsValid(block))  /* no physical truncation when prepare_truncate returned an invalid block */
     417            0 :                 smgrtruncate(RelationGetSmgr(rel), &fork, 1, &old_block, &block);
     418              : 
     419            0 :         END_CRIT_SECTION();
     420            0 :         MyProc->delayChkptFlags &= ~(DELAY_CHKPT_START | DELAY_CHKPT_COMPLETE);
     421              : 
     422              :         /*
     423              :          * Release the lock right away, not at commit time.
     424              :          *
     425              :          * It would be a problem to release the lock prior to commit if this
     426              :          * truncate operation sends any transactional invalidation messages. Other
     427              :          * backends would potentially be able to lock the relation without
     428              :          * processing them in the window of time between when we release the lock
     429              :          * here and when we send the messages at our eventual commit.  However,
     430              :          * we're currently only sending a non-transactional smgr invalidation,
     431              :          * which will have been posted to shared memory immediately from within
     432              :          * smgrtruncate.  Therefore, there should be no race here.
     433              :          *
     434              :          * The reason why it's desirable to release the lock early here is because
     435              :          * of the possibility that someone will need to use this to blow away many
     436              :          * visibility map forks at once.  If we can't release the lock until
     437              :          * commit time, the transaction doing this will accumulate
     438              :          * AccessExclusiveLocks on all of those relations at the same time, which
     439              :          * is undesirable. However, if this turns out to be unsafe we may have no
     440              :          * choice...
     441              :          */
     442            0 :         relation_close(rel, AccessExclusiveLock);
     443              : 
     444              :         /* Nothing to return. */
     445            0 :         PG_RETURN_VOID();
     446            0 : }
     447              : 
     448              : /*
     449              :  * Helper function to construct whichever TupleDesc we need for a particular
     450              :  * call.
                      :  *
                      :  * The descriptor always has the boolean columns all_visible and
                      :  * all_frozen, in that order.  include_blkno puts an int8 "blkno" column
                      :  * in front of them; include_pd appends a boolean "pd_all_visible"
                      :  * column after them.  The descriptor is passed through BlessTupleDesc()
                      :  * before being returned.
     451              :  */
     452              : static TupleDesc
     453            0 : pg_visibility_tupdesc(bool include_blkno, bool include_pd)
     454              : {
     455            0 :         TupleDesc       tupdesc;
     456            0 :         AttrNumber      maxattr = 2;    /* the two columns that are always present */
     457            0 :         AttrNumber      a = 0;  /* last attribute number assigned so far */
     458              : 
     459            0 :         if (include_blkno)
     460            0 :                 ++maxattr;
     461            0 :         if (include_pd)
     462            0 :                 ++maxattr;
     463            0 :         tupdesc = CreateTemplateTupleDesc(maxattr);
     464            0 :         if (include_blkno)
     465            0 :                 TupleDescInitEntry(tupdesc, ++a, "blkno", INT8OID, -1, 0);
     466            0 :         TupleDescInitEntry(tupdesc, ++a, "all_visible", BOOLOID, -1, 0);
     467            0 :         TupleDescInitEntry(tupdesc, ++a, "all_frozen", BOOLOID, -1, 0);
     468            0 :         if (include_pd)
     469            0 :                 TupleDescInitEntry(tupdesc, ++a, "pd_all_visible", BOOLOID, -1, 0);
     470            0 :         Assert(a == maxattr);   /* every slot counted above must have been initialized */
     471              : 
     472            0 :         return BlessTupleDesc(tupdesc);
     473            0 : }
     474              : 
     475              : /*
     476              :  * Collect visibility data about a relation.
     477              :  *
     478              :  * Checks relkind of relid and will throw an error if the relation does not
     479              :  * have a VM.
     480              :  */
     481              : static vbits *
     482            0 : collect_visibility_data(Oid relid, bool include_pd)
     483              : {
     484            0 :         Relation        rel;
     485            0 :         BlockNumber nblocks;
     486            0 :         vbits      *info;
     487            0 :         BlockNumber blkno;
     488            0 :         Buffer          vmbuffer = InvalidBuffer;
     489            0 :         BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);
     490            0 :         BlockRangeReadStreamPrivate p;
     491            0 :         ReadStream *stream = NULL;
     492              : 
     493            0 :         rel = relation_open(relid, AccessShareLock);
     494              : 
     495              :         /* Only some relkinds have a visibility map */
     496            0 :         check_relation_relkind(rel);
     497              : 
     498            0 :         nblocks = RelationGetNumberOfBlocks(rel);
     499            0 :         info = palloc0(offsetof(vbits, bits) + nblocks);
     500            0 :         info->next = 0;
     501            0 :         info->count = nblocks;
     502              : 
     503              :         /* Create a stream if reading main fork. */
     504            0 :         if (include_pd)
     505              :         {
     506            0 :                 p.current_blocknum = 0;
     507            0 :                 p.last_exclusive = nblocks;
     508              : 
     509              :                 /*
     510              :                  * It is safe to use batchmode as block_range_read_stream_cb takes no
     511              :                  * locks.
     512              :                  */
     513            0 :                 stream = read_stream_begin_relation(READ_STREAM_FULL |
     514              :                                                                                         READ_STREAM_USE_BATCHING,
     515            0 :                                                                                         bstrategy,
     516            0 :                                                                                         rel,
     517              :                                                                                         MAIN_FORKNUM,
     518              :                                                                                         block_range_read_stream_cb,
     519              :                                                                                         &p,
     520              :                                                                                         0);
     521            0 :         }
     522              : 
     523            0 :         for (blkno = 0; blkno < nblocks; ++blkno)
     524              :         {
     525            0 :                 int32           mapbits;
     526              : 
     527              :                 /* Make sure we are interruptible. */
     528            0 :                 CHECK_FOR_INTERRUPTS();
     529              : 
     530              :                 /* Get map info. */
     531            0 :                 mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
     532            0 :                 if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0)
     533            0 :                         info->bits[blkno] |= (1 << 0);
     534            0 :                 if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
     535            0 :                         info->bits[blkno] |= (1 << 1);
     536              : 
     537              :                 /*
     538              :                  * Page-level data requires reading every block, so only get it if the
     539              :                  * caller needs it.  Use a buffer access strategy, too, to prevent
     540              :                  * cache-thrashing.
     541              :                  */
     542            0 :                 if (include_pd)
     543              :                 {
     544            0 :                         Buffer          buffer;
     545            0 :                         Page            page;
     546              : 
     547            0 :                         buffer = read_stream_next_buffer(stream, NULL);
     548            0 :                         LockBuffer(buffer, BUFFER_LOCK_SHARE);
     549              : 
     550            0 :                         page = BufferGetPage(buffer);
     551            0 :                         if (PageIsAllVisible(page))
     552            0 :                                 info->bits[blkno] |= (1 << 2);
     553              : 
     554            0 :                         UnlockReleaseBuffer(buffer);
     555            0 :                 }
     556            0 :         }
     557              : 
     558            0 :         if (include_pd)
     559              :         {
     560            0 :                 Assert(read_stream_next_buffer(stream, NULL) == InvalidBuffer);
     561            0 :                 read_stream_end(stream);
     562            0 :         }
     563              : 
     564              :         /* Clean up. */
     565            0 :         if (vmbuffer != InvalidBuffer)
     566            0 :                 ReleaseBuffer(vmbuffer);
     567            0 :         relation_close(rel, AccessShareLock);
     568              : 
     569            0 :         return info;
     570            0 : }
     571              : 
     572              : /*
     573              :  * The "strict" version of GetOldestNonRemovableTransactionId().  The
     574              :  * pg_visibility check can tolerate false negatives (failing to report some
     575              :  * errors), but can't tolerate false positives (reporting false errors). Normally,
     576              :  * horizons move forwards, but there are cases when they could move backward
     577              :  * (see comment for ComputeXidHorizons()).
     578              :  *
     579              :  * This is why we have to implement our own function for xid horizon, which
     580              :  * would be guaranteed to be newer or equal to any xid horizon computed before.
     581              :  * We have to do the following to achieve this.
     582              :  *
     583              :  * 1. Ignore processes' xmins, because they take into account connections to
     584              :  *    other databases that were ignored before.
     585              :  * 2. Ignore KnownAssignedXids, as they are not database-aware. Although we
     586              :  *    now perform minimal checking on a standby by always using nextXid, this
     587              :  *    approach is better than nothing and will at least catch extremely broken
     588              :  *    cases where a xid is in the future.
     589              :  * 3. Ignore walsender xmin, because it could go backward if some replication
     590              :  *    connections don't use replication slots.
     591              :  *
     592              :  * While it might seem like we could use KnownAssignedXids for shared
     593              :  * catalogs, since shared catalogs rely on a global horizon rather than a
     594              :  * database-specific one - there are potential edge cases.  For example, a
     595              :  * transaction may crash on the primary without writing a commit/abort record.
     596              :  * This would lead to a situation where it appears to still be running on the
     597              :  * standby, even though it has already ended on the primary.  For this reason,
     598              :  * it's safer to ignore KnownAssignedXids, even for shared catalogs.
     599              :  *
     600              :  * As a result, we're using only currently running xids to compute the horizon.
     601              :  * This certainly sacrifices a good deal of accuracy, but we have to do so
     602              :  * to avoid reporting false errors.
     603              :  */
     604              : static TransactionId
     605            0 : GetStrictOldestNonRemovableTransactionId(Relation rel)
     606              : {
     607            0 :         RunningTransactions runningTransactions;
     608              : 
     609            0 :         if (RecoveryInProgress())
     610              :         {
     611            0 :                 TransactionId result;
     612              : 
     613              :                 /* As we ignore KnownAssignedXids on standby, just pick nextXid (read under XidGenLock) */
     614            0 :                 LWLockAcquire(XidGenLock, LW_SHARED);
     615            0 :                 result = XidFromFullTransactionId(TransamVariables->nextXid);
     616            0 :                 LWLockRelease(XidGenLock);
     617            0 :                 return result;
     618            0 :         }
     619            0 :         else if (rel == NULL || rel->rd_rel->relisshared)
     620              :         {
     621              :                 /* Shared relation (or no relation given): take into account all running xids */
     622            0 :                 runningTransactions = GetRunningTransactionData();
     623            0 :                 LWLockRelease(ProcArrayLock);
     624            0 :                 LWLockRelease(XidGenLock);
     625            0 :                 return runningTransactions->oldestRunningXid;
     626              :         }
     627            0 :         else if (!RELATION_IS_LOCAL(rel))
     628              :         {
     629              :                 /*
     630              :                  * Normal relation: take into account xids running within the current
     631              :                  * database
     632              :                  */
     633            0 :                 runningTransactions = GetRunningTransactionData();
     634            0 :                 LWLockRelease(ProcArrayLock);
     635            0 :                 LWLockRelease(XidGenLock);
     636            0 :                 return runningTransactions->oldestDatabaseRunningXid;
     637              :         }
     638              :         else
     639              :         {
     640              :                 /*
     641              :                  * For temporary relations, ComputeXidHorizons() uses only
     642              :                  * TransamVariables->latestCompletedXid and MyProc->xid.  These two
     643              :                  * shouldn't go backwards.  So we're fine with this horizon.
     644              :                  */
     645            0 :                 return GetOldestNonRemovableTransactionId(rel);
     646              :         }
     647            0 : }
     648              : 
     649              : /*
     650              :  * Read stream callback for collect_corrupt_items(): returns the next block whose
     651              :  * VM bits match a requested check, or InvalidBlockNumber when none remain.
     652              :  */
     653              : static BlockNumber
     654            0 : collect_corrupt_items_read_stream_next_block(ReadStream *stream,
     655              :                                                                                          void *callback_private_data,
     656              :                                                                                          void *per_buffer_data)
     657              : {
     658            0 :         struct collect_corrupt_items_read_stream_private *p = callback_private_data;
     659              : 
     660            0 :         for (; p->current_blocknum < p->last_exclusive; p->current_blocknum++)
     661              :         {
     662            0 :                 bool            check_frozen = false;
     663            0 :                 bool            check_visible = false;
     664              : 
     665              :                 /* Make sure we are interruptible. */
     666            0 :                 CHECK_FOR_INTERRUPTS();
     667              : 
     668            0 :                 if (p->all_frozen && VM_ALL_FROZEN(p->rel, p->current_blocknum, &p->vmbuffer))
     669            0 :                         check_frozen = true;
     670            0 :                 if (p->all_visible && VM_ALL_VISIBLE(p->rel, p->current_blocknum, &p->vmbuffer))
     671            0 :                         check_visible = true;
     672            0 :                 if (!check_visible && !check_frozen)
     673            0 :                         continue;
     674              : 
     675            0 :                 return p->current_blocknum++;
     676            0 :         }
     677              : 
     678            0 :         return InvalidBlockNumber;
     679            0 : }
     680              : 
     681              : /*
     682              :  * Returns a list of items whose visibility map information does not match
     683              :  * the status of the tuples on the page.
     684              :  *
     685              :  * If all_visible is passed as true, this will include all items which are
     686              :  * on pages marked as all-visible in the visibility map but which do not
     687              :  * seem to in fact be all-visible.
     688              :  *
     689              :  * If all_frozen is passed as true, this will include all items which are
     690              :  * on pages marked as all-frozen but which do not seem to in fact be frozen.
     691              :  *
     692              :  * Checks relkind of relid and will throw an error if the relation does not
     693              :  * have a VM.
     694              :  */
     695              : static corrupt_items *
     696            0 : collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
     697              : {
     698            0 :         Relation        rel;
     699            0 :         corrupt_items *items;
     700            0 :         Buffer          vmbuffer = InvalidBuffer;
     701            0 :         BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);
     702            0 :         TransactionId OldestXmin = InvalidTransactionId;
     703            0 :         struct collect_corrupt_items_read_stream_private p;
     704            0 :         ReadStream *stream;
     705            0 :         Buffer          buffer;
     706              : 
     707            0 :         rel = relation_open(relid, AccessShareLock);
     708              : 
     709              :         /* Only some relkinds have a visibility map */
     710            0 :         check_relation_relkind(rel);
     711              : 
     712            0 :         if (all_visible)
     713            0 :                 OldestXmin = GetStrictOldestNonRemovableTransactionId(rel);
     714              : 
     715              :         /*
     716              :          * Guess an initial array size. We don't expect many corrupted tuples, so
     717              :          * start with a small array.  This function uses the "next" field to track
     718              :          * the next offset where we can store an item (which is the same thing as
     719              :          * the number of items found so far) and the "count" field to track the
     720              :          * number of entries allocated.  We'll repurpose these fields before
     721              :          * returning.
     722              :          */
     723            0 :         items = palloc0_object(corrupt_items);
     724            0 :         items->next = 0;
     725            0 :         items->count = 64;
     726            0 :         items->tids = palloc(items->count * sizeof(ItemPointerData));
     727              : 
     728            0 :         p.current_blocknum = 0;
     729            0 :         p.last_exclusive = RelationGetNumberOfBlocks(rel);
     730            0 :         p.rel = rel;
     731            0 :         p.vmbuffer = InvalidBuffer;
     732            0 :         p.all_frozen = all_frozen;
     733            0 :         p.all_visible = all_visible;
     734            0 :         stream = read_stream_begin_relation(READ_STREAM_FULL,
     735            0 :                                                                                 bstrategy,
     736            0 :                                                                                 rel,
     737              :                                                                                 MAIN_FORKNUM,
     738              :                                                                                 collect_corrupt_items_read_stream_next_block,
     739              :                                                                                 &p,
     740              :                                                                                 0);
     741              : 
     742              :         /* Loop over every block that the read stream callback selected. */
     743            0 :         while ((buffer = read_stream_next_buffer(stream, NULL)) != InvalidBuffer)
     744              :         {
     745            0 :                 bool            check_frozen = all_frozen;
     746            0 :                 bool            check_visible = all_visible;
     747            0 :                 Page            page;
     748            0 :                 OffsetNumber offnum,
     749              :                                         maxoff;
     750            0 :                 BlockNumber blkno;
     751              : 
     752              :                 /* Make sure we are interruptible. */
     753            0 :                 CHECK_FOR_INTERRUPTS();
     754              : 
     755            0 :                 LockBuffer(buffer, BUFFER_LOCK_SHARE);
     756              : 
     757            0 :                 page = BufferGetPage(buffer);
     758            0 :                 maxoff = PageGetMaxOffsetNumber(page);
     759            0 :                 blkno = BufferGetBlockNumber(buffer);
     760              : 
     761              :                 /*
     762              :                  * The visibility map bits might have changed while we were acquiring
     763              :                  * the page lock.  Recheck to avoid returning spurious results.
     764              :                  */
     765            0 :                 if (check_frozen && !VM_ALL_FROZEN(rel, blkno, &vmbuffer))
     766            0 :                         check_frozen = false;
     767            0 :                 if (check_visible && !VM_ALL_VISIBLE(rel, blkno, &vmbuffer))
     768            0 :                         check_visible = false;
     769            0 :                 if (!check_visible && !check_frozen)
     770              :                 {
     771            0 :                         UnlockReleaseBuffer(buffer);
     772            0 :                         continue;
     773              :                 }
     774              : 
     775              :                 /* Iterate over each tuple on the page. */
     776            0 :                 for (offnum = FirstOffsetNumber;
     777            0 :                          offnum <= maxoff;
     778            0 :                          offnum = OffsetNumberNext(offnum))
     779              :                 {
     780            0 :                         HeapTupleData tuple;
     781            0 :                         ItemId          itemid;
     782              : 
     783            0 :                         itemid = PageGetItemId(page, offnum);
     784              : 
     785              :                         /* Unused or redirect line pointers are of no interest. */
     786            0 :                         if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
     787            0 :                                 continue;
     788              : 
     789              :                         /* Dead line pointers are neither all-visible nor frozen. */
     790            0 :                         if (ItemIdIsDead(itemid))
     791              :                         {
     792            0 :                                 ItemPointerSet(&(tuple.t_self), blkno, offnum);
     793            0 :                                 record_corrupt_item(items, &tuple.t_self);
     794            0 :                                 continue;
     795              :                         }
     796              : 
     797              :                         /* Initialize a HeapTupleData structure for checks below. */
     798            0 :                         ItemPointerSet(&(tuple.t_self), blkno, offnum);
     799            0 :                         tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
     800            0 :                         tuple.t_len = ItemIdGetLength(itemid);
     801            0 :                         tuple.t_tableOid = relid;
     802              : 
     803              :                         /*
     804              :                          * If we're checking whether the page is all-visible, we expect
     805              :                          * the tuple to be all-visible.
     806              :                          */
     807            0 :                         if (check_visible &&
     808            0 :                                 !tuple_all_visible(&tuple, OldestXmin, buffer))
     809              :                         {
     810            0 :                                 TransactionId RecomputedOldestXmin;
     811              : 
     812              :                                 /*
     813              :                                  * Time has passed since we computed OldestXmin, so it's
     814              :                                  * possible that this tuple is all-visible in reality even
     815              :                                  * though it doesn't appear so based on our
     816              :                                  * previously-computed value.  Let's compute a new value so we
     817              :                                  * can be certain whether there is a problem.
     818              :                                  *
     819              :                                  * From a concurrency point of view, it sort of sucks to
     820              :                                  * retake ProcArrayLock here while we're holding the buffer
     821              :                                  * locked in shared mode, but it should be safe against
     822              :                                  * deadlocks, because surely
     823              :                                  * GetStrictOldestNonRemovableTransactionId() should never
     824              :                                  * take a buffer lock. And this shouldn't happen often, so
     825              :                                  * it's worth being careful so as to avoid false positives.
     826              :                                  */
     827            0 :                                 RecomputedOldestXmin = GetStrictOldestNonRemovableTransactionId(rel);
     828              : 
     829            0 :                                 if (!TransactionIdPrecedes(OldestXmin, RecomputedOldestXmin))
     830            0 :                                         record_corrupt_item(items, &tuple.t_self);
     831              :                                 else
     832              :                                 {
     833            0 :                                         OldestXmin = RecomputedOldestXmin;
     834            0 :                                         if (!tuple_all_visible(&tuple, OldestXmin, buffer))
     835            0 :                                                 record_corrupt_item(items, &tuple.t_self);
     836              :                                 }
     837            0 :                         }
     838              : 
     839              :                         /*
     840              :                          * If we're checking whether the page is all-frozen, we expect the
     841              :                          * tuple to be in a state where it will never need freezing.
     842              :                          */
     843            0 :                         if (check_frozen)
     844              :                         {
     845            0 :                                 if (heap_tuple_needs_eventual_freeze(tuple.t_data))
     846            0 :                                         record_corrupt_item(items, &tuple.t_self);
     847            0 :                         }
     848            0 :                 }
     849              : 
     850            0 :                 UnlockReleaseBuffer(buffer);
     851            0 :         }
     852            0 :         read_stream_end(stream);
     853              : 
     854              :         /* Clean up: release our VM buffer and the callback's, then close the relation. */
     855            0 :         if (vmbuffer != InvalidBuffer)
     856            0 :                 ReleaseBuffer(vmbuffer);
     857            0 :         if (p.vmbuffer != InvalidBuffer)
     858            0 :                 ReleaseBuffer(p.vmbuffer);
     859            0 :         relation_close(rel, AccessShareLock);
     860              : 
     861              :         /*
     862              :          * Before returning, repurpose the fields to match caller's expectations.
     863              :          * next is now the next item that should be read (rather than written) and
     864              :          * count is now the number of items we wrote (rather than the number we
     865              :          * allocated).
     866              :          */
     867            0 :         items->count = items->next;
     868            0 :         items->next = 0;
     869              : 
     870            0 :         return items;
     871            0 : }
     872              : 
     873              : /*
     874              :  * Remember one corrupt item, doubling the tids array first if it is full.
     875              :  */
     876              : static void
     877            0 : record_corrupt_item(corrupt_items *items, ItemPointer tid)
     878              : {
     879              :         /* enlarge output array if needed: "count" is the allocated size here */
     880            0 :         if (items->next >= items->count)
     881              :         {
     882            0 :                 items->count *= 2;
     883            0 :                 items->tids = repalloc(items->tids,
     884            0 :                                                            items->count * sizeof(ItemPointerData));
     885            0 :         }
     886              :         /* and add the new item */
     887            0 :         items->tids[items->next++] = *tid;
     888            0 : }
     889              : 
     890              : /*
     891              :  * Return true if a tuple is all-visible relative to a given OldestXmin value
     892              :  * (live, with xmin preceding it).  The buffer should contain the tuple and be locked and pinned.
     893              :  */
     894              : static bool
     895            0 : tuple_all_visible(HeapTuple tup, TransactionId OldestXmin, Buffer buffer)
     896              : {
     897            0 :         HTSV_Result state;
     898            0 :         TransactionId xmin;
     899              : 
     900            0 :         state = HeapTupleSatisfiesVacuum(tup, OldestXmin, buffer);
     901            0 :         if (state != HEAPTUPLE_LIVE)
     902            0 :                 return false;                   /* all-visible implies live */
     903              : 
     904              :         /*
     905              :          * Neither lazy_scan_heap nor heap_page_is_all_visible will mark a page
     906              :          * all-visible unless every tuple is hinted committed. However, those hint
     907              :          * bits could be lost after a crash, so we can't be certain that they'll
     908              :          * be set here.  So just check the xmin.
     909              :          */
     910              : 
     911            0 :         xmin = HeapTupleHeaderGetXmin(tup->t_data);
     912            0 :         if (!TransactionIdPrecedes(xmin, OldestXmin))
     913            0 :                 return false;                   /* xmin not old enough for all to see */
     914              : 
     915            0 :         return true;
     916            0 : }
     917              : 
     918              : /*
     919              :  * check_relation_relkind - convenience routine to check that relation
     920              :  * is of a relkind supported by the callers; raises an ERROR otherwise
     921              :  */
     922              : static void
     923            0 : check_relation_relkind(Relation rel)
     924              : {
     925            0 :         if (!RELKIND_HAS_TABLE_AM(rel->rd_rel->relkind))
     926            0 :                 ereport(ERROR,
     927              :                                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
     928              :                                  errmsg("relation \"%s\" is of wrong relation kind",
     929              :                                                 RelationGetRelationName(rel)),
     930              :                                  errdetail_relkind_not_supported(rel->rd_rel->relkind)));
     931            0 : }
        

Generated by: LCOV version 2.3.2-1