LCOV - code coverage report
Current view: top level - src/common - blkreftable.c (source / functions) Coverage Total Hit
Test: Code coverage Lines: 0.0 % 481 0
Test Date: 2026-01-26 10:56:24 Functions: 0.0 % 22 0
Legend: Lines:     hit not hit
Branches: + taken - not taken # not executed
Branches: 0.0 % 209 0

             Branch data     Line data    Source code
       1                 :             : /*-------------------------------------------------------------------------
       2                 :             :  *
       3                 :             :  * blkreftable.c
       4                 :             :  *        Block reference tables.
       5                 :             :  *
       6                 :             :  * A block reference table is used to keep track of which blocks have
       7                 :             :  * been modified by WAL records within a certain LSN range.
       8                 :             :  *
       9                 :             :  * For each relation fork, we keep track of all blocks that have appeared
      10                 :             :  * in block reference in the WAL. We also keep track of the "limit block",
      11                 :             :  * which is the smallest relation length in blocks known to have occurred
      12                 :             :  * during that range of WAL records.  This should be set to 0 if the relation
      13                 :             :  * fork is created or destroyed, and to the post-truncation length if
      14                 :             :  * truncated.
      15                 :             :  *
      16                 :             :  * Whenever we set the limit block, we also forget about any modified blocks
      17                 :             :  * beyond that point. Those blocks don't exist any more. Such blocks can
      18                 :             :  * later be marked as modified again; if that happens, it means the relation
      19                 :             :  * was re-extended.
      20                 :             :  *
      21                 :             :  * Portions Copyright (c) 2010-2026, PostgreSQL Global Development Group
      22                 :             :  *
      23                 :             :  * src/common/blkreftable.c
      24                 :             :  *
      25                 :             :  *-------------------------------------------------------------------------
      26                 :             :  */
      27                 :             : 
      28                 :             : 
      29                 :             : #ifndef FRONTEND
      30                 :             : #include "postgres.h"
      31                 :             : #else
      32                 :             : #include "postgres_fe.h"
      33                 :             : #endif
      34                 :             : 
      35                 :             : #ifdef FRONTEND
      36                 :             : #include "common/logging.h"
      37                 :             : #endif
      38                 :             : 
      39                 :             : #include "common/blkreftable.h"
      40                 :             : #include "common/hashfn.h"
      41                 :             : #include "port/pg_crc32c.h"
      42                 :             : 
      43                 :             : /*
      44                 :             :  * A block reference table keeps track of the status of each relation
      45                 :             :  * fork individually.
      46                 :             :  */
      47                 :             : typedef struct BlockRefTableKey
      48                 :             : {
      49                 :             :         RelFileLocator rlocator;
      50                 :             :         ForkNumber      forknum;
      51                 :             : } BlockRefTableKey;
      52                 :             : 
      53                 :             : /*
      54                 :             :  * We could need to store data either for a relation in which only a
      55                 :             :  * tiny fraction of the blocks have been modified or for a relation in
      56                 :             :  * which nearly every block has been modified, and we want a
      57                 :             :  * space-efficient representation in both cases. To accomplish this,
      58                 :             :  * we divide the relation into chunks of 2^16 blocks and choose between
      59                 :             :  * an array representation and a bitmap representation for each chunk.
      60                 :             :  *
      61                 :             :  * When the number of modified blocks in a given chunk is small, we
      62                 :             :  * essentially store an array of block numbers, but we need not store the
      63                 :             :  * entire block number: instead, we store each block number as a 2-byte
      64                 :             :  * offset from the start of the chunk.
      65                 :             :  *
      66                 :             :  * When the number of modified blocks in a given chunk is large, we switch
      67                 :             :  * to a bitmap representation.
      68                 :             :  *
      69                 :             :  * These same basic representational choices are used both when a block
      70                 :             :  * reference table is stored in memory and when it is serialized to disk.
      71                 :             :  *
      72                 :             :  * In the in-memory representation, we initially allocate each chunk with
      73                 :             :  * space for a number of entries given by INITIAL_ENTRIES_PER_CHUNK and
      74                 :             :  * increase that as necessary until we reach MAX_ENTRIES_PER_CHUNK.
      75                 :             :  * Any chunk whose allocated size reaches MAX_ENTRIES_PER_CHUNK is converted
      76                 :             :  * to a bitmap, and thus never needs to grow further.
      77                 :             :  */
      78                 :             : #define BLOCKS_PER_CHUNK                (1 << 16)
      79                 :             : #define BLOCKS_PER_ENTRY                (BITS_PER_BYTE * sizeof(uint16))
      80                 :             : #define MAX_ENTRIES_PER_CHUNK   (BLOCKS_PER_CHUNK / BLOCKS_PER_ENTRY)
      81                 :             : #define INITIAL_ENTRIES_PER_CHUNK       16
      82                 :             : typedef uint16 *BlockRefTableChunk;
      83                 :             : 
      84                 :             : /*
      85                 :             :  * State for one relation fork.
      86                 :             :  *
      87                 :             :  * 'rlocator' and 'forknum' identify the relation fork to which this entry
      88                 :             :  * pertains.
      89                 :             :  *
      90                 :             :  * 'limit_block' is the shortest known length of the relation in blocks
      91                 :             :  * within the LSN range covered by a particular block reference table.
      92                 :             :  * It should be set to 0 if the relation fork is created or dropped. If the
      93                 :             :  * relation fork is truncated, it should be set to the number of blocks that
      94                 :             :  * remain after truncation.
      95                 :             :  *
      96                 :             :  * 'nchunks' is the allocated length of each of the three arrays that follow.
      97                 :             :  * We can only represent the status of block numbers less than nchunks *
      98                 :             :  * BLOCKS_PER_CHUNK.
      99                 :             :  *
     100                 :             :  * 'chunk_size' is an array storing the allocated size of each chunk.
     101                 :             :  *
     102                 :             :  * 'chunk_usage' is an array storing the number of elements used in each
     103                 :             :  * chunk. If that value is less than MAX_ENTRIES_PER_CHUNK, the corresponding
     104                 :             :  * chunk is used as an array; else the corresponding chunk is used as a bitmap.
     105                 :             :  * When used as a bitmap, the least significant bit of the first array element
     106                 :             :  * is the status of the lowest-numbered block covered by this chunk.
     107                 :             :  *
     108                 :             :  * 'chunk_data' is the array of chunks.
     109                 :             :  */
     110                 :             : struct BlockRefTableEntry
     111                 :             : {
     112                 :             :         BlockRefTableKey key;
     113                 :             :         BlockNumber limit_block;
     114                 :             :         char            status;
     115                 :             :         uint32          nchunks;
     116                 :             :         uint16     *chunk_size;
     117                 :             :         uint16     *chunk_usage;
     118                 :             :         BlockRefTableChunk *chunk_data;
     119                 :             : };
     120                 :             : 
     121                 :             : /* Declare and define a hash table over type BlockRefTableEntry. */
     122                 :             : #define SH_PREFIX blockreftable
     123                 :             : #define SH_ELEMENT_TYPE BlockRefTableEntry
     124                 :             : #define SH_KEY_TYPE BlockRefTableKey
     125                 :             : #define SH_KEY key
     126                 :             : #define SH_HASH_KEY(tb, key) \
     127                 :             :         hash_bytes((const unsigned char *) &key, sizeof(BlockRefTableKey))
     128                 :             : #define SH_EQUAL(tb, a, b) (memcmp(&a, &b, sizeof(BlockRefTableKey)) == 0)
     129                 :             : #define SH_SCOPE static inline
     130                 :             : #ifdef FRONTEND
     131                 :             : #define SH_RAW_ALLOCATOR pg_malloc0
     132                 :             : #endif
     133                 :             : #define SH_DEFINE
     134                 :             : #define SH_DECLARE
     135                 :             : #include "lib/simplehash.h"
     136                 :             : 
     137                 :             : /*
     138                 :             :  * A block reference table is basically just the hash table, but we don't
     139                 :             :  * want to expose that to outside callers.
     140                 :             :  *
     141                 :             :  * We keep track of the memory context in use explicitly too, so that it's
     142                 :             :  * easy to place all of our allocations in the same context.
     143                 :             :  */
     144                 :             : struct BlockRefTable
     145                 :             : {
     146                 :             :         blockreftable_hash *hash;
     147                 :             : #ifndef FRONTEND
     148                 :             :         MemoryContext mcxt;
     149                 :             : #endif
     150                 :             : };
     151                 :             : 
     152                 :             : /*
     153                 :             :  * On-disk serialization format for block reference table entries.
     154                 :             :  */
     155                 :             : typedef struct BlockRefTableSerializedEntry
     156                 :             : {
     157                 :             :         RelFileLocator rlocator;
     158                 :             :         ForkNumber      forknum;
     159                 :             :         BlockNumber limit_block;
     160                 :             :         uint32          nchunks;
     161                 :             : } BlockRefTableSerializedEntry;
     162                 :             : 
     163                 :             : /*
     164                 :             :  * Buffer size, so that we avoid doing many small I/Os.
     165                 :             :  */
     166                 :             : #define BUFSIZE                                 65536
     167                 :             : 
     168                 :             : /*
     169                 :             :  * Ad-hoc buffer for file I/O.
     170                 :             :  */
     171                 :             : typedef struct BlockRefTableBuffer
     172                 :             : {
     173                 :             :         io_callback_fn io_callback;
     174                 :             :         void       *io_callback_arg;
     175                 :             :         char            data[BUFSIZE];
     176                 :             :         int                     used;
     177                 :             :         int                     cursor;
     178                 :             :         pg_crc32c       crc;
     179                 :             : } BlockRefTableBuffer;
     180                 :             : 
     181                 :             : /*
     182                 :             :  * State for keeping track of progress while incrementally reading a block
     183                 :             :  * table reference file from disk.
     184                 :             :  *
     185                 :             :  * total_chunks means the number of chunks for the RelFileLocator/ForkNumber
     186                 :             :  * combination that is currently being read, and consumed_chunks is the number
     187                 :             :  * of those that have been read. (We always read all the information for
     188                 :             :  * a single chunk at one time, so we don't need to be able to represent the
     189                 :             :  * state where a chunk has been partially read.)
     190                 :             :  *
     191                 :             :  * chunk_size is the array of chunk sizes. The length is given by total_chunks.
     192                 :             :  *
     193                 :             :  * chunk_data holds the current chunk.
     194                 :             :  *
     195                 :             :  * chunk_position helps us figure out how much progress we've made in returning
     196                 :             :  * the block numbers for the current chunk to the caller. If the chunk is a
     197                 :             :  * bitmap, it's the number of bits we've scanned; otherwise, it's the number
     198                 :             :  * of chunk entries we've scanned.
     199                 :             :  */
     200                 :             : struct BlockRefTableReader
     201                 :             : {
     202                 :             :         BlockRefTableBuffer buffer;
     203                 :             :         char       *error_filename;
     204                 :             :         report_error_fn error_callback;
     205                 :             :         void       *error_callback_arg;
     206                 :             :         uint32          total_chunks;
     207                 :             :         uint32          consumed_chunks;
     208                 :             :         uint16     *chunk_size;
     209                 :             :         uint16          chunk_data[MAX_ENTRIES_PER_CHUNK];
     210                 :             :         uint32          chunk_position;
     211                 :             : };
     212                 :             : 
     213                 :             : /*
     214                 :             :  * State for keeping track of progress while incrementally writing a block
     215                 :             :  * reference table file to disk.
     216                 :             :  */
     217                 :             : struct BlockRefTableWriter
     218                 :             : {
     219                 :             :         BlockRefTableBuffer buffer;
     220                 :             : };
     221                 :             : 
     222                 :             : /* Function prototypes. */
     223                 :             : static int      BlockRefTableComparator(const void *a, const void *b);
     224                 :             : static void BlockRefTableFlush(BlockRefTableBuffer *buffer);
     225                 :             : static void BlockRefTableRead(BlockRefTableReader *reader, void *data,
     226                 :             :                                                           int length);
     227                 :             : static void BlockRefTableWrite(BlockRefTableBuffer *buffer, void *data,
     228                 :             :                                                            int length);
     229                 :             : static void BlockRefTableFileTerminate(BlockRefTableBuffer *buffer);
     230                 :             : 
     231                 :             : /*
     232                 :             :  * Create an empty block reference table.
     233                 :             :  */
     234                 :             : BlockRefTable *
     235                 :           0 : CreateEmptyBlockRefTable(void)
     236                 :             : {
     237                 :           0 :         BlockRefTable *brtab = palloc_object(BlockRefTable);
     238                 :             : 
     239                 :             :         /*
     240                 :             :          * Even completely empty database has a few hundred relation forks, so it
     241                 :             :          * seems best to size the hash on the assumption that we're going to have
     242                 :             :          * at least a few thousand entries.
     243                 :             :          */
     244                 :             : #ifdef FRONTEND
     245                 :           0 :         brtab->hash = blockreftable_create(4096, NULL);
     246                 :             : #else
     247                 :           0 :         brtab->mcxt = CurrentMemoryContext;
     248                 :           0 :         brtab->hash = blockreftable_create(brtab->mcxt, 4096, NULL);
     249                 :             : #endif
     250                 :             : 
     251                 :           0 :         return brtab;
     252                 :           0 : }
     253                 :             : 
     254                 :             : /*
     255                 :             :  * Set the "limit block" for a relation fork and forget any modified blocks
     256                 :             :  * with equal or higher block numbers.
     257                 :             :  *
     258                 :             :  * The "limit block" is the shortest known length of the relation within the
     259                 :             :  * range of WAL records covered by this block reference table.
     260                 :             :  */
     261                 :             : void
     262                 :           0 : BlockRefTableSetLimitBlock(BlockRefTable *brtab,
     263                 :             :                                                    const RelFileLocator *rlocator,
     264                 :             :                                                    ForkNumber forknum,
     265                 :             :                                                    BlockNumber limit_block)
     266                 :             : {
     267                 :           0 :         BlockRefTableEntry *brtentry;
     268                 :           0 :         BlockRefTableKey key = {0}; /* make sure any padding is zero */
     269                 :           0 :         bool            found;
     270                 :             : 
     271                 :           0 :         memcpy(&key.rlocator, rlocator, sizeof(RelFileLocator));
     272                 :           0 :         key.forknum = forknum;
     273                 :           0 :         brtentry = blockreftable_insert(brtab->hash, key, &found);
     274                 :             : 
     275         [ #  # ]:           0 :         if (!found)
     276                 :             :         {
     277                 :             :                 /*
     278                 :             :                  * We have no existing data about this relation fork, so just record
     279                 :             :                  * the limit_block value supplied by the caller, and make sure other
     280                 :             :                  * parts of the entry are properly initialized.
     281                 :             :                  */
     282                 :           0 :                 brtentry->limit_block = limit_block;
     283                 :           0 :                 brtentry->nchunks = 0;
     284                 :           0 :                 brtentry->chunk_size = NULL;
     285                 :           0 :                 brtentry->chunk_usage = NULL;
     286                 :           0 :                 brtentry->chunk_data = NULL;
     287                 :           0 :                 return;
     288                 :             :         }
     289                 :             : 
     290                 :           0 :         BlockRefTableEntrySetLimitBlock(brtentry, limit_block);
     291         [ #  # ]:           0 : }
     292                 :             : 
     293                 :             : /*
     294                 :             :  * Mark a block in a given relation fork as known to have been modified.
     295                 :             :  */
     296                 :             : void
     297                 :           0 : BlockRefTableMarkBlockModified(BlockRefTable *brtab,
     298                 :             :                                                            const RelFileLocator *rlocator,
     299                 :             :                                                            ForkNumber forknum,
     300                 :             :                                                            BlockNumber blknum)
     301                 :             : {
     302                 :           0 :         BlockRefTableEntry *brtentry;
     303                 :           0 :         BlockRefTableKey key = {0}; /* make sure any padding is zero */
     304                 :           0 :         bool            found;
     305                 :             : #ifndef FRONTEND
     306                 :           0 :         MemoryContext oldcontext = MemoryContextSwitchTo(brtab->mcxt);
     307                 :             : #endif
     308                 :             : 
     309                 :           0 :         memcpy(&key.rlocator, rlocator, sizeof(RelFileLocator));
     310                 :           0 :         key.forknum = forknum;
     311                 :           0 :         brtentry = blockreftable_insert(brtab->hash, key, &found);
     312                 :             : 
     313         [ #  # ]:           0 :         if (!found)
     314                 :             :         {
     315                 :             :                 /*
     316                 :             :                  * We want to set the initial limit block value to something higher
     317                 :             :                  * than any legal block number. InvalidBlockNumber fits the bill.
     318                 :             :                  */
     319                 :           0 :                 brtentry->limit_block = InvalidBlockNumber;
     320                 :           0 :                 brtentry->nchunks = 0;
     321                 :           0 :                 brtentry->chunk_size = NULL;
     322                 :           0 :                 brtentry->chunk_usage = NULL;
     323                 :           0 :                 brtentry->chunk_data = NULL;
     324                 :           0 :         }
     325                 :             : 
     326                 :           0 :         BlockRefTableEntryMarkBlockModified(brtentry, forknum, blknum);
     327                 :             : 
     328                 :             : #ifndef FRONTEND
     329                 :           0 :         MemoryContextSwitchTo(oldcontext);
     330                 :             : #endif
     331                 :           0 : }
     332                 :             : 
     333                 :             : /*
     334                 :             :  * Get an entry from a block reference table.
     335                 :             :  *
     336                 :             :  * If the entry does not exist, this function returns NULL. Otherwise, it
     337                 :             :  * returns the entry and sets *limit_block to the value from the entry.
     338                 :             :  */
     339                 :             : BlockRefTableEntry *
     340                 :           0 : BlockRefTableGetEntry(BlockRefTable *brtab, const RelFileLocator *rlocator,
     341                 :             :                                           ForkNumber forknum, BlockNumber *limit_block)
     342                 :             : {
     343                 :           0 :         BlockRefTableKey key = {0}; /* make sure any padding is zero */
     344                 :           0 :         BlockRefTableEntry *entry;
     345                 :             : 
     346         [ #  # ]:           0 :         Assert(limit_block != NULL);
     347                 :             : 
     348                 :           0 :         memcpy(&key.rlocator, rlocator, sizeof(RelFileLocator));
     349                 :           0 :         key.forknum = forknum;
     350                 :           0 :         entry = blockreftable_lookup(brtab->hash, key);
     351                 :             : 
     352         [ #  # ]:           0 :         if (entry != NULL)
     353                 :           0 :                 *limit_block = entry->limit_block;
     354                 :             : 
     355                 :           0 :         return entry;
     356                 :           0 : }
     357                 :             : 
     358                 :             : /*
     359                 :             :  * Get block numbers from a table entry.
     360                 :             :  *
     361                 :             :  * 'blocks' must point to enough space to hold at least 'nblocks' block
     362                 :             :  * numbers, and any block numbers we manage to get will be written there.
     363                 :             :  * The return value is the number of block numbers actually written.
     364                 :             :  *
     365                 :             :  * We do not return block numbers unless they are greater than or equal to
     366                 :             :  * start_blkno and strictly less than stop_blkno.
     367                 :             :  */
     368                 :             : int
     369                 :           0 : BlockRefTableEntryGetBlocks(BlockRefTableEntry *entry,
     370                 :             :                                                         BlockNumber start_blkno,
     371                 :             :                                                         BlockNumber stop_blkno,
     372                 :             :                                                         BlockNumber *blocks,
     373                 :             :                                                         int nblocks)
     374                 :             : {
     375                 :           0 :         uint32          start_chunkno;
     376                 :           0 :         uint32          stop_chunkno;
     377                 :           0 :         uint32          chunkno;
     378                 :           0 :         int                     nresults = 0;
     379                 :             : 
     380         [ #  # ]:           0 :         Assert(entry != NULL);
     381                 :             : 
     382                 :             :         /*
     383                 :             :          * Figure out which chunks could potentially contain blocks of interest.
     384                 :             :          *
     385                 :             :          * We need to be careful about overflow here, because stop_blkno could be
     386                 :             :          * InvalidBlockNumber or something very close to it.
     387                 :             :          */
     388                 :           0 :         start_chunkno = start_blkno / BLOCKS_PER_CHUNK;
     389                 :           0 :         stop_chunkno = stop_blkno / BLOCKS_PER_CHUNK;
     390         [ #  # ]:           0 :         if ((stop_blkno % BLOCKS_PER_CHUNK) != 0)
     391                 :           0 :                 ++stop_chunkno;
     392         [ #  # ]:           0 :         if (stop_chunkno > entry->nchunks)
     393                 :           0 :                 stop_chunkno = entry->nchunks;
     394                 :             : 
     395                 :             :         /*
     396                 :             :          * Loop over chunks.
     397                 :             :          */
     398         [ #  # ]:           0 :         for (chunkno = start_chunkno; chunkno < stop_chunkno; ++chunkno)
     399                 :             :         {
     400                 :           0 :                 uint16          chunk_usage = entry->chunk_usage[chunkno];
     401                 :           0 :                 BlockRefTableChunk chunk_data = entry->chunk_data[chunkno];
     402                 :           0 :                 unsigned        start_offset = 0;
     403                 :           0 :                 unsigned        stop_offset = BLOCKS_PER_CHUNK;
     404                 :             : 
     405                 :             :                 /*
     406                 :             :                  * If the start and/or stop block number falls within this chunk, the
     407                 :             :                  * whole chunk may not be of interest. Figure out which portion we
     408                 :             :                  * care about, if it's not the whole thing.
     409                 :             :                  */
     410         [ #  # ]:           0 :                 if (chunkno == start_chunkno)
     411                 :           0 :                         start_offset = start_blkno % BLOCKS_PER_CHUNK;
     412         [ #  # ]:           0 :                 if (chunkno == stop_chunkno - 1)
     413                 :             :                 {
     414         [ #  # ]:           0 :                         Assert(stop_blkno > chunkno * BLOCKS_PER_CHUNK);
     415                 :           0 :                         stop_offset = stop_blkno - (chunkno * BLOCKS_PER_CHUNK);
     416         [ #  # ]:           0 :                         Assert(stop_offset <= BLOCKS_PER_CHUNK);
     417                 :           0 :                 }
     418                 :             : 
     419                 :             :                 /*
     420                 :             :                  * Handling differs depending on whether this is an array of offsets
     421                 :             :                  * or a bitmap.
     422                 :             :                  */
     423         [ #  # ]:           0 :                 if (chunk_usage == MAX_ENTRIES_PER_CHUNK)
     424                 :             :                 {
     425                 :           0 :                         unsigned        i;
     426                 :             : 
     427                 :             :                         /* It's a bitmap, so test every relevant bit. */
     428         [ #  # ]:           0 :                         for (i = start_offset; i < stop_offset; ++i)
     429                 :             :                         {
     430                 :           0 :                                 uint16          w = chunk_data[i / BLOCKS_PER_ENTRY];
     431                 :             : 
     432         [ #  # ]:           0 :                                 if ((w & (1 << (i % BLOCKS_PER_ENTRY))) != 0)
     433                 :             :                                 {
     434                 :           0 :                                         BlockNumber blkno = chunkno * BLOCKS_PER_CHUNK + i;
     435                 :             : 
     436                 :           0 :                                         blocks[nresults++] = blkno;
     437                 :             : 
     438                 :             :                                         /* Early exit if we run out of output space. */
     439         [ #  # ]:           0 :                                         if (nresults == nblocks)
     440                 :           0 :                                                 return nresults;
     441         [ #  # ]:           0 :                                 }
     442         [ #  # ]:           0 :                         }
     443         [ #  # ]:           0 :                 }
     444                 :             :                 else
     445                 :             :                 {
     446                 :           0 :                         unsigned        i;
     447                 :             : 
     448                 :             :                         /* It's an array of offsets, so check each one. */
     449         [ #  # ]:           0 :                         for (i = 0; i < chunk_usage; ++i)
     450                 :             :                         {
     451                 :           0 :                                 uint16          offset = chunk_data[i];
     452                 :             : 
     453   [ #  #  #  # ]:           0 :                                 if (offset >= start_offset && offset < stop_offset)
     454                 :             :                                 {
     455                 :           0 :                                         BlockNumber blkno = chunkno * BLOCKS_PER_CHUNK + offset;
     456                 :             : 
     457                 :           0 :                                         blocks[nresults++] = blkno;
     458                 :             : 
     459                 :             :                                         /* Early exit if we run out of output space. */
     460         [ #  # ]:           0 :                                         if (nresults == nblocks)
     461                 :           0 :                                                 return nresults;
     462         [ #  # ]:           0 :                                 }
     463         [ #  # ]:           0 :                         }
     464         [ #  # ]:           0 :                 }
     465         [ #  # ]:           0 :         }
     466                 :             : 
     467                 :           0 :         return nresults;
     468                 :           0 : }
     469                 :             : 
     470                 :             : /*
     471                 :             :  * Serialize a block reference table to a file.
     472                 :             :  */
     473                 :             : void
     474                 :           0 : WriteBlockRefTable(BlockRefTable *brtab,
     475                 :             :                                    io_callback_fn write_callback,
     476                 :             :                                    void *write_callback_arg)
     477                 :             : {
     478                 :           0 :         BlockRefTableSerializedEntry *sdata = NULL;
     479                 :           0 :         BlockRefTableBuffer buffer;
     480                 :           0 :         uint32          magic = BLOCKREFTABLE_MAGIC;
     481                 :             : 
     482                 :             :         /* Prepare buffer. */
     483                 :           0 :         memset(&buffer, 0, sizeof(BlockRefTableBuffer));
     484                 :           0 :         buffer.io_callback = write_callback;
     485                 :           0 :         buffer.io_callback_arg = write_callback_arg;
     486                 :           0 :         INIT_CRC32C(buffer.crc);
     487                 :             : 
     488                 :             :         /* Write magic number. */
     489                 :           0 :         BlockRefTableWrite(&buffer, &magic, sizeof(uint32));
     490                 :             : 
     491                 :             :         /* Write the entries, assuming there are some. */
     492         [ #  # ]:           0 :         if (brtab->hash->members > 0)
     493                 :             :         {
     494                 :           0 :                 unsigned        i = 0;
     495                 :           0 :                 blockreftable_iterator it;
     496                 :           0 :                 BlockRefTableEntry *brtentry;
     497                 :             : 
     498                 :             :                 /* Extract entries into serializable format and sort them. */
     499                 :           0 :                 sdata =
     500                 :           0 :                         palloc_array(BlockRefTableSerializedEntry, brtab->hash->members);
     501                 :           0 :                 blockreftable_start_iterate(brtab->hash, &it);
     502         [ #  # ]:           0 :                 while ((brtentry = blockreftable_iterate(brtab->hash, &it)) != NULL)
     503                 :             :                 {
     504                 :           0 :                         BlockRefTableSerializedEntry *sentry = &sdata[i++];
     505                 :             : 
     506                 :           0 :                         sentry->rlocator = brtentry->key.rlocator;
     507                 :           0 :                         sentry->forknum = brtentry->key.forknum;
     508                 :           0 :                         sentry->limit_block = brtentry->limit_block;
     509                 :           0 :                         sentry->nchunks = brtentry->nchunks;
     510                 :             : 
     511                 :             :                         /* trim trailing zero entries */
     512   [ #  #  #  # ]:           0 :                         while (sentry->nchunks > 0 &&
     513                 :           0 :                                    brtentry->chunk_usage[sentry->nchunks - 1] == 0)
     514                 :           0 :                                 sentry->nchunks--;
     515                 :           0 :                 }
     516         [ #  # ]:           0 :                 Assert(i == brtab->hash->members);
     517                 :           0 :                 qsort(sdata, i, sizeof(BlockRefTableSerializedEntry),
     518                 :             :                           BlockRefTableComparator);
     519                 :             : 
     520                 :             :                 /* Loop over entries in sorted order and serialize each one. */
     521         [ #  # ]:           0 :                 for (i = 0; i < brtab->hash->members; ++i)
     522                 :             :                 {
     523                 :           0 :                         BlockRefTableSerializedEntry *sentry = &sdata[i];
     524                 :           0 :                         BlockRefTableKey key = {0}; /* make sure any padding is zero */
     525                 :           0 :                         unsigned        j;
     526                 :             : 
     527                 :             :                         /* Write the serialized entry itself. */
     528                 :           0 :                         BlockRefTableWrite(&buffer, sentry,
     529                 :             :                                                            sizeof(BlockRefTableSerializedEntry));
     530                 :             : 
     531                 :             :                         /* Look up the original entry so we can access the chunks. */
     532                 :           0 :                         memcpy(&key.rlocator, &sentry->rlocator, sizeof(RelFileLocator));
     533                 :           0 :                         key.forknum = sentry->forknum;
     534                 :           0 :                         brtentry = blockreftable_lookup(brtab->hash, key);
     535         [ #  # ]:           0 :                         Assert(brtentry != NULL);
     536                 :             : 
     537                 :             :                         /* Write the untruncated portion of the chunk length array. */
     538         [ #  # ]:           0 :                         if (sentry->nchunks != 0)
     539                 :           0 :                                 BlockRefTableWrite(&buffer, brtentry->chunk_usage,
     540                 :           0 :                                                                    sentry->nchunks * sizeof(uint16));
     541                 :             : 
     542                 :             :                         /* Write the contents of each chunk. */
     543         [ #  # ]:           0 :                         for (j = 0; j < brtentry->nchunks; ++j)
     544                 :             :                         {
     545         [ #  # ]:           0 :                                 if (brtentry->chunk_usage[j] == 0)
     546                 :           0 :                                         continue;
     547                 :           0 :                                 BlockRefTableWrite(&buffer, brtentry->chunk_data[j],
     548                 :           0 :                                                                    brtentry->chunk_usage[j] * sizeof(uint16));
     549                 :           0 :                         }
     550                 :           0 :                 }
     551                 :           0 :         }
     552                 :             : 
     553                 :             :         /* Write out appropriate terminator and CRC and flush buffer. */
     554                 :           0 :         BlockRefTableFileTerminate(&buffer);
     555                 :           0 : }
     556                 :             : 
     557                 :             : /*
     558                 :             :  * Prepare to incrementally read a block reference table file.
     559                 :             :  *
     560                 :             :  * 'read_callback' is a function that can be called to read data from the
     561                 :             :  * underlying file (or other data source) into our internal buffer.
     562                 :             :  *
     563                 :             :  * 'read_callback_arg' is an opaque argument to be passed to read_callback.
     564                 :             :  *
     565                 :             :  * 'error_filename' is the filename that should be included in error messages
     566                 :             :  * if the file is found to be malformed. The value is not copied, so the
     567                 :             :  * caller should ensure that it remains valid until done with this
     568                 :             :  * BlockRefTableReader.
     569                 :             :  *
     570                 :             :  * 'error_callback' is a function to be called if the file is found to be
     571                 :             :  * malformed. This is not used for I/O errors, which must be handled internally
     572                 :             :  * by read_callback.
     573                 :             :  *
     574                 :             :  * 'error_callback_arg' is an opaque argument to be passed to error_callback.
     575                 :             :  */
     576                 :             : BlockRefTableReader *
     577                 :           0 : CreateBlockRefTableReader(io_callback_fn read_callback,
     578                 :             :                                                   void *read_callback_arg,
     579                 :             :                                                   char *error_filename,
     580                 :             :                                                   report_error_fn error_callback,
     581                 :             :                                                   void *error_callback_arg)
     582                 :             : {
     583                 :           0 :         BlockRefTableReader *reader;
     584                 :           0 :         uint32          magic;
     585                 :             : 
     586                 :             :         /* Initialize data structure. */
     587                 :           0 :         reader = palloc0_object(BlockRefTableReader);
     588                 :           0 :         reader->buffer.io_callback = read_callback;
     589                 :           0 :         reader->buffer.io_callback_arg = read_callback_arg;
     590                 :           0 :         reader->error_filename = error_filename;
     591                 :           0 :         reader->error_callback = error_callback;
     592                 :           0 :         reader->error_callback_arg = error_callback_arg;
     593                 :           0 :         INIT_CRC32C(reader->buffer.crc);
     594                 :             : 
     595                 :             :         /* Verify magic number. */
     596                 :           0 :         BlockRefTableRead(reader, &magic, sizeof(uint32));
     597         [ #  # ]:           0 :         if (magic != BLOCKREFTABLE_MAGIC)
     598                 :           0 :                 error_callback(error_callback_arg,
     599                 :             :                                            "file \"%s\" has wrong magic number: expected %u, found %u",
     600                 :           0 :                                            error_filename,
     601                 :           0 :                                            BLOCKREFTABLE_MAGIC, magic);
     602                 :             : 
     603                 :           0 :         return reader;
     604                 :           0 : }
     605                 :             : 
     606                 :             : /*
     607                 :             :  * Read next relation fork covered by this block reference table file.
     608                 :             :  *
     609                 :             :  * After calling this function, you must call BlockRefTableReaderGetBlocks
     610                 :             :  * until it returns 0 before calling it again.
     611                 :             :  */
     612                 :             : bool
     613                 :           0 : BlockRefTableReaderNextRelation(BlockRefTableReader *reader,
     614                 :             :                                                                 RelFileLocator *rlocator,
     615                 :             :                                                                 ForkNumber *forknum,
     616                 :             :                                                                 BlockNumber *limit_block)
     617                 :             : {
     618                 :           0 :         BlockRefTableSerializedEntry sentry;
     619                 :           0 :         BlockRefTableSerializedEntry zentry = {0};
     620                 :             : 
     621                 :             :         /*
     622                 :             :          * Sanity check: caller must read all blocks from all chunks before moving
     623                 :             :          * on to the next relation.
     624                 :             :          */
     625         [ #  # ]:           0 :         Assert(reader->total_chunks == reader->consumed_chunks);
     626                 :             : 
     627                 :             :         /* Read serialized entry. */
     628                 :           0 :         BlockRefTableRead(reader, &sentry,
     629                 :             :                                           sizeof(BlockRefTableSerializedEntry));
     630                 :             : 
     631                 :             :         /*
     632                 :             :          * If we just read the sentinel entry indicating that we've reached the
     633                 :             :          * end, read and check the CRC.
     634                 :             :          */
     635         [ #  # ]:           0 :         if (memcmp(&sentry, &zentry, sizeof(BlockRefTableSerializedEntry)) == 0)
     636                 :             :         {
     637                 :           0 :                 pg_crc32c       expected_crc;
     638                 :           0 :                 pg_crc32c       actual_crc;
     639                 :             : 
     640                 :             :                 /*
     641                 :             :                  * We want to know the CRC of the file excluding the 4-byte CRC
     642                 :             :                  * itself, so copy the current value of the CRC accumulator before
     643                 :             :                  * reading those bytes, and use the copy to finalize the calculation.
     644                 :             :                  */
     645                 :           0 :                 expected_crc = reader->buffer.crc;
     646                 :           0 :                 FIN_CRC32C(expected_crc);
     647                 :             : 
     648                 :             :                 /* Now we can read the actual value. */
     649                 :           0 :                 BlockRefTableRead(reader, &actual_crc, sizeof(pg_crc32c));
     650                 :             : 
     651                 :             :                 /* Throw an error if there is a mismatch. */
     652         [ #  # ]:           0 :                 if (!EQ_CRC32C(expected_crc, actual_crc))
     653                 :           0 :                         reader->error_callback(reader->error_callback_arg,
     654                 :             :                                                                    "file \"%s\" has wrong checksum: expected %08X, found %08X",
     655                 :           0 :                                                                    reader->error_filename, expected_crc, actual_crc);
     656                 :             : 
     657                 :           0 :                 return false;
     658                 :           0 :         }
     659                 :             : 
     660                 :             :         /* Read chunk size array. */
     661         [ #  # ]:           0 :         if (reader->chunk_size != NULL)
     662                 :           0 :                 pfree(reader->chunk_size);
     663                 :           0 :         reader->chunk_size = palloc_array(uint16, sentry.nchunks);
     664                 :           0 :         BlockRefTableRead(reader, reader->chunk_size,
     665                 :           0 :                                           sentry.nchunks * sizeof(uint16));
     666                 :             : 
     667                 :             :         /* Set up for chunk scan. */
     668                 :           0 :         reader->total_chunks = sentry.nchunks;
     669                 :           0 :         reader->consumed_chunks = 0;
     670                 :             : 
     671                 :             :         /* Return data to caller. */
     672                 :           0 :         memcpy(rlocator, &sentry.rlocator, sizeof(RelFileLocator));
     673                 :           0 :         *forknum = sentry.forknum;
     674                 :           0 :         *limit_block = sentry.limit_block;
     675                 :           0 :         return true;
     676                 :           0 : }
     677                 :             : 
     678                 :             : /*
     679                 :             :  * Get modified blocks associated with the relation fork returned by
     680                 :             :  * the most recent call to BlockRefTableReaderNextRelation.
     681                 :             :  *
     682                 :             :  * On return, block numbers will be written into the 'blocks' array, whose
     683                 :             :  * length should be passed via 'nblocks'. The return value is the number of
     684                 :             :  * entries actually written into the 'blocks' array, which may be less than
     685                 :             :  * 'nblocks' if we run out of modified blocks in the relation fork before
     686                 :             :  * we run out of room in the array.
     687                 :             :  */
     688                 :             : unsigned
     689                 :           0 : BlockRefTableReaderGetBlocks(BlockRefTableReader *reader,
     690                 :             :                                                          BlockNumber *blocks,
     691                 :             :                                                          int nblocks)
     692                 :             : {
     693                 :           0 :         unsigned        blocks_found = 0;
     694                 :             : 
     695                 :             :         /* Must provide space for at least one block number to be returned. */
     696         [ #  # ]:           0 :         Assert(nblocks > 0);
     697                 :             : 
     698                 :             :         /* Loop collecting blocks to return to caller. */
     699                 :           0 :         for (;;)
     700                 :             :         {
     701                 :           0 :                 uint16          next_chunk_size;
     702                 :             : 
     703                 :             :                 /*
     704                 :             :                  * If we've read at least one chunk, maybe it contains some block
     705                 :             :                  * numbers that could satisfy caller's request.
     706                 :             :                  */
     707         [ #  # ]:           0 :                 if (reader->consumed_chunks > 0)
     708                 :             :                 {
     709                 :           0 :                         uint32          chunkno = reader->consumed_chunks - 1;
     710                 :           0 :                         uint16          chunk_size = reader->chunk_size[chunkno];
     711                 :             : 
     712         [ #  # ]:           0 :                         if (chunk_size == MAX_ENTRIES_PER_CHUNK)
     713                 :             :                         {
     714                 :             :                                 /* Bitmap format, so search for bits that are set. */
     715   [ #  #  #  # ]:           0 :                                 while (reader->chunk_position < BLOCKS_PER_CHUNK &&
     716                 :           0 :                                            blocks_found < nblocks)
     717                 :             :                                 {
     718                 :           0 :                                         uint16          chunkoffset = reader->chunk_position;
     719                 :           0 :                                         uint16          w;
     720                 :             : 
     721                 :           0 :                                         w = reader->chunk_data[chunkoffset / BLOCKS_PER_ENTRY];
     722         [ #  # ]:           0 :                                         if ((w & (1u << (chunkoffset % BLOCKS_PER_ENTRY))) != 0)
     723                 :           0 :                                                 blocks[blocks_found++] =
     724                 :           0 :                                                         chunkno * BLOCKS_PER_CHUNK + chunkoffset;
     725                 :           0 :                                         ++reader->chunk_position;
     726                 :           0 :                                 }
     727                 :           0 :                         }
     728                 :             :                         else
     729                 :             :                         {
     730                 :             :                                 /* Not in bitmap format, so each entry is a 2-byte offset. */
     731   [ #  #  #  # ]:           0 :                                 while (reader->chunk_position < chunk_size &&
     732                 :           0 :                                            blocks_found < nblocks)
     733                 :             :                                 {
     734                 :           0 :                                         blocks[blocks_found++] = chunkno * BLOCKS_PER_CHUNK
     735                 :           0 :                                                 + reader->chunk_data[reader->chunk_position];
     736                 :           0 :                                         ++reader->chunk_position;
     737                 :             :                                 }
     738                 :             :                         }
     739                 :           0 :                 }
     740                 :             : 
     741                 :             :                 /* We found enough blocks, so we're done. */
     742         [ #  # ]:           0 :                 if (blocks_found >= nblocks)
     743                 :           0 :                         break;
     744                 :             : 
     745                 :             :                 /*
     746                 :             :                  * We didn't find enough blocks, so we must need the next chunk. If
     747                 :             :                  * there are none left, though, then we're done anyway.
     748                 :             :                  */
     749         [ #  # ]:           0 :                 if (reader->consumed_chunks == reader->total_chunks)
     750                 :           0 :                         break;
     751                 :             : 
     752                 :             :                 /*
     753                 :             :                  * Read data for next chunk and reset scan position to beginning of
     754                 :             :                  * chunk. Note that the next chunk might be empty, in which case we
     755                 :             :                  * consume the chunk without actually consuming any bytes from the
     756                 :             :                  * underlying file.
     757                 :             :                  */
     758                 :           0 :                 next_chunk_size = reader->chunk_size[reader->consumed_chunks];
     759         [ #  # ]:           0 :                 if (next_chunk_size > 0)
     760                 :           0 :                         BlockRefTableRead(reader, reader->chunk_data,
     761                 :           0 :                                                           next_chunk_size * sizeof(uint16));
     762                 :           0 :                 ++reader->consumed_chunks;
     763                 :           0 :                 reader->chunk_position = 0;
     764      [ #  #  # ]:           0 :         }
     765                 :             : 
     766                 :           0 :         return blocks_found;
     767                 :           0 : }
     768                 :             : 
     769                 :             : /*
     770                 :             :  * Release memory used while reading a block reference table from a file.
     771                 :             :  */
     772                 :             : void
     773                 :           0 : DestroyBlockRefTableReader(BlockRefTableReader *reader)
     774                 :             : {
     775         [ #  # ]:           0 :         if (reader->chunk_size != NULL)
     776                 :             :         {
     777                 :           0 :                 pfree(reader->chunk_size);
     778                 :           0 :                 reader->chunk_size = NULL;
     779                 :           0 :         }
     780                 :           0 :         pfree(reader);
     781                 :           0 : }
     782                 :             : 
     783                 :             : /*
     784                 :             :  * Prepare to write a block reference table file incrementally.
     785                 :             :  *
     786                 :             :  * Caller must be able to supply BlockRefTableEntry objects sorted in the
     787                 :             :  * appropriate order.
     788                 :             :  */
     789                 :             : BlockRefTableWriter *
     790                 :           0 : CreateBlockRefTableWriter(io_callback_fn write_callback,
     791                 :             :                                                   void *write_callback_arg)
     792                 :             : {
     793                 :           0 :         BlockRefTableWriter *writer;
     794                 :           0 :         uint32          magic = BLOCKREFTABLE_MAGIC;
     795                 :             : 
     796                 :             :         /* Prepare buffer and CRC check and save callbacks. */
     797                 :           0 :         writer = palloc0_object(BlockRefTableWriter);
     798                 :           0 :         writer->buffer.io_callback = write_callback;
     799                 :           0 :         writer->buffer.io_callback_arg = write_callback_arg;
     800                 :           0 :         INIT_CRC32C(writer->buffer.crc);
     801                 :             : 
     802                 :             :         /* Write magic number. */
     803                 :           0 :         BlockRefTableWrite(&writer->buffer, &magic, sizeof(uint32));
     804                 :             : 
     805                 :           0 :         return writer;
     806                 :           0 : }
     807                 :             : 
     808                 :             : /*
     809                 :             :  * Append one entry to a block reference table file.
     810                 :             :  *
     811                 :             :  * Note that entries must be written in the proper order, that is, sorted by
     812                 :             :  * tablespace, then database, then relfilenumber, then fork number. Caller
     813                 :             :  * is responsible for supplying data in the correct order. If that seems hard,
     814                 :             :  * use an in-memory BlockRefTable instead.
     815                 :             :  */
     816                 :             : void
     817                 :           0 : BlockRefTableWriteEntry(BlockRefTableWriter *writer, BlockRefTableEntry *entry)
     818                 :             : {
     819                 :           0 :         BlockRefTableSerializedEntry sentry;
     820                 :           0 :         unsigned        j;
     821                 :             : 
     822                 :             :         /* Convert to serialized entry format. */
     823                 :           0 :         sentry.rlocator = entry->key.rlocator;
     824                 :           0 :         sentry.forknum = entry->key.forknum;
     825                 :           0 :         sentry.limit_block = entry->limit_block;
     826                 :           0 :         sentry.nchunks = entry->nchunks;
     827                 :             : 
     828                 :             :         /* Trim trailing zero entries. */
     829   [ #  #  #  # ]:           0 :         while (sentry.nchunks > 0 && entry->chunk_usage[sentry.nchunks - 1] == 0)
     830                 :           0 :                 sentry.nchunks--;
     831                 :             : 
     832                 :             :         /* Write the serialized entry itself. */
     833                 :           0 :         BlockRefTableWrite(&writer->buffer, &sentry,
     834                 :             :                                            sizeof(BlockRefTableSerializedEntry));
     835                 :             : 
     836                 :             :         /* Write the untruncated portion of the chunk length array. */
     837         [ #  # ]:           0 :         if (sentry.nchunks != 0)
     838                 :           0 :                 BlockRefTableWrite(&writer->buffer, entry->chunk_usage,
     839                 :           0 :                                                    sentry.nchunks * sizeof(uint16));
     840                 :             : 
     841                 :             :         /* Write the contents of each chunk. */
     842         [ #  # ]:           0 :         for (j = 0; j < entry->nchunks; ++j)
     843                 :             :         {
     844         [ #  # ]:           0 :                 if (entry->chunk_usage[j] == 0)
     845                 :           0 :                         continue;
     846                 :           0 :                 BlockRefTableWrite(&writer->buffer, entry->chunk_data[j],
     847                 :           0 :                                                    entry->chunk_usage[j] * sizeof(uint16));
     848                 :           0 :         }
     849                 :           0 : }
     850                 :             : 
     851                 :             : /*
     852                 :             :  * Finalize an incremental write of a block reference table file.
     853                 :             :  */
     854                 :             : void
     855                 :           0 : DestroyBlockRefTableWriter(BlockRefTableWriter *writer)
     856                 :             : {
     857                 :           0 :         BlockRefTableFileTerminate(&writer->buffer);
     858                 :           0 :         pfree(writer);
     859                 :           0 : }
     860                 :             : 
     861                 :             : /*
     862                 :             :  * Allocate a standalone BlockRefTableEntry.
     863                 :             :  *
     864                 :             :  * When we're manipulating a full in-memory BlockRefTable, the entries are
     865                 :             :  * part of the hash table and are allocated by simplehash. This routine is
     866                 :             :  * used by callers that want to write out a BlockRefTable to a file without
     867                 :             :  * needing to store the whole thing in memory at once.
     868                 :             :  *
     869                 :             :  * Entries allocated by this function can be manipulated using the functions
     870                 :             :  * BlockRefTableEntrySetLimitBlock and BlockRefTableEntryMarkBlockModified
     871                 :             :  * and then written using BlockRefTableWriteEntry and freed using
     872                 :             :  * BlockRefTableFreeEntry.
     873                 :             :  */
     874                 :             : BlockRefTableEntry *
     875                 :           0 : CreateBlockRefTableEntry(RelFileLocator rlocator, ForkNumber forknum)
     876                 :             : {
     877                 :           0 :         BlockRefTableEntry *entry = palloc0_object(BlockRefTableEntry);
     878                 :             : 
     879                 :           0 :         memcpy(&entry->key.rlocator, &rlocator, sizeof(RelFileLocator));
     880                 :           0 :         entry->key.forknum = forknum;
     881                 :           0 :         entry->limit_block = InvalidBlockNumber;
     882                 :             : 
     883                 :           0 :         return entry;
     884                 :           0 : }
     885                 :             : 
     886                 :             : /*
     887                 :             :  * Update a BlockRefTableEntry with a new value for the "limit block" and
     888                 :             :  * forget any equal-or-higher-numbered modified blocks.
     889                 :             :  *
     890                 :             :  * The "limit block" is the shortest known length of the relation within the
     891                 :             :  * range of WAL records covered by this block reference table.
     892                 :             :  */
     893                 :             : void
     894                 :           0 : BlockRefTableEntrySetLimitBlock(BlockRefTableEntry *entry,
     895                 :             :                                                                 BlockNumber limit_block)
     896                 :             : {
     897                 :           0 :         unsigned        chunkno;
     898                 :           0 :         unsigned        limit_chunkno;
     899                 :           0 :         unsigned        limit_chunkoffset;
     900                 :           0 :         BlockRefTableChunk limit_chunk;
     901                 :             : 
     902                 :             :         /* If we already have an equal or lower limit block, do nothing. */
     903         [ #  # ]:           0 :         if (limit_block >= entry->limit_block)
     904                 :           0 :                 return;
     905                 :             : 
     906                 :             :         /* Record the new limit block value. */
     907                 :           0 :         entry->limit_block = limit_block;
     908                 :             : 
     909                 :             :         /*
     910                 :             :          * Figure out which chunk would store the state of the new limit block,
     911                 :             :          * and which offset within that chunk.
     912                 :             :          */
     913                 :           0 :         limit_chunkno = limit_block / BLOCKS_PER_CHUNK;
     914                 :           0 :         limit_chunkoffset = limit_block % BLOCKS_PER_CHUNK;
     915                 :             : 
     916                 :             :         /*
     917                 :             :          * If the number of chunks is not large enough for any blocks with equal
     918                 :             :          * or higher block numbers to exist, then there is nothing further to do.
     919                 :             :          */
     920         [ #  # ]:           0 :         if (limit_chunkno >= entry->nchunks)
     921                 :           0 :                 return;
     922                 :             : 
     923                 :             :         /* Discard entire contents of any higher-numbered chunks. */
     924         [ #  # ]:           0 :         for (chunkno = limit_chunkno + 1; chunkno < entry->nchunks; ++chunkno)
     925                 :           0 :                 entry->chunk_usage[chunkno] = 0;
     926                 :             : 
     927                 :             :         /*
     928                 :             :          * Next, we need to discard any offsets within the chunk that would
     929                 :             :          * contain the limit_block. We must handle this differently depending on
     930                 :             :          * whether the chunk that would contain limit_block is a bitmap or an
     931                 :             :          * array of offsets.
     932                 :             :          */
     933                 :           0 :         limit_chunk = entry->chunk_data[limit_chunkno];
     934         [ #  # ]:           0 :         if (entry->chunk_usage[limit_chunkno] == MAX_ENTRIES_PER_CHUNK)
     935                 :             :         {
     936                 :           0 :                 unsigned        chunkoffset;
     937                 :             : 
     938                 :             :                 /* It's a bitmap. Unset bits. */
     939         [ #  # ]:           0 :                 for (chunkoffset = limit_chunkoffset; chunkoffset < BLOCKS_PER_CHUNK;
     940                 :           0 :                          ++chunkoffset)
     941                 :           0 :                         limit_chunk[chunkoffset / BLOCKS_PER_ENTRY] &=
     942                 :           0 :                                 ~(1 << (chunkoffset % BLOCKS_PER_ENTRY));
     943                 :           0 :         }
     944                 :             :         else
     945                 :             :         {
     946                 :           0 :                 unsigned        i,
     947                 :           0 :                                         j = 0;
     948                 :             : 
     949                 :             :                 /* It's an offset array. Filter out large offsets. */
     950         [ #  # ]:           0 :                 for (i = 0; i < entry->chunk_usage[limit_chunkno]; ++i)
     951                 :             :                 {
     952         [ #  # ]:           0 :                         Assert(j <= i);
     953         [ #  # ]:           0 :                         if (limit_chunk[i] < limit_chunkoffset)
     954                 :           0 :                                 limit_chunk[j++] = limit_chunk[i];
     955                 :           0 :                 }
     956         [ #  # ]:           0 :                 Assert(j <= entry->chunk_usage[limit_chunkno]);
     957                 :           0 :                 entry->chunk_usage[limit_chunkno] = j;
     958                 :           0 :         }
     959         [ #  # ]:           0 : }
     960                 :             : 
     961                 :             : /*
     962                 :             :  * Mark a block in a given BlockRefTableEntry as known to have been modified.
     963                 :             :  */
     964                 :             : void
     965                 :           0 : BlockRefTableEntryMarkBlockModified(BlockRefTableEntry *entry,
     966                 :             :                                                                         ForkNumber forknum,
     967                 :             :                                                                         BlockNumber blknum)
     968                 :             : {
     969                 :           0 :         unsigned        chunkno;
     970                 :           0 :         unsigned        chunkoffset;
     971                 :           0 :         unsigned        i;
     972                 :             : 
     973                 :             :         /*
     974                 :             :          * Which chunk should store the state of this block? And what is the
     975                 :             :          * offset of this block relative to the start of that chunk?
     976                 :             :          */
     977                 :           0 :         chunkno = blknum / BLOCKS_PER_CHUNK;
     978                 :           0 :         chunkoffset = blknum % BLOCKS_PER_CHUNK;
     979                 :             : 
     980                 :             :         /*
     981                 :             :          * If 'nchunks' isn't big enough for us to be able to represent the state
     982                 :             :          * of this block, we need to enlarge our arrays.
     983                 :             :          */
     984         [ #  # ]:           0 :         if (chunkno >= entry->nchunks)
     985                 :             :         {
     986                 :           0 :                 unsigned        max_chunks;
     987                 :           0 :                 unsigned        extra_chunks;
     988                 :             : 
     989                 :             :                 /*
     990                 :             :                  * New array size is a power of 2, at least 16, big enough so that
     991                 :             :                  * chunkno will be a valid array index.
     992                 :             :                  */
     993         [ #  # ]:           0 :                 max_chunks = Max(16, entry->nchunks);
     994         [ #  # ]:           0 :                 while (max_chunks < chunkno + 1)
     995                 :           0 :                         max_chunks *= 2;
     996                 :           0 :                 extra_chunks = max_chunks - entry->nchunks;
     997                 :             : 
     998         [ #  # ]:           0 :                 if (entry->nchunks == 0)
     999                 :             :                 {
    1000                 :           0 :                         entry->chunk_size = palloc0_array(uint16, max_chunks);
    1001                 :           0 :                         entry->chunk_usage = palloc0_array(uint16, max_chunks);
    1002                 :           0 :                         entry->chunk_data = palloc0_array(BlockRefTableChunk, max_chunks);
    1003                 :           0 :                 }
    1004                 :             :                 else
    1005                 :             :                 {
    1006                 :           0 :                         entry->chunk_size = repalloc(entry->chunk_size,
    1007                 :           0 :                                                                                  sizeof(uint16) * max_chunks);
    1008                 :           0 :                         memset(&entry->chunk_size[entry->nchunks], 0,
    1009                 :             :                                    extra_chunks * sizeof(uint16));
    1010                 :           0 :                         entry->chunk_usage = repalloc(entry->chunk_usage,
    1011                 :           0 :                                                                                   sizeof(uint16) * max_chunks);
    1012                 :           0 :                         memset(&entry->chunk_usage[entry->nchunks], 0,
    1013                 :             :                                    extra_chunks * sizeof(uint16));
    1014                 :           0 :                         entry->chunk_data = repalloc(entry->chunk_data,
    1015                 :           0 :                                                                                  sizeof(BlockRefTableChunk) * max_chunks);
    1016                 :           0 :                         memset(&entry->chunk_data[entry->nchunks], 0,
    1017                 :             :                                    extra_chunks * sizeof(BlockRefTableChunk));
    1018                 :             :                 }
    1019                 :           0 :                 entry->nchunks = max_chunks;
    1020                 :           0 :         }
    1021                 :             : 
    1022                 :             :         /*
    1023                 :             :          * If the chunk that covers this block number doesn't exist yet, create it
    1024                 :             :          * as an array and add the appropriate offset to it. We make it pretty
    1025                 :             :          * small initially, because there might only be 1 or a few block
    1026                 :             :          * references in this chunk and we don't want to use up too much memory.
    1027                 :             :          */
    1028         [ #  # ]:           0 :         if (entry->chunk_size[chunkno] == 0)
    1029                 :             :         {
    1030                 :           0 :                 entry->chunk_data[chunkno] =
    1031                 :           0 :                         palloc_array(uint16, INITIAL_ENTRIES_PER_CHUNK);
    1032                 :           0 :                 entry->chunk_size[chunkno] = INITIAL_ENTRIES_PER_CHUNK;
    1033                 :           0 :                 entry->chunk_data[chunkno][0] = chunkoffset;
    1034                 :           0 :                 entry->chunk_usage[chunkno] = 1;
    1035                 :           0 :                 return;
    1036                 :             :         }
    1037                 :             : 
    1038                 :             :         /*
    1039                 :             :          * If the number of entries in this chunk is already maximum, it must be a
    1040                 :             :          * bitmap. Just set the appropriate bit.
    1041                 :             :          */
    1042         [ #  # ]:           0 :         if (entry->chunk_usage[chunkno] == MAX_ENTRIES_PER_CHUNK)
    1043                 :             :         {
    1044                 :           0 :                 BlockRefTableChunk chunk = entry->chunk_data[chunkno];
    1045                 :             : 
    1046                 :           0 :                 chunk[chunkoffset / BLOCKS_PER_ENTRY] |=
    1047                 :           0 :                         1 << (chunkoffset % BLOCKS_PER_ENTRY);
    1048                 :             :                 return;
    1049                 :           0 :         }
    1050                 :             : 
    1051                 :             :         /*
    1052                 :             :          * There is an existing chunk and it's in array format. Let's find out
    1053                 :             :          * whether it already has an entry for this block. If so, we do not need
    1054                 :             :          * to do anything.
    1055                 :             :          */
    1056         [ #  # ]:           0 :         for (i = 0; i < entry->chunk_usage[chunkno]; ++i)
    1057                 :             :         {
    1058         [ #  # ]:           0 :                 if (entry->chunk_data[chunkno][i] == chunkoffset)
    1059                 :           0 :                         return;
    1060                 :           0 :         }
    1061                 :             : 
    1062                 :             :         /*
    1063                 :             :          * If the number of entries currently used is one less than the maximum,
    1064                 :             :          * it's time to convert to bitmap format.
    1065                 :             :          */
    1066         [ #  # ]:           0 :         if (entry->chunk_usage[chunkno] == MAX_ENTRIES_PER_CHUNK - 1)
    1067                 :             :         {
    1068                 :           0 :                 BlockRefTableChunk newchunk;
    1069                 :           0 :                 unsigned        j;
    1070                 :             : 
    1071                 :             :                 /* Allocate a new chunk. */
    1072                 :           0 :                 newchunk = palloc0(MAX_ENTRIES_PER_CHUNK * sizeof(uint16));
    1073                 :             : 
    1074                 :             :                 /* Set the bit for each existing entry. */
    1075         [ #  # ]:           0 :                 for (j = 0; j < entry->chunk_usage[chunkno]; ++j)
    1076                 :             :                 {
    1077                 :           0 :                         unsigned        coff = entry->chunk_data[chunkno][j];
    1078                 :             : 
    1079                 :           0 :                         newchunk[coff / BLOCKS_PER_ENTRY] |=
    1080                 :           0 :                                 1 << (coff % BLOCKS_PER_ENTRY);
    1081                 :           0 :                 }
    1082                 :             : 
    1083                 :             :                 /* Set the bit for the new entry. */
    1084                 :           0 :                 newchunk[chunkoffset / BLOCKS_PER_ENTRY] |=
    1085                 :           0 :                         1 << (chunkoffset % BLOCKS_PER_ENTRY);
    1086                 :             : 
    1087                 :             :                 /* Swap the new chunk into place and update metadata. */
    1088                 :           0 :                 pfree(entry->chunk_data[chunkno]);
    1089                 :           0 :                 entry->chunk_data[chunkno] = newchunk;
    1090                 :           0 :                 entry->chunk_size[chunkno] = MAX_ENTRIES_PER_CHUNK;
    1091                 :           0 :                 entry->chunk_usage[chunkno] = MAX_ENTRIES_PER_CHUNK;
    1092                 :             :                 return;
    1093                 :           0 :         }
    1094                 :             : 
    1095                 :             :         /*
    1096                 :             :          * OK, we currently have an array, and we don't need to convert to a
    1097                 :             :          * bitmap, but we do need to add a new element. If there's not enough
    1098                 :             :          * room, we'll have to expand the array.
    1099                 :             :          */
    1100         [ #  # ]:           0 :         if (entry->chunk_usage[chunkno] == entry->chunk_size[chunkno])
    1101                 :             :         {
    1102                 :           0 :                 unsigned        newsize = entry->chunk_size[chunkno] * 2;
    1103                 :             : 
    1104         [ #  # ]:           0 :                 Assert(newsize <= MAX_ENTRIES_PER_CHUNK);
    1105                 :           0 :                 entry->chunk_data[chunkno] = repalloc(entry->chunk_data[chunkno],
    1106                 :           0 :                                                                                           newsize * sizeof(uint16));
    1107                 :           0 :                 entry->chunk_size[chunkno] = newsize;
    1108                 :           0 :         }
    1109                 :             : 
    1110                 :             :         /* Now we can add the new entry. */
    1111                 :           0 :         entry->chunk_data[chunkno][entry->chunk_usage[chunkno]] =
    1112                 :           0 :                 chunkoffset;
    1113                 :           0 :         entry->chunk_usage[chunkno]++;
    1114         [ #  # ]:           0 : }
    1115                 :             : 
    1116                 :             : /*
    1117                 :             :  * Release memory for a BlockRefTableEntry that was created by
    1118                 :             :  * CreateBlockRefTableEntry.
    1119                 :             :  */
    1120                 :             : void
    1121                 :           0 : BlockRefTableFreeEntry(BlockRefTableEntry *entry)
    1122                 :             : {
    1123         [ #  # ]:           0 :         if (entry->chunk_size != NULL)
    1124                 :             :         {
    1125                 :           0 :                 pfree(entry->chunk_size);
    1126                 :           0 :                 entry->chunk_size = NULL;
    1127                 :           0 :         }
    1128                 :             : 
    1129         [ #  # ]:           0 :         if (entry->chunk_usage != NULL)
    1130                 :             :         {
    1131                 :           0 :                 pfree(entry->chunk_usage);
    1132                 :           0 :                 entry->chunk_usage = NULL;
    1133                 :           0 :         }
    1134                 :             : 
    1135         [ #  # ]:           0 :         if (entry->chunk_data != NULL)
    1136                 :             :         {
    1137                 :           0 :                 pfree(entry->chunk_data);
    1138                 :           0 :                 entry->chunk_data = NULL;
    1139                 :           0 :         }
    1140                 :             : 
    1141                 :           0 :         pfree(entry);
    1142                 :           0 : }
    1143                 :             : 
    1144                 :             : /*
    1145                 :             :  * Comparator for BlockRefTableSerializedEntry objects.
    1146                 :             :  *
    1147                 :             :  * We make the tablespace OID the first column of the sort key to match
    1148                 :             :  * the on-disk tree structure.
    1149                 :             :  */
    1150                 :             : static int
    1151                 :           0 : BlockRefTableComparator(const void *a, const void *b)
    1152                 :             : {
    1153                 :           0 :         const BlockRefTableSerializedEntry *sa = a;
    1154                 :           0 :         const BlockRefTableSerializedEntry *sb = b;
    1155                 :             : 
    1156         [ #  # ]:           0 :         if (sa->rlocator.spcOid > sb->rlocator.spcOid)
    1157                 :           0 :                 return 1;
    1158         [ #  # ]:           0 :         if (sa->rlocator.spcOid < sb->rlocator.spcOid)
    1159                 :           0 :                 return -1;
    1160                 :             : 
    1161         [ #  # ]:           0 :         if (sa->rlocator.dbOid > sb->rlocator.dbOid)
    1162                 :           0 :                 return 1;
    1163         [ #  # ]:           0 :         if (sa->rlocator.dbOid < sb->rlocator.dbOid)
    1164                 :           0 :                 return -1;
    1165                 :             : 
    1166         [ #  # ]:           0 :         if (sa->rlocator.relNumber > sb->rlocator.relNumber)
    1167                 :           0 :                 return 1;
    1168         [ #  # ]:           0 :         if (sa->rlocator.relNumber < sb->rlocator.relNumber)
    1169                 :           0 :                 return -1;
    1170                 :             : 
    1171         [ #  # ]:           0 :         if (sa->forknum > sb->forknum)
    1172                 :           0 :                 return 1;
    1173         [ #  # ]:           0 :         if (sa->forknum < sb->forknum)
    1174                 :           0 :                 return -1;
    1175                 :             : 
    1176                 :           0 :         return 0;
    1177                 :           0 : }
    1178                 :             : 
    1179                 :             : /*
    1180                 :             :  * Flush any buffered data out of a BlockRefTableBuffer.
    1181                 :             :  */
    1182                 :             : static void
    1183                 :           0 : BlockRefTableFlush(BlockRefTableBuffer *buffer)
    1184                 :             : {
    1185                 :           0 :         buffer->io_callback(buffer->io_callback_arg, buffer->data, buffer->used);
    1186                 :           0 :         buffer->used = 0;
    1187                 :           0 : }
    1188                 :             : 
    1189                 :             : /*
    1190                 :             :  * Read data from a BlockRefTableBuffer, and update the running CRC
    1191                 :             :  * calculation for the returned data (but not any data that we may have
    1192                 :             :  * buffered but not yet actually returned).
    1193                 :             :  */
    1194                 :             : static void
    1195                 :           0 : BlockRefTableRead(BlockRefTableReader *reader, void *data, int length)
    1196                 :             : {
    1197                 :           0 :         BlockRefTableBuffer *buffer = &reader->buffer;
    1198                 :             : 
    1199                 :             :         /* Loop until read is fully satisfied. */
    1200         [ #  # ]:           0 :         while (length > 0)
    1201                 :             :         {
    1202         [ #  # ]:           0 :                 if (buffer->cursor < buffer->used)
    1203                 :             :                 {
    1204                 :             :                         /*
    1205                 :             :                          * If any buffered data is available, use that to satisfy as much
    1206                 :             :                          * of the request as possible.
    1207                 :             :                          */
    1208         [ #  # ]:           0 :                         int                     bytes_to_copy = Min(length, buffer->used - buffer->cursor);
    1209                 :             : 
    1210                 :           0 :                         memcpy(data, &buffer->data[buffer->cursor], bytes_to_copy);
    1211                 :           0 :                         COMP_CRC32C(buffer->crc, &buffer->data[buffer->cursor],
    1212                 :             :                                                 bytes_to_copy);
    1213                 :           0 :                         buffer->cursor += bytes_to_copy;
    1214                 :           0 :                         data = ((char *) data) + bytes_to_copy;
    1215                 :           0 :                         length -= bytes_to_copy;
    1216                 :           0 :                 }
    1217         [ #  # ]:           0 :                 else if (length >= BUFSIZE)
    1218                 :             :                 {
    1219                 :             :                         /*
    1220                 :             :                          * If the request length is long, read directly into caller's
    1221                 :             :                          * buffer.
    1222                 :             :                          */
    1223                 :           0 :                         int                     bytes_read;
    1224                 :             : 
    1225                 :           0 :                         bytes_read = buffer->io_callback(buffer->io_callback_arg,
    1226                 :           0 :                                                                                          data, length);
    1227                 :           0 :                         COMP_CRC32C(buffer->crc, data, bytes_read);
    1228                 :           0 :                         data = ((char *) data) + bytes_read;
    1229                 :           0 :                         length -= bytes_read;
    1230                 :             : 
    1231                 :             :                         /* If we didn't get anything, that's bad. */
    1232         [ #  # ]:           0 :                         if (bytes_read == 0)
    1233                 :           0 :                                 reader->error_callback(reader->error_callback_arg,
    1234                 :             :                                                                            "file \"%s\" ends unexpectedly",
    1235                 :           0 :                                                                            reader->error_filename);
    1236                 :           0 :                 }
    1237                 :             :                 else
    1238                 :             :                 {
    1239                 :             :                         /*
    1240                 :             :                          * Refill our buffer.
    1241                 :             :                          */
    1242                 :           0 :                         buffer->used = buffer->io_callback(buffer->io_callback_arg,
    1243                 :           0 :                                                                                            buffer->data, BUFSIZE);
    1244                 :           0 :                         buffer->cursor = 0;
    1245                 :             : 
    1246                 :             :                         /* If we didn't get anything, that's bad. */
    1247         [ #  # ]:           0 :                         if (buffer->used == 0)
    1248                 :           0 :                                 reader->error_callback(reader->error_callback_arg,
    1249                 :             :                                                                            "file \"%s\" ends unexpectedly",
    1250                 :           0 :                                                                            reader->error_filename);
    1251                 :             :                 }
    1252                 :             :         }
    1253                 :           0 : }
    1254                 :             : 
    1255                 :             : /*
    1256                 :             :  * Supply data to a BlockRefTableBuffer for write to the underlying File,
    1257                 :             :  * and update the running CRC calculation for that data.
    1258                 :             :  */
    1259                 :             : static void
    1260                 :           0 : BlockRefTableWrite(BlockRefTableBuffer *buffer, void *data, int length)
    1261                 :             : {
    1262                 :             :         /* Update running CRC calculation. */
    1263                 :           0 :         COMP_CRC32C(buffer->crc, data, length);
    1264                 :             : 
    1265                 :             :         /* If the new data can't fit into the buffer, flush the buffer. */
    1266         [ #  # ]:           0 :         if (buffer->used + length > BUFSIZE)
    1267                 :             :         {
    1268                 :           0 :                 buffer->io_callback(buffer->io_callback_arg, buffer->data,
    1269                 :           0 :                                                         buffer->used);
    1270                 :           0 :                 buffer->used = 0;
    1271                 :           0 :         }
    1272                 :             : 
    1273                 :             :         /* If the new data would fill the buffer, or more, write it directly. */
    1274         [ #  # ]:           0 :         if (length >= BUFSIZE)
    1275                 :             :         {
    1276                 :           0 :                 buffer->io_callback(buffer->io_callback_arg, data, length);
    1277                 :           0 :                 return;
    1278                 :             :         }
    1279                 :             : 
    1280                 :             :         /* Otherwise, copy the new data into the buffer. */
    1281                 :           0 :         memcpy(&buffer->data[buffer->used], data, length);
    1282                 :           0 :         buffer->used += length;
    1283         [ #  # ]:           0 :         Assert(buffer->used <= BUFSIZE);
    1284                 :           0 : }
    1285                 :             : 
    1286                 :             : /*
    1287                 :             :  * Generate the sentinel and CRC required at the end of a block reference
    1288                 :             :  * table file and flush them out of our internal buffer.
    1289                 :             :  */
    1290                 :             : static void
    1291                 :           0 : BlockRefTableFileTerminate(BlockRefTableBuffer *buffer)
    1292                 :             : {
    1293                 :           0 :         BlockRefTableSerializedEntry zentry = {0};
    1294                 :           0 :         pg_crc32c       crc;
    1295                 :             : 
    1296                 :             :         /* Write a sentinel indicating that there are no more entries. */
    1297                 :           0 :         BlockRefTableWrite(buffer, &zentry,
    1298                 :             :                                            sizeof(BlockRefTableSerializedEntry));
    1299                 :             : 
    1300                 :             :         /*
    1301                 :             :          * Writing the checksum will perturb the ongoing checksum calculation, so
    1302                 :             :          * copy the state first and finalize the computation using the copy.
    1303                 :             :          */
    1304                 :           0 :         crc = buffer->crc;
    1305                 :           0 :         FIN_CRC32C(crc);
    1306                 :           0 :         BlockRefTableWrite(buffer, &crc, sizeof(pg_crc32c));
    1307                 :             : 
    1308                 :             :         /* Flush any leftover data out of our buffer. */
    1309                 :           0 :         BlockRefTableFlush(buffer);
    1310                 :           0 : }
        

Generated by: LCOV version 2.3.2-1