LCOV - code coverage report
Current view: top level - src/bin/pg_rewind - parsexlog.c (source / functions) Coverage Total Hit
Test: Code coverage Lines: 0.0 % 205 0
Test Date: 2026-01-26 10:56:24 Functions: 0.0 % 5 0
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * parsexlog.c
       4              :  *        Functions for reading Write-Ahead-Log
       5              :  *
       6              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7              :  * Portions Copyright (c) 1994, Regents of the University of California
       8              :  *
       9              :  *-------------------------------------------------------------------------
      10              :  */
      11              : 
      12              : #include "postgres_fe.h"
      13              : 
      14              : #include <unistd.h>
      15              : 
      16              : #include "access/rmgr.h"
      17              : #include "access/xact.h"
      18              : #include "access/xlog_internal.h"
      19              : #include "access/xlogreader.h"
      20              : #include "catalog/pg_control.h"
      21              : #include "catalog/storage_xlog.h"
      22              : #include "commands/dbcommands_xlog.h"
      23              : #include "fe_utils/archive.h"
      24              : #include "filemap.h"
      25              : #include "pg_rewind.h"
      26              : 
      27              : /*
      28              :  * RmgrNames is an array of the built-in resource manager names, to make error
      29              :  * messages a bit nicer.
      30              :  */
      31              : #define PG_RMGR(symname,name,redo,desc,identify,startup,cleanup,mask,decode) \
      32              :   name,
      33              : 
      34              : static const char *const RmgrNames[RM_MAX_ID + 1] = {
      35              : #include "access/rmgrlist.h"
      36              : };
      37              : 
      38              : #define RmgrName(rmid) (((rmid) <= RM_MAX_BUILTIN_ID) ? \
      39              :                                                 RmgrNames[rmid] : "custom")
      40              : 
      41              : static void extractPageInfo(XLogReaderState *record);
      42              : 
      43              : static int      xlogreadfd = -1;
      44              : static XLogSegNo xlogreadsegno = 0;
      45              : static char xlogfpath[MAXPGPATH];
      46              : 
      47              : typedef struct XLogPageReadPrivate
      48              : {
      49              :         const char *restoreCommand;
      50              :         int                     tliIndex;
      51              : } XLogPageReadPrivate;
      52              : 
      53              : static int      SimpleXLogPageRead(XLogReaderState *xlogreader,
      54              :                                                            XLogRecPtr targetPagePtr,
      55              :                                                            int reqLen, XLogRecPtr targetRecPtr, char *readBuf);
      56              : 
      57              : /*
      58              :  * Read WAL from the datadir/pg_wal, starting from 'startpoint' on timeline
      59              :  * index 'tliIndex' in target timeline history, until 'endpoint'. Make note of
      60              :  * the data blocks touched by the WAL records, and return them in a page map.
      61              :  *
      62              :  * 'endpoint' is the end of the last record to read. The record starting at
      63              :  * 'endpoint' is the first one that is not read.
      64              :  */
      65              : void
      66            0 : extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex,
      67              :                            XLogRecPtr endpoint, const char *restoreCommand)
      68              : {
      69            0 :         XLogRecord *record;
      70            0 :         XLogReaderState *xlogreader;
      71            0 :         char       *errormsg;
      72            0 :         XLogPageReadPrivate private;
      73              : 
      74            0 :         private.tliIndex = tliIndex;
      75            0 :         private.restoreCommand = restoreCommand;
      76            0 :         xlogreader = XLogReaderAllocate(WalSegSz, datadir,
      77            0 :                                                                         XL_ROUTINE(.page_read = &SimpleXLogPageRead),
      78              :                                                                         &private);
      79            0 :         if (xlogreader == NULL)
      80            0 :                 pg_fatal("out of memory while allocating a WAL reading processor");
      81              : 
      82            0 :         XLogBeginRead(xlogreader, startpoint);
      83            0 :         do
      84              :         {
      85            0 :                 record = XLogReadRecord(xlogreader, &errormsg);
      86              : 
      87            0 :                 if (record == NULL)
      88              :                 {
      89            0 :                         XLogRecPtr      errptr = xlogreader->EndRecPtr;
      90              : 
      91            0 :                         if (errormsg)
      92            0 :                                 pg_fatal("could not read WAL record at %X/%08X: %s",
      93              :                                                  LSN_FORMAT_ARGS(errptr),
      94              :                                                  errormsg);
      95              :                         else
      96            0 :                                 pg_fatal("could not read WAL record at %X/%08X",
      97              :                                                  LSN_FORMAT_ARGS(errptr));
      98            0 :                 }
      99              : 
     100            0 :                 extractPageInfo(xlogreader);
     101            0 :         } while (xlogreader->EndRecPtr < endpoint);
     102              : 
     103              :         /*
     104              :          * If 'endpoint' didn't point exactly at a record boundary, the caller
     105              :          * messed up.
     106              :          */
     107            0 :         if (xlogreader->EndRecPtr != endpoint)
     108            0 :                 pg_fatal("end pointer %X/%08X is not a valid end point; expected %X/%08X",
     109              :                                  LSN_FORMAT_ARGS(endpoint), LSN_FORMAT_ARGS(xlogreader->EndRecPtr));
     110              : 
     111            0 :         XLogReaderFree(xlogreader);
     112            0 :         if (xlogreadfd != -1)
     113              :         {
     114            0 :                 close(xlogreadfd);
     115            0 :                 xlogreadfd = -1;
     116            0 :         }
     117            0 : }
     118              : 
     119              : /*
     120              :  * Reads one WAL record. Returns the end position of the record, without
     121              :  * doing anything with the record itself.
     122              :  */
     123              : XLogRecPtr
     124            0 : readOneRecord(const char *datadir, XLogRecPtr ptr, int tliIndex,
     125              :                           const char *restoreCommand)
     126              : {
     127            0 :         XLogRecord *record;
     128            0 :         XLogReaderState *xlogreader;
     129            0 :         char       *errormsg;
     130            0 :         XLogPageReadPrivate private;
     131            0 :         XLogRecPtr      endptr;
     132              : 
     133            0 :         private.tliIndex = tliIndex;
     134            0 :         private.restoreCommand = restoreCommand;
     135            0 :         xlogreader = XLogReaderAllocate(WalSegSz, datadir,
     136            0 :                                                                         XL_ROUTINE(.page_read = &SimpleXLogPageRead),
     137              :                                                                         &private);
     138            0 :         if (xlogreader == NULL)
     139            0 :                 pg_fatal("out of memory while allocating a WAL reading processor");
     140              : 
     141            0 :         XLogBeginRead(xlogreader, ptr);
     142            0 :         record = XLogReadRecord(xlogreader, &errormsg);
     143            0 :         if (record == NULL)
     144              :         {
     145            0 :                 if (errormsg)
     146            0 :                         pg_fatal("could not read WAL record at %X/%08X: %s",
     147              :                                          LSN_FORMAT_ARGS(ptr), errormsg);
     148              :                 else
     149            0 :                         pg_fatal("could not read WAL record at %X/%08X",
     150              :                                          LSN_FORMAT_ARGS(ptr));
     151            0 :         }
     152            0 :         endptr = xlogreader->EndRecPtr;
     153              : 
     154            0 :         XLogReaderFree(xlogreader);
     155            0 :         if (xlogreadfd != -1)
     156              :         {
     157            0 :                 close(xlogreadfd);
     158            0 :                 xlogreadfd = -1;
     159            0 :         }
     160              : 
     161            0 :         return endptr;
     162            0 : }
     163              : 
     164              : /*
     165              :  * Find the previous checkpoint preceding given WAL location.
     166              :  */
     167              : void
     168            0 : findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex,
     169              :                                    XLogRecPtr *lastchkptrec, TimeLineID *lastchkpttli,
     170              :                                    XLogRecPtr *lastchkptredo, const char *restoreCommand)
     171              : {
     172              :         /* Walk backwards, starting from the given record */
     173            0 :         XLogRecord *record;
     174            0 :         XLogRecPtr      searchptr;
     175            0 :         XLogReaderState *xlogreader;
     176            0 :         char       *errormsg;
     177            0 :         XLogPageReadPrivate private;
     178            0 :         XLogSegNo       current_segno = 0;
     179            0 :         TimeLineID      current_tli = 0;
     180              : 
     181              :         /*
     182              :          * The given fork pointer points to the end of the last common record,
     183              :          * which is not necessarily the beginning of the next record, if the
     184              :          * previous record happens to end at a page boundary. Skip over the page
     185              :          * header in that case to find the next record.
     186              :          */
     187            0 :         if (forkptr % XLOG_BLCKSZ == 0)
     188              :         {
     189            0 :                 if (XLogSegmentOffset(forkptr, WalSegSz) == 0)
     190            0 :                         forkptr += SizeOfXLogLongPHD;
     191              :                 else
     192            0 :                         forkptr += SizeOfXLogShortPHD;
     193            0 :         }
     194              : 
     195            0 :         private.tliIndex = tliIndex;
     196            0 :         private.restoreCommand = restoreCommand;
     197            0 :         xlogreader = XLogReaderAllocate(WalSegSz, datadir,
     198            0 :                                                                         XL_ROUTINE(.page_read = &SimpleXLogPageRead),
     199              :                                                                         &private);
     200            0 :         if (xlogreader == NULL)
     201            0 :                 pg_fatal("out of memory while allocating a WAL reading processor");
     202              : 
     203            0 :         searchptr = forkptr;
     204            0 :         for (;;)
     205              :         {
     206            0 :                 uint8           info;
     207              : 
     208            0 :                 XLogBeginRead(xlogreader, searchptr);
     209            0 :                 record = XLogReadRecord(xlogreader, &errormsg);
     210              : 
     211            0 :                 if (record == NULL)
     212              :                 {
     213            0 :                         if (errormsg)
     214            0 :                                 pg_fatal("could not find previous WAL record at %X/%08X: %s",
     215              :                                                  LSN_FORMAT_ARGS(searchptr),
     216              :                                                  errormsg);
     217              :                         else
     218            0 :                                 pg_fatal("could not find previous WAL record at %X/%08X",
     219              :                                                  LSN_FORMAT_ARGS(searchptr));
     220            0 :                 }
     221              : 
     222              :                 /* Detect if a new WAL file has been opened */
     223            0 :                 if (xlogreader->seg.ws_tli != current_tli ||
     224            0 :                         xlogreader->seg.ws_segno != current_segno)
     225              :                 {
     226            0 :                         char            xlogfname[MAXFNAMELEN];
     227              : 
     228            0 :                         snprintf(xlogfname, MAXFNAMELEN, XLOGDIR "/");
     229              : 
     230              :                         /* update current values */
     231            0 :                         current_tli = xlogreader->seg.ws_tli;
     232            0 :                         current_segno = xlogreader->seg.ws_segno;
     233              : 
     234            0 :                         XLogFileName(xlogfname + sizeof(XLOGDIR),
     235            0 :                                                  current_tli, current_segno, WalSegSz);
     236              : 
     237              :                         /* Track this filename as one to not remove */
     238            0 :                         keepwal_add_entry(xlogfname);
     239            0 :                 }
     240              : 
     241              :                 /*
     242              :                  * Check if it is a checkpoint record. This checkpoint record needs to
     243              :                  * be the latest checkpoint before WAL forked and not the checkpoint
     244              :                  * where the primary has been stopped to be rewound.
     245              :                  */
     246            0 :                 info = XLogRecGetInfo(xlogreader) & ~XLR_INFO_MASK;
     247            0 :                 if (searchptr < forkptr &&
     248            0 :                         XLogRecGetRmid(xlogreader) == RM_XLOG_ID &&
     249            0 :                         (info == XLOG_CHECKPOINT_SHUTDOWN ||
     250            0 :                          info == XLOG_CHECKPOINT_ONLINE))
     251              :                 {
     252            0 :                         CheckPoint      checkPoint;
     253              : 
     254            0 :                         memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
     255            0 :                         *lastchkptrec = searchptr;
     256            0 :                         *lastchkpttli = checkPoint.ThisTimeLineID;
     257            0 :                         *lastchkptredo = checkPoint.redo;
     258              :                         break;
     259            0 :                 }
     260              : 
     261              :                 /* Walk backwards to previous record. */
     262            0 :                 searchptr = record->xl_prev;
     263            0 :         }
     264              : 
     265            0 :         XLogReaderFree(xlogreader);
     266            0 :         if (xlogreadfd != -1)
     267              :         {
     268            0 :                 close(xlogreadfd);
     269            0 :                 xlogreadfd = -1;
     270            0 :         }
     271            0 : }
     272              : 
     273              : /* XLogReader callback function, to read a WAL page */
     274              : static int
     275            0 : SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
     276              :                                    int reqLen, XLogRecPtr targetRecPtr, char *readBuf)
     277              : {
     278            0 :         XLogPageReadPrivate *private = (XLogPageReadPrivate *) xlogreader->private_data;
     279            0 :         uint32          targetPageOff;
     280            0 :         XLogRecPtr      targetSegEnd;
     281            0 :         XLogSegNo       targetSegNo;
     282            0 :         int                     r;
     283              : 
     284            0 :         XLByteToSeg(targetPagePtr, targetSegNo, WalSegSz);
     285            0 :         XLogSegNoOffsetToRecPtr(targetSegNo + 1, 0, WalSegSz, targetSegEnd);
     286            0 :         targetPageOff = XLogSegmentOffset(targetPagePtr, WalSegSz);
     287              : 
     288              :         /*
     289              :          * See if we need to switch to a new segment because the requested record
     290              :          * is not in the currently open one.
     291              :          */
     292            0 :         if (xlogreadfd >= 0 &&
     293            0 :                 !XLByteInSeg(targetPagePtr, xlogreadsegno, WalSegSz))
     294              :         {
     295            0 :                 close(xlogreadfd);
     296            0 :                 xlogreadfd = -1;
     297            0 :         }
     298              : 
     299            0 :         XLByteToSeg(targetPagePtr, xlogreadsegno, WalSegSz);
     300              : 
     301            0 :         if (xlogreadfd < 0)
     302              :         {
     303            0 :                 char            xlogfname[MAXFNAMELEN];
     304              : 
     305              :                 /*
     306              :                  * Since incomplete segments are copied into next timelines, switch to
     307              :                  * the timeline holding the required segment. Assuming this scan can
     308              :                  * be done both forward and backward, consider also switching timeline
     309              :                  * accordingly.
     310              :                  */
     311            0 :                 while (private->tliIndex < targetNentries - 1 &&
     312            0 :                            targetHistory[private->tliIndex].end < targetSegEnd)
     313            0 :                         private->tliIndex++;
     314            0 :                 while (private->tliIndex > 0 &&
     315            0 :                            targetHistory[private->tliIndex].begin >= targetSegEnd)
     316            0 :                         private->tliIndex--;
     317              : 
     318            0 :                 XLogFileName(xlogfname, targetHistory[private->tliIndex].tli,
     319            0 :                                          xlogreadsegno, WalSegSz);
     320              : 
     321            0 :                 snprintf(xlogfpath, MAXPGPATH, "%s/" XLOGDIR "/%s",
     322            0 :                                  xlogreader->segcxt.ws_dir, xlogfname);
     323              : 
     324            0 :                 xlogreadfd = open(xlogfpath, O_RDONLY | PG_BINARY, 0);
     325              : 
     326            0 :                 if (xlogreadfd < 0)
     327              :                 {
     328              :                         /*
     329              :                          * If we have no restore_command to execute, then exit.
     330              :                          */
     331            0 :                         if (private->restoreCommand == NULL)
     332              :                         {
     333            0 :                                 pg_log_error("could not open file \"%s\": %m", xlogfpath);
     334            0 :                                 return -1;
     335              :                         }
     336              : 
     337              :                         /*
     338              :                          * Since we have restore_command, then try to retrieve missing WAL
     339              :                          * file from the archive.
     340              :                          */
     341            0 :                         xlogreadfd = RestoreArchivedFile(xlogreader->segcxt.ws_dir,
     342            0 :                                                                                          xlogfname,
     343            0 :                                                                                          WalSegSz,
     344            0 :                                                                                          private->restoreCommand);
     345              : 
     346            0 :                         if (xlogreadfd < 0)
     347            0 :                                 return -1;
     348              :                         else
     349            0 :                                 pg_log_debug("using file \"%s\" restored from archive",
     350              :                                                          xlogfpath);
     351            0 :                 }
     352            0 :         }
     353              : 
     354              :         /*
     355              :          * At this point, we have the right segment open.
     356              :          */
     357            0 :         Assert(xlogreadfd != -1);
     358              : 
     359              :         /* Read the requested page */
     360            0 :         if (lseek(xlogreadfd, (off_t) targetPageOff, SEEK_SET) < 0)
     361              :         {
     362            0 :                 pg_log_error("could not seek in file \"%s\": %m", xlogfpath);
     363            0 :                 return -1;
     364              :         }
     365              : 
     366              : 
     367            0 :         r = read(xlogreadfd, readBuf, XLOG_BLCKSZ);
     368            0 :         if (r != XLOG_BLCKSZ)
     369              :         {
     370            0 :                 if (r < 0)
     371            0 :                         pg_log_error("could not read file \"%s\": %m", xlogfpath);
     372              :                 else
     373            0 :                         pg_log_error("could not read file \"%s\": read %d of %zu",
     374              :                                                  xlogfpath, r, (Size) XLOG_BLCKSZ);
     375              : 
     376            0 :                 return -1;
     377              :         }
     378              : 
     379            0 :         Assert(targetSegNo == xlogreadsegno);
     380              : 
     381            0 :         xlogreader->seg.ws_tli = targetHistory[private->tliIndex].tli;
     382            0 :         return XLOG_BLCKSZ;
     383            0 : }
     384              : 
     385              : /*
     386              :  * Extract information on which blocks the current record modifies.
     387              :  */
     388              : static void
     389            0 : extractPageInfo(XLogReaderState *record)
     390              : {
     391            0 :         int                     block_id;
     392            0 :         RmgrId          rmid = XLogRecGetRmid(record);
     393            0 :         uint8           info = XLogRecGetInfo(record);
     394            0 :         uint8           rminfo = info & ~XLR_INFO_MASK;
     395              : 
     396              :         /* Is this a special record type that I recognize? */
     397              : 
     398            0 :         if (rmid == RM_DBASE_ID && rminfo == XLOG_DBASE_CREATE_FILE_COPY)
     399              :         {
     400              :                 /*
     401              :                  * New databases can be safely ignored. It won't be present in the
     402              :                  * source system, so it will be deleted. There's one corner-case,
     403              :                  * though: if a new, different, database is also created in the source
     404              :                  * system, we'll see that the files already exist and not copy them.
     405              :                  * That's OK, though; WAL replay of creating the new database, from
     406              :                  * the source systems's WAL, will re-copy the new database,
     407              :                  * overwriting the database created in the target system.
     408              :                  */
     409            0 :         }
     410            0 :         else if (rmid == RM_DBASE_ID && rminfo == XLOG_DBASE_CREATE_WAL_LOG)
     411              :         {
     412              :                 /*
     413              :                  * New databases can be safely ignored. It won't be present in the
     414              :                  * source system, so it will be deleted.
     415              :                  */
     416            0 :         }
     417            0 :         else if (rmid == RM_DBASE_ID && rminfo == XLOG_DBASE_DROP)
     418              :         {
     419              :                 /*
     420              :                  * An existing database was dropped. We'll see that the files don't
     421              :                  * exist in the target data dir, and copy them in toto from the source
     422              :                  * system. No need to do anything special here.
     423              :                  */
     424            0 :         }
     425            0 :         else if (rmid == RM_SMGR_ID && rminfo == XLOG_SMGR_CREATE)
     426              :         {
     427              :                 /*
     428              :                  * We can safely ignore these. The file will be removed from the
     429              :                  * target, if it doesn't exist in source system. If a file with same
     430              :                  * name is created in source system, too, there will be WAL records
     431              :                  * for all the blocks in it.
     432              :                  */
     433            0 :         }
     434            0 :         else if (rmid == RM_SMGR_ID && rminfo == XLOG_SMGR_TRUNCATE)
     435              :         {
     436              :                 /*
     437              :                  * We can safely ignore these. When we compare the sizes later on,
     438              :                  * we'll notice that they differ, and copy the missing tail from
     439              :                  * source system.
     440              :                  */
     441            0 :         }
     442            0 :         else if (rmid == RM_XACT_ID &&
     443            0 :                          ((rminfo & XLOG_XACT_OPMASK) == XLOG_XACT_COMMIT ||
     444            0 :                           (rminfo & XLOG_XACT_OPMASK) == XLOG_XACT_COMMIT_PREPARED ||
     445            0 :                           (rminfo & XLOG_XACT_OPMASK) == XLOG_XACT_ABORT ||
     446            0 :                           (rminfo & XLOG_XACT_OPMASK) == XLOG_XACT_ABORT_PREPARED))
     447              :         {
     448              :                 /*
     449              :                  * These records can include "dropped rels". We can safely ignore
     450              :                  * them, we will see that they are missing and copy them from the
     451              :                  * source.
     452              :                  */
     453            0 :         }
     454            0 :         else if (info & XLR_SPECIAL_REL_UPDATE)
     455              :         {
     456              :                 /*
     457              :                  * This record type modifies a relation file in some special way, but
     458              :                  * we don't recognize the type. That's bad - we don't know how to
     459              :                  * track that change.
     460              :                  */
     461            0 :                 pg_fatal("WAL record modifies a relation, but record type is not recognized:\n"
     462              :                                  "lsn: %X/%08X, rmid: %d, rmgr: %s, info: %02X",
     463              :                                  LSN_FORMAT_ARGS(record->ReadRecPtr),
     464              :                                  rmid, RmgrName(rmid), info);
     465            0 :         }
     466              : 
     467            0 :         for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
     468              :         {
     469            0 :                 RelFileLocator rlocator;
     470            0 :                 ForkNumber      forknum;
     471            0 :                 BlockNumber blkno;
     472              : 
     473            0 :                 if (!XLogRecGetBlockTagExtended(record, block_id,
     474              :                                                                                 &rlocator, &forknum, &blkno, NULL))
     475            0 :                         continue;
     476              : 
     477              :                 /* We only care about the main fork; others are copied in toto */
     478            0 :                 if (forknum != MAIN_FORKNUM)
     479            0 :                         continue;
     480              : 
     481            0 :                 process_target_wal_block_change(forknum, rlocator, blkno);
     482            0 :         }
     483            0 : }
        

Generated by: LCOV version 2.3.2-1