LCOV - code coverage report
Current view: top level - src/backend/storage/sync - sync.c (source / functions) Coverage Total Hit
Test: Code coverage Lines: 48.8 % 170 83
Test Date: 2026-01-26 10:56:24 Functions: 100.0 % 6 6
Legend: Lines:     hit not hit
Branches: + taken - not taken # not executed
Branches: 20.6 % 131 27

             Branch data     Line data    Source code
       1                 :             : /*-------------------------------------------------------------------------
       2                 :             :  *
       3                 :             :  * sync.c
       4                 :             :  *        File synchronization management code.
       5                 :             :  *
       6                 :             :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7                 :             :  * Portions Copyright (c) 1994, Regents of the University of California
       8                 :             :  *
       9                 :             :  *
      10                 :             :  * IDENTIFICATION
      11                 :             :  *        src/backend/storage/sync/sync.c
      12                 :             :  *
      13                 :             :  *-------------------------------------------------------------------------
      14                 :             :  */
      15                 :             : #include "postgres.h"
      16                 :             : 
      17                 :             : #include <unistd.h>
      18                 :             : #include <fcntl.h>
      19                 :             : #include <sys/file.h>
      20                 :             : 
      21                 :             : #include "access/clog.h"
      22                 :             : #include "access/commit_ts.h"
      23                 :             : #include "access/multixact.h"
      24                 :             : #include "access/xlog.h"
      25                 :             : #include "miscadmin.h"
      26                 :             : #include "pgstat.h"
      27                 :             : #include "portability/instr_time.h"
      28                 :             : #include "postmaster/bgwriter.h"
      29                 :             : #include "storage/fd.h"
      30                 :             : #include "storage/latch.h"
      31                 :             : #include "storage/md.h"
      32                 :             : #include "utils/hsearch.h"
      33                 :             : #include "utils/memutils.h"
      34                 :             : 
      35                 :             : /*
      36                 :             :  * In some contexts (currently, standalone backends and the checkpointer)
      37                 :             :  * we keep track of pending fsync operations: we need to remember all relation
      38                 :             :  * segments that have been written since the last checkpoint, so that we can
      39                 :             :  * fsync them down to disk before completing the next checkpoint.  This hash
      40                 :             :  * table remembers the pending operations.  We use a hash table mostly as
      41                 :             :  * a convenient way of merging duplicate requests.
      42                 :             :  *
      43                 :             :  * We use a similar mechanism to remember no-longer-needed files that can
      44                 :             :  * be deleted after the next checkpoint, but we use a linked list instead of
      45                 :             :  * a hash table, because we don't expect there to be any duplicate requests.
      46                 :             :  *
      47                 :             :  * These mechanisms are only used for non-temp relations; we never fsync
      48                 :             :  * temp rels, nor do we need to postpone their deletion (see comments in
      49                 :             :  * mdunlink).
      50                 :             :  *
      51                 :             :  * (Regular backends do not track pending operations locally, but forward
      52                 :             :  * them to the checkpointer.)
      53                 :             :  */
      54                 :             : typedef uint16 CycleCtr;                /* can be any convenient integer size */
      55                 :             : 
      56                 :             : typedef struct
      57                 :             : {
      58                 :             :         FileTag         tag;                    /* identifies handler and file */
      59                 :             :         CycleCtr        cycle_ctr;              /* sync_cycle_ctr of oldest request */
      60                 :             :         bool            canceled;               /* true if request was canceled "recently" */
      61                 :             : } PendingFsyncEntry;
      62                 :             : 
      63                 :             : typedef struct
      64                 :             : {
      65                 :             :         FileTag         tag;                    /* identifies handler and file */
      66                 :             :         CycleCtr        cycle_ctr;              /* checkpoint_cycle_ctr when request was made */
      67                 :             :         bool            canceled;               /* true if request has been canceled */
      68                 :             : } PendingUnlinkEntry;
      69                 :             : 
      70                 :             : static HTAB *pendingOps = NULL;
      71                 :             : static List *pendingUnlinks = NIL;
      72                 :             : static MemoryContext pendingOpsCxt; /* context for the above  */
      73                 :             : 
      74                 :             : static CycleCtr sync_cycle_ctr = 0;
      75                 :             : static CycleCtr checkpoint_cycle_ctr = 0;
      76                 :             : 
      77                 :             : /* Intervals for calling AbsorbSyncRequests */
      78                 :             : #define FSYNCS_PER_ABSORB               10
      79                 :             : #define UNLINKS_PER_ABSORB              10
      80                 :             : 
      81                 :             : /*
      82                 :             :  * Function pointers for handling sync and unlink requests.
      83                 :             :  */
      84                 :             : typedef struct SyncOps
      85                 :             : {
      86                 :             :         int                     (*sync_syncfiletag) (const FileTag *ftag, char *path);
      87                 :             :         int                     (*sync_unlinkfiletag) (const FileTag *ftag, char *path);
      88                 :             :         bool            (*sync_filetagmatches) (const FileTag *ftag,
      89                 :             :                                                                                 const FileTag *candidate);
      90                 :             : } SyncOps;
      91                 :             : 
      92                 :             : /*
      93                 :             :  * These indexes must correspond to the values of the SyncRequestHandler enum.
      94                 :             :  */
      95                 :             : static const SyncOps syncsw[] = {
      96                 :             :         /* magnetic disk */
      97                 :             :         [SYNC_HANDLER_MD] = {
      98                 :             :                 .sync_syncfiletag = mdsyncfiletag,
      99                 :             :                 .sync_unlinkfiletag = mdunlinkfiletag,
     100                 :             :                 .sync_filetagmatches = mdfiletagmatches
     101                 :             :         },
     102                 :             :         /* pg_xact */
     103                 :             :         [SYNC_HANDLER_CLOG] = {
     104                 :             :                 .sync_syncfiletag = clogsyncfiletag
     105                 :             :         },
     106                 :             :         /* pg_commit_ts */
     107                 :             :         [SYNC_HANDLER_COMMIT_TS] = {
     108                 :             :                 .sync_syncfiletag = committssyncfiletag
     109                 :             :         },
     110                 :             :         /* pg_multixact/offsets */
     111                 :             :         [SYNC_HANDLER_MULTIXACT_OFFSET] = {
     112                 :             :                 .sync_syncfiletag = multixactoffsetssyncfiletag
     113                 :             :         },
     114                 :             :         /* pg_multixact/members */
     115                 :             :         [SYNC_HANDLER_MULTIXACT_MEMBER] = {
     116                 :             :                 .sync_syncfiletag = multixactmemberssyncfiletag
     117                 :             :         }
     118                 :             : };
     119                 :             : 
     120                 :             : /*
     121                 :             :  * Initialize data structures for the file sync tracking.
     122                 :             :  */
     123                 :             : void
     124                 :         806 : InitSync(void)
     125                 :             : {
     126                 :             :         /*
     127                 :             :          * Create pending-operations hashtable if we need it.  Currently, we need
     128                 :             :          * it if we are standalone (not under a postmaster) or if we are a
     129                 :             :          * checkpointer auxiliary process.
     130                 :             :          */
     131   [ +  +  +  + ]:         806 :         if (!IsUnderPostmaster || AmCheckpointerProcess())
     132                 :             :         {
     133                 :           3 :                 HASHCTL         hash_ctl;
     134                 :             : 
     135                 :             :                 /*
     136                 :             :                  * XXX: The checkpointer needs to add entries to the pending ops table
     137                 :             :                  * when absorbing fsync requests.  That is done within a critical
     138                 :             :                  * section, which isn't usually allowed, but we make an exception. It
     139                 :             :                  * means that there's a theoretical possibility that you run out of
     140                 :             :                  * memory while absorbing fsync requests, which leads to a PANIC.
     141                 :             :                  * Fortunately the hash table is small so that's unlikely to happen in
     142                 :             :                  * practice.
     143                 :             :                  */
     144                 :           3 :                 pendingOpsCxt = AllocSetContextCreate(TopMemoryContext,
     145                 :             :                                                                                           "Pending ops context",
     146                 :             :                                                                                           ALLOCSET_DEFAULT_SIZES);
     147                 :           3 :                 MemoryContextAllowInCriticalSection(pendingOpsCxt, true);
     148                 :             : 
     149                 :           3 :                 hash_ctl.keysize = sizeof(FileTag);
     150                 :           3 :                 hash_ctl.entrysize = sizeof(PendingFsyncEntry);
     151                 :           3 :                 hash_ctl.hcxt = pendingOpsCxt;
     152                 :           3 :                 pendingOps = hash_create("Pending Ops Table",
     153                 :             :                                                                  100L,
     154                 :             :                                                                  &hash_ctl,
     155                 :             :                                                                  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
     156                 :           3 :                 pendingUnlinks = NIL;
     157                 :           3 :         }
     158                 :         806 : }
     159                 :             : 
     160                 :             : /*
     161                 :             :  * SyncPreCheckpoint() -- Do pre-checkpoint work
     162                 :             :  *
     163                 :             :  * To distinguish unlink requests that arrived before this checkpoint
     164                 :             :  * started from those that arrived during the checkpoint, we use a cycle
     165                 :             :  * counter similar to the one we use for fsync requests. That cycle
     166                 :             :  * counter is incremented here.
     167                 :             :  *
     168                 :             :  * This must be called *before* the checkpoint REDO point is determined.
     169                 :             :  * That ensures that we won't delete files too soon.  Since this calls
     170                 :             :  * AbsorbSyncRequests(), which performs memory allocations, it cannot be
     171                 :             :  * called within a critical section.
     172                 :             :  *
     173                 :             :  * Note that we can't do anything here that depends on the assumption
     174                 :             :  * that the checkpoint will be completed.
     175                 :             :  */
     176                 :             : void
     177                 :           7 : SyncPreCheckpoint(void)
     178                 :             : {
     179                 :             :         /*
     180                 :             :          * Operations such as DROP TABLESPACE assume that the next checkpoint will
     181                 :             :          * process all recently forwarded unlink requests, but if they aren't
     182                 :             :          * absorbed prior to advancing the cycle counter, they won't be processed
     183                 :             :          * until a future checkpoint.  The following absorb ensures that any
     184                 :             :          * unlink requests forwarded before the checkpoint began will be processed
     185                 :             :          * in the current checkpoint.
     186                 :             :          */
     187                 :           7 :         AbsorbSyncRequests();
     188                 :             : 
     189                 :             :         /*
     190                 :             :          * Any unlink requests arriving after this point will be assigned the next
     191                 :             :          * cycle counter, and won't be unlinked until next checkpoint.
     192                 :             :          */
     193                 :           7 :         checkpoint_cycle_ctr++;
     194                 :           7 : }
     195                 :             : 
     196                 :             : /*
     197                 :             :  * SyncPostCheckpoint() -- Do post-checkpoint work
     198                 :             :  *
     199                 :             :  * Remove any lingering files that can now be safely removed.
     200                 :             :  */
     201                 :             : void
     202                 :           7 : SyncPostCheckpoint(void)
     203                 :             : {
     204                 :           7 :         int                     absorb_counter;
     205                 :           7 :         ListCell   *lc;
     206                 :             : 
     207                 :           7 :         absorb_counter = UNLINKS_PER_ABSORB;
     208   [ -  +  #  #  :           7 :         foreach(lc, pendingUnlinks)
                   +  - ]
     209                 :             :         {
     210                 :           0 :                 PendingUnlinkEntry *entry = (PendingUnlinkEntry *) lfirst(lc);
     211                 :           0 :                 char            path[MAXPGPATH];
     212                 :             : 
     213                 :             :                 /* Skip over any canceled entries */
     214         [ #  # ]:           0 :                 if (entry->canceled)
     215                 :           0 :                         continue;
     216                 :             : 
     217                 :             :                 /*
     218                 :             :                  * New entries are appended to the end, so if the entry is new we've
     219                 :             :                  * reached the end of old entries.
     220                 :             :                  *
     221                 :             :                  * Note: if just the right number of consecutive checkpoints fail, we
     222                 :             :                  * could be fooled here by cycle_ctr wraparound.  However, the only
     223                 :             :                  * consequence is that we'd delay unlinking for one more checkpoint,
     224                 :             :                  * which is perfectly tolerable.
     225                 :             :                  */
     226         [ #  # ]:           0 :                 if (entry->cycle_ctr == checkpoint_cycle_ctr)
     227                 :           0 :                         break;
     228                 :             : 
     229                 :             :                 /* Unlink the file */
     230                 :           0 :                 if (syncsw[entry->tag.handler].sync_unlinkfiletag(&entry->tag,
     231   [ #  #  #  # ]:           0 :                                                                                                                   path) < 0)
     232                 :             :                 {
     233                 :             :                         /*
     234                 :             :                          * There's a race condition, when the database is dropped at the
     235                 :             :                          * same time that we process the pending unlink requests. If the
     236                 :             :                          * DROP DATABASE deletes the file before we do, we will get ENOENT
     237                 :             :                          * here. rmtree() also has to ignore ENOENT errors, to deal with
     238                 :             :                          * the possibility that we delete the file first.
     239                 :             :                          */
     240         [ #  # ]:           0 :                         if (errno != ENOENT)
     241   [ #  #  #  # ]:           0 :                                 ereport(WARNING,
     242                 :             :                                                 (errcode_for_file_access(),
     243                 :             :                                                  errmsg("could not remove file \"%s\": %m", path)));
     244                 :           0 :                 }
     245                 :             : 
     246                 :             :                 /* Mark the list entry as canceled, just in case */
     247                 :           0 :                 entry->canceled = true;
     248                 :             : 
     249                 :             :                 /*
     250                 :             :                  * As in ProcessSyncRequests, we don't want to stop absorbing fsync
     251                 :             :                  * requests for a long time when there are many deletions to be done.
     252                 :             :                  * We can safely call AbsorbSyncRequests() at this point in the loop.
     253                 :             :                  */
     254         [ #  # ]:           0 :                 if (--absorb_counter <= 0)
     255                 :             :                 {
     256                 :           0 :                         AbsorbSyncRequests();
     257                 :           0 :                         absorb_counter = UNLINKS_PER_ABSORB;
     258                 :           0 :                 }
     259      [ #  #  # ]:           0 :         }
     260                 :             : 
     261                 :             :         /*
     262                 :             :          * If we reached the end of the list, we can just remove the whole list
     263                 :             :          * (remembering to pfree all the PendingUnlinkEntry objects).  Otherwise,
     264                 :             :          * we must keep the entries at or after "lc".
     265                 :             :          */
     266         [ -  + ]:           7 :         if (lc == NULL)
     267                 :             :         {
     268                 :           7 :                 list_free_deep(pendingUnlinks);
     269                 :           7 :                 pendingUnlinks = NIL;
     270                 :           7 :         }
     271                 :             :         else
     272                 :             :         {
     273                 :           0 :                 int                     ntodelete = list_cell_number(pendingUnlinks, lc);
     274                 :             : 
     275         [ #  # ]:           0 :                 for (int i = 0; i < ntodelete; i++)
     276                 :           0 :                         pfree(list_nth(pendingUnlinks, i));
     277                 :             : 
     278                 :           0 :                 pendingUnlinks = list_delete_first_n(pendingUnlinks, ntodelete);
     279                 :           0 :         }
     280                 :           7 : }
     281                 :             : 
     282                 :             : /*
     283                 :             :  *      ProcessSyncRequests() -- Process queued fsync requests.
     284                 :             :  */
     285                 :             : void
     286                 :           7 : ProcessSyncRequests(void)
     287                 :             : {
     288                 :             :         static bool sync_in_progress = false;
     289                 :             : 
     290                 :           7 :         HASH_SEQ_STATUS hstat;
     291                 :           7 :         PendingFsyncEntry *entry;
     292                 :           7 :         int                     absorb_counter;
     293                 :             : 
     294                 :             :         /* Statistics on sync times */
     295                 :           7 :         int                     processed = 0;
     296                 :           7 :         instr_time      sync_start,
     297                 :             :                                 sync_end,
     298                 :             :                                 sync_diff;
     299                 :           7 :         uint64          elapsed;
     300                 :           7 :         uint64          longest = 0;
     301                 :           7 :         uint64          total_elapsed = 0;
     302                 :             : 
     303                 :             :         /*
     304                 :             :          * This is only called during checkpoints, and checkpoints should only
     305                 :             :          * occur in processes that have created a pendingOps.
     306                 :             :          */
     307         [ +  - ]:           7 :         if (!pendingOps)
     308   [ #  #  #  # ]:           0 :                 elog(ERROR, "cannot sync without a pendingOps table");
     309                 :             : 
     310                 :             :         /*
     311                 :             :          * If we are in the checkpointer, the sync had better include all fsync
     312                 :             :          * requests that were queued by backends up to this point.  The tightest
     313                 :             :          * race condition that could occur is that a buffer that must be written
     314                 :             :          * and fsync'd for the checkpoint could have been dumped by a backend just
     315                 :             :          * before it was visited by BufferSync().  We know the backend will have
     316                 :             :          * queued an fsync request before clearing the buffer's dirtybit, so we
     317                 :             :          * are safe as long as we do an Absorb after completing BufferSync().
     318                 :             :          */
     319                 :           7 :         AbsorbSyncRequests();
     320                 :             : 
     321                 :             :         /*
     322                 :             :          * To avoid excess fsync'ing (in the worst case, maybe a never-terminating
     323                 :             :          * checkpoint), we want to ignore fsync requests that are entered into the
     324                 :             :          * hashtable after this point --- they should be processed next time,
     325                 :             :          * instead.  We use sync_cycle_ctr to tell old entries apart from new
     326                 :             :          * ones: new ones will have cycle_ctr equal to the incremented value of
     327                 :             :          * sync_cycle_ctr.
     328                 :             :          *
     329                 :             :          * In normal circumstances, all entries present in the table at this point
     330                 :             :          * will have cycle_ctr exactly equal to the current (about to be old)
     331                 :             :          * value of sync_cycle_ctr.  However, if we fail partway through the
     332                 :             :          * fsync'ing loop, then older values of cycle_ctr might remain when we
     333                 :             :          * come back here to try again.  Repeated checkpoint failures would
     334                 :             :          * eventually wrap the counter around to the point where an old entry
     335                 :             :          * might appear new, causing us to skip it, possibly allowing a checkpoint
     336                 :             :          * to succeed that should not have.  To forestall wraparound, any time the
     337                 :             :          * previous ProcessSyncRequests() failed to complete, run through the
     338                 :             :          * table and forcibly set cycle_ctr = sync_cycle_ctr.
     339                 :             :          *
     340                 :             :          * Do not merge this loop with the main loop: the problem is exactly
     341                 :             :          * that the main loop may fail before having visited all the entries.
     342                 :             :          * From a performance point of view it doesn't matter anyway, as this path
     343                 :             :          * will never be taken in a system that's functioning normally.
     344                 :             :          */
     345         [ +  - ]:           7 :         if (sync_in_progress)
     346                 :             :         {
     347                 :             :                 /* prior try failed, so update any stale cycle_ctr values */
     348                 :           0 :                 hash_seq_init(&hstat, pendingOps);
     349         [ #  # ]:           0 :                 while ((entry = (PendingFsyncEntry *) hash_seq_search(&hstat)) != NULL)
     350                 :             :                 {
     351                 :           0 :                         entry->cycle_ctr = sync_cycle_ctr;
     352                 :             :                 }
     353                 :           0 :         }
     354                 :             : 
     355                 :             :         /* Advance counter so that new hashtable entries are distinguishable */
     356                 :           7 :         sync_cycle_ctr++;
     357                 :             : 
     358                 :             :         /* Set flag to detect failure if we don't reach the end of the loop */
     359                 :           7 :         sync_in_progress = true;
     360                 :             : 
     361                 :             :         /* Now scan the hashtable for fsync requests to process */
     362                 :           7 :         absorb_counter = FSYNCS_PER_ABSORB;
     363                 :           7 :         hash_seq_init(&hstat, pendingOps);
     364         [ +  + ]:         913 :         while ((entry = (PendingFsyncEntry *) hash_seq_search(&hstat)) != NULL)
     365                 :             :         {
     366                 :         906 :                 int                     failures;
     367                 :             : 
     368                 :             :                 /*
     369                 :             :                  * If the entry is new then skip it this time; it will be processed next time.
     370                 :             :                  * Note "continue" bypasses the hash-remove call at the bottom of the
     371                 :             :                  * loop.
     372                 :             :                  */
     373         [ -  + ]:         906 :                 if (entry->cycle_ctr == sync_cycle_ctr)
     374                 :           0 :                         continue;
     375                 :             : 
     376                 :             :                 /* Else assert we haven't missed it */
     377         [ +  - ]:         906 :                 Assert((CycleCtr) (entry->cycle_ctr + 1) == sync_cycle_ctr);
     378                 :             : 
     379                 :             :                 /*
     380                 :             :                  * If fsync is off then we don't have to bother opening the file at
     381                 :             :                  * all.  (We delay checking until this point so that changing fsync on
     382                 :             :                  * the fly behaves sensibly.)
     383                 :             :                  */
     384         [ +  - ]:         906 :                 if (enableFsync)
     385                 :             :                 {
     386                 :             :                         /*
     387                 :             :                          * If in checkpointer, we want to absorb pending requests every so
     388                 :             :                          * often to prevent overflow of the fsync request queue.  It is
     389                 :             :                          * unspecified whether newly-added entries will be visited by
     390                 :             :                          * hash_seq_search, but we don't care since we don't need to
     391                 :             :                          * process them anyway.
     392                 :             :                          */
     393         [ #  # ]:           0 :                         if (--absorb_counter <= 0)
     394                 :             :                         {
     395                 :           0 :                                 AbsorbSyncRequests();
     396                 :           0 :                                 absorb_counter = FSYNCS_PER_ABSORB;
     397                 :           0 :                         }
     398                 :             : 
     399                 :             :                         /*
     400                 :             :                          * The fsync table could contain requests to fsync segments that
     401                 :             :                          * have been deleted (unlinked) by the time we get to them. Rather
     402                 :             :                          * than just hoping an ENOENT (or EACCES on Windows) error can be
     403                 :             :                          * ignored, what we do on error is absorb pending requests and
     404                 :             :                          * then retry. Since mdunlink() queues a "cancel" message before
     405                 :             :                          * actually unlinking, the fsync request is guaranteed to be
     406                 :             :                          * marked canceled after the absorb if it really was this case.
     407                 :             :                          * DROP DATABASE likewise has to tell us to forget fsync requests
     408                 :             :                          * before it starts deletions.
     409                 :             :                          */
     410         [ #  # ]:           0 :                         for (failures = 0; !entry->canceled; failures++)
     411                 :             :                         {
     412                 :           0 :                                 char            path[MAXPGPATH];
     413                 :             : 
     414                 :           0 :                                 INSTR_TIME_SET_CURRENT(sync_start);
     415                 :           0 :                                 if (syncsw[entry->tag.handler].sync_syncfiletag(&entry->tag,
     416   [ #  #  #  # ]:           0 :                                                                                                                                 path) == 0)
     417                 :             :                                 {
     418                 :             :                                         /* Success; update statistics about sync timing */
     419                 :           0 :                                         INSTR_TIME_SET_CURRENT(sync_end);
     420                 :           0 :                                         sync_diff = sync_end;
     421                 :           0 :                                         INSTR_TIME_SUBTRACT(sync_diff, sync_start);
     422                 :           0 :                                         elapsed = INSTR_TIME_GET_MICROSEC(sync_diff);
     423         [ #  # ]:           0 :                                         if (elapsed > longest)
     424                 :           0 :                                                 longest = elapsed;
     425                 :           0 :                                         total_elapsed += elapsed;
     426                 :           0 :                                         processed++;
     427                 :             : 
     428         [ #  # ]:           0 :                                         if (log_checkpoints)
     429   [ #  #  #  # ]:           0 :                                                 elog(DEBUG1, "checkpoint sync: number=%d file=%s time=%.3f ms",
     430                 :             :                                                          processed,
     431                 :             :                                                          path,
     432                 :             :                                                          (double) elapsed / 1000);
     433                 :             : 
     434                 :           0 :                                         break;          /* out of retry loop */
     435                 :             :                                 }
     436                 :             : 
     437                 :             :                                 /*
     438                 :             :                                  * It is possible that the relation has been dropped or
     439                 :             :                                  * truncated since the fsync request was entered. Therefore,
     440                 :             :                                  * allow ENOENT, but only if we didn't fail already on this
     441                 :             :                                  * file.
     442                 :             :                                  */
     443   [ #  #  #  # ]:           0 :                                 if (!FILE_POSSIBLY_DELETED(errno) || failures > 0)
     444   [ #  #  #  #  :           0 :                                         ereport(data_sync_elevel(ERROR),
                   #  # ]
     445                 :             :                                                         (errcode_for_file_access(),
     446                 :             :                                                          errmsg("could not fsync file \"%s\": %m",
     447                 :             :                                                                         path)));
     448                 :             :                                 else
     449   [ #  #  #  # ]:           0 :                                         ereport(DEBUG1,
     450                 :             :                                                         (errcode_for_file_access(),
     451                 :             :                                                          errmsg_internal("could not fsync file \"%s\" but retrying: %m",
     452                 :             :                                                                                          path)));
     453                 :             : 
     454                 :             :                                 /*
     455                 :             :                                  * Absorb incoming requests and check to see if a cancel
     456                 :             :                                  * arrived for this relation fork.
     457                 :             :                                  */
     458                 :           0 :                                 AbsorbSyncRequests();
     459                 :           0 :                                 absorb_counter = FSYNCS_PER_ABSORB; /* might as well... */
     460         [ #  # ]:           0 :                         }                                       /* end retry loop */
     461                 :           0 :                 }
     462                 :             : 
     463                 :             :                 /* We are done with this entry, remove it */
     464         [ +  - ]:         906 :                 if (hash_search(pendingOps, &entry->tag, HASH_REMOVE, NULL) == NULL)
     465   [ #  #  #  # ]:           0 :                         elog(ERROR, "pendingOps corrupted");
     466         [ -  + ]:         906 :         }                                                       /* end loop over hashtable entries */
     467                 :             : 
     468                 :             :         /* Return sync performance metrics for report at checkpoint end */
     469                 :           7 :         CheckpointStats.ckpt_sync_rels = processed;
     470                 :           7 :         CheckpointStats.ckpt_longest_sync = longest;
     471                 :           7 :         CheckpointStats.ckpt_agg_sync_time = total_elapsed;
     472                 :             : 
     473                 :             :         /* Flag successful completion of ProcessSyncRequests */
     474                 :           7 :         sync_in_progress = false;
     475                 :           7 : }
     476                 :             : 
     477                 :             : /*
     478                 :             :  * RememberSyncRequest() -- callback from checkpointer side of sync request
     479                 :             :  *
     480                 :             :  * We stuff fsync requests into the local hash table for execution
     481                 :             :  * during the checkpointer's next checkpoint.  UNLINK requests go into a
     482                 :             :  * separate linked list, however, because they get processed separately.
     483                 :             :  *
     484                 :             :  * See sync.h for more information on the types of sync requests supported.
     485                 :             :  */
     486                 :             : void
     487                 :        9996 : RememberSyncRequest(const FileTag *ftag, SyncRequestType type)
     488                 :             : {
     489         [ +  - ]:        9996 :         Assert(pendingOps);
     490                 :             : 
     491         [ -  + ]:        9996 :         if (type == SYNC_FORGET_REQUEST)
     492                 :             :         {
     493                 :           0 :                 PendingFsyncEntry *entry;
     494                 :             : 
     495                 :             :                 /* Cancel previously entered request */
     496                 :           0 :                 entry = (PendingFsyncEntry *) hash_search(pendingOps,
     497                 :           0 :                                                                                                   ftag,
     498                 :             :                                                                                                   HASH_FIND,
     499                 :             :                                                                                                   NULL);
     500         [ #  # ]:           0 :                 if (entry != NULL)
     501                 :           0 :                         entry->canceled = true;
     502                 :           0 :         }
     503         [ -  + ]:        9996 :         else if (type == SYNC_FILTER_REQUEST)
     504                 :             :         {
     505                 :           0 :                 HASH_SEQ_STATUS hstat;
     506                 :           0 :                 PendingFsyncEntry *pfe;
     507                 :           0 :                 ListCell   *cell;
     508                 :             : 
     509                 :             :                 /* Cancel matching fsync requests */
     510                 :           0 :                 hash_seq_init(&hstat, pendingOps);
     511         [ #  # ]:           0 :                 while ((pfe = (PendingFsyncEntry *) hash_seq_search(&hstat)) != NULL)
     512                 :             :                 {
     513   [ #  #  #  # ]:           0 :                         if (pfe->tag.handler == ftag->handler &&
     514                 :           0 :                                 syncsw[ftag->handler].sync_filetagmatches(ftag, &pfe->tag))
     515                 :           0 :                                 pfe->canceled = true;
     516                 :             :                 }
     517                 :             : 
     518                 :             :                 /* Cancel matching unlink requests */
     519   [ #  #  #  #  :           0 :                 foreach(cell, pendingUnlinks)
                   #  # ]
     520                 :             :                 {
     521                 :           0 :                         PendingUnlinkEntry *pue = (PendingUnlinkEntry *) lfirst(cell);
     522                 :             : 
     523   [ #  #  #  # ]:           0 :                         if (pue->tag.handler == ftag->handler &&
     524                 :           0 :                                 syncsw[ftag->handler].sync_filetagmatches(ftag, &pue->tag))
     525                 :           0 :                                 pue->canceled = true;
     526                 :           0 :                 }
     527                 :           0 :         }
     528         [ -  + ]:        9996 :         else if (type == SYNC_UNLINK_REQUEST)
     529                 :             :         {
     530                 :             :                 /* Unlink request: put it in the linked list */
     531                 :           0 :                 MemoryContext oldcxt = MemoryContextSwitchTo(pendingOpsCxt);
     532                 :           0 :                 PendingUnlinkEntry *entry;
     533                 :             : 
     534                 :           0 :                 entry = palloc_object(PendingUnlinkEntry);
     535                 :           0 :                 entry->tag = *ftag;
     536                 :           0 :                 entry->cycle_ctr = checkpoint_cycle_ctr;
     537                 :           0 :                 entry->canceled = false;
     538                 :             : 
     539                 :           0 :                 pendingUnlinks = lappend(pendingUnlinks, entry);
     540                 :             : 
     541                 :           0 :                 MemoryContextSwitchTo(oldcxt);
     542                 :           0 :         }
     543                 :             :         else
     544                 :             :         {
     545                 :             :                 /* Normal case: enter a request to fsync this segment */
     546                 :        9996 :                 MemoryContext oldcxt = MemoryContextSwitchTo(pendingOpsCxt);
     547                 :        9996 :                 PendingFsyncEntry *entry;
     548                 :        9996 :                 bool            found;
     549                 :             : 
     550         [ +  - ]:        9996 :                 Assert(type == SYNC_REQUEST);
     551                 :             : 
     552                 :       19992 :                 entry = (PendingFsyncEntry *) hash_search(pendingOps,
     553                 :        9996 :                                                                                                   ftag,
     554                 :             :                                                                                                   HASH_ENTER,
     555                 :             :                                                                                                   &found);
     556                 :             :                 /* if new entry, or was previously canceled, initialize it */
     557   [ +  +  -  + ]:        9996 :                 if (!found || entry->canceled)
     558                 :             :                 {
     559                 :         906 :                         entry->cycle_ctr = sync_cycle_ctr;
     560                 :         906 :                         entry->canceled = false;
     561                 :         906 :                 }
     562                 :             : 
     563                 :             :                 /*
     564                 :             :                  * NB: it's intentional that we don't change cycle_ctr if the entry
     565                 :             :                  * already exists.  The cycle_ctr must represent the oldest fsync
     566                 :             :                  * request that could be in the entry.
     567                 :             :                  */
     568                 :             : 
     569                 :        9996 :                 MemoryContextSwitchTo(oldcxt);
     570                 :        9996 :         }
     571                 :        9996 : }
     572                 :             : 
     573                 :             : /*
     574                 :             :  * Register the sync request locally, or forward it to the checkpointer.
     575                 :             :  *
     576                 :             :  * If retryOnError is true, we'll keep trying if there is no space in the
     577                 :             :  * queue.  Return true if we succeeded, or false if there wasn't space.
     578                 :             :  */
     579                 :             : bool
     580                 :       77976 : RegisterSyncRequest(const FileTag *ftag, SyncRequestType type,
     581                 :             :                                         bool retryOnError)
     582                 :             : {
     583                 :       77976 :         bool            ret;
     584                 :             : 
     585         [ +  + ]:       77976 :         if (pendingOps != NULL)
     586                 :             :         {
     587                 :             :                 /* standalone backend or startup process: fsync state is local */
     588                 :        6371 :                 RememberSyncRequest(ftag, type);
     589                 :        6371 :                 return true;
     590                 :             :         }
     591                 :             : 
     592                 :       71605 :         for (;;)
     593                 :             :         {
     594                 :             :                 /*
     595                 :             :                  * Notify the checkpointer about it.  If we fail to queue a message in
     596                 :             :                  * retryOnError mode, we have to sleep and try again ... ugly, but
     597                 :             :                  * hopefully won't happen often.
     598                 :             :                  *
     599                 :             :                  * XXX should we CHECK_FOR_INTERRUPTS in this loop?  Escaping with an
     600                 :             :                  * error in the case of SYNC_UNLINK_REQUEST would leave the
     601                 :             :                  * no-longer-used file still present on disk, which would be bad, so
     602                 :             :                  * I'm inclined to assume that the checkpointer will always empty the
     603                 :             :                  * queue soon.
     604                 :             :                  */
     605                 :       71605 :                 ret = ForwardSyncRequest(ftag, type);
     606                 :             : 
     607                 :             :                 /*
     608                 :             :                  * If we are successful in queueing the request, or we failed and were
     609                 :             :                  * instructed not to retry on error, break.
     610                 :             :                  */
     611   [ -  +  #  #  :       71605 :                 if (ret || (!ret && !retryOnError))
                   #  # ]
     612                 :       71605 :                         break;
     613                 :             : 
     614                 :           0 :                 WaitLatch(NULL, WL_EXIT_ON_PM_DEATH | WL_TIMEOUT, 10,
     615                 :             :                                   WAIT_EVENT_REGISTER_SYNC_REQUEST);
     616                 :             :         }
     617                 :             : 
     618                 :       71605 :         return ret;
     619                 :       77976 : }
        

Generated by: LCOV version 2.3.2-1