LCOV - code coverage report
Current view: top level - src/backend/storage/buffer - freelist.c (source / functions) Coverage Total Hit
Test: Code coverage Lines: 87.3 % 220 192
Test Date: 2026-01-26 10:56:24 Functions: 93.3 % 15 14
Legend: Lines:     hit not hit
Branches: + taken - not taken # not executed
Branches: 54.4 % 114 62

             Branch data     Line data    Source code
       1                 :             : /*-------------------------------------------------------------------------
       2                 :             :  *
       3                 :             :  * freelist.c
       4                 :             :  *        routines for managing the buffer pool's replacement strategy.
       5                 :             :  *
       6                 :             :  *
       7                 :             :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       8                 :             :  * Portions Copyright (c) 1994, Regents of the University of California
       9                 :             :  *
      10                 :             :  *
      11                 :             :  * IDENTIFICATION
      12                 :             :  *        src/backend/storage/buffer/freelist.c
      13                 :             :  *
      14                 :             :  *-------------------------------------------------------------------------
      15                 :             :  */
      16                 :             : #include "postgres.h"
      17                 :             : 
      18                 :             : #include "pgstat.h"
      19                 :             : #include "port/atomics.h"
      20                 :             : #include "storage/buf_internals.h"
      21                 :             : #include "storage/bufmgr.h"
      22                 :             : #include "storage/proc.h"
      23                 :             : 
      24                 :             : #define INT_ACCESS_ONCE(var)    ((int)(*((volatile int *)&(var))))
      25                 :             : 
      26                 :             : 
       27                 :             : /*
       28                 :             :  * The shared freelist control information.
       29                 :             :  */
       30                 :             : typedef struct
       31                 :             : {
       32                 :             :         /* Spinlock: protects the values below */
       33                 :             :         slock_t         buffer_strategy_lock;
       34                 :             : 
       35                 :             :         /*
       36                 :             :          * clock-sweep hand: index of next buffer to consider grabbing. Note that
       37                 :             :          * this isn't a concrete buffer - we only ever increase the value. So, to
       38                 :             :          * get an actual buffer, it needs to be used modulo NBuffers.
       39                 :             :          */
       40                 :             :         pg_atomic_uint32 nextVictimBuffer;
       41                 :             : 
       42                 :             :         /*
       43                 :             :          * Statistics.  These counters should be wide enough that they can't
       44                 :             :          * overflow during a single bgwriter cycle.
                          :             :          *
                          :             :          * completePasses is only advanced while buffer_strategy_lock is held
                          :             :          * (see ClockSweepTick()), so that StrategySyncStart() can read it and
                          :             :          * nextVictimBuffer as a consistent pair.
       45                 :             :          */
       46                 :             :         uint32          completePasses; /* Complete cycles of the clock-sweep */
       47                 :             :         pg_atomic_uint32 numBufferAllocs;       /* Buffers allocated since last reset */
       48                 :             : 
       49                 :             :         /*
       50                 :             :          * Bgworker process to be notified upon activity or -1 if none. See
       51                 :             :          * StrategyNotifyBgWriter.
       52                 :             :          */
       53                 :             :         int                     bgwprocno;
       54                 :             : } BufferStrategyControl;
      55                 :             : 
      56                 :             : /* Pointers to shared state */
      57                 :             : static BufferStrategyControl *StrategyControl = NULL;
      58                 :             : 
       59                 :             : /*
       60                 :             :  * Private (non-shared) state for managing a ring of shared buffers to re-use.
       61                 :             :  * This is currently the only kind of BufferAccessStrategy object, but someday
       62                 :             :  * we might have more kinds.
       63                 :             :  */
       64                 :             : typedef struct BufferAccessStrategyData
       65                 :             : {
       66                 :             :         /* Overall strategy type */
       67                 :             :         BufferAccessStrategyType btype;
       68                 :             :         /* Number of elements in buffers[] array */
       69                 :             :         int                     nbuffers;
       70                 :             : 
       71                 :             :         /*
       72                 :             :          * Index of the "current" slot in the ring, ie, the one most recently
       73                 :             :          * returned by GetBufferFromRing.
       74                 :             :          */
       75                 :             :         int                     current;
       76                 :             : 
       77                 :             :         /*
       78                 :             :          * Array of buffer numbers.  InvalidBuffer (that is, zero) indicates we
       79                 :             :          * have not yet selected a buffer for this ring slot.  For allocation
       80                 :             :          * simplicity this is palloc'd together with the fixed fields of the
       81                 :             :          * struct.
                          :             :          *
                          :             :          * This is a C99 flexible array member; its length is nbuffers above.
       82                 :             :          */
       83                 :             :         Buffer          buffers[FLEXIBLE_ARRAY_MEMBER];
       84                 :             : }                       BufferAccessStrategyData;
      85                 :             : 
      86                 :             : 
      87                 :             : /* Prototypes for internal functions */
      88                 :             : static BufferDesc *GetBufferFromRing(BufferAccessStrategy strategy,
      89                 :             :                                                                          uint64 *buf_state);
      90                 :             : static void AddBufferToRing(BufferAccessStrategy strategy,
      91                 :             :                                                         BufferDesc *buf);
      92                 :             : 
       93                 :             : /*
       94                 :             :  * ClockSweepTick - Helper routine for StrategyGetBuffer()
       95                 :             :  *
       96                 :             :  * Move the clock hand one buffer ahead of its current position and return the
       97                 :             :  * id of the buffer now under the hand.
                          :             :  *
                          :             :  * The returned id is already reduced modulo NBuffers, so callers may pass it
                          :             :  * directly to GetBufferDescriptor().
       98                 :             :  */
       99                 :             : static inline uint32
      100                 :       66394 : ClockSweepTick(void)
      101                 :             : {
      102                 :       66394 :         uint32          victim;
      103                 :             : 
      104                 :             :         /*
      105                 :             :          * Atomically move hand ahead one buffer - if there's several processes
      106                 :             :          * doing this, this can lead to buffers being returned slightly out of
      107                 :             :          * apparent order.
      108                 :             :          */
      109                 :       66394 :         victim =
      110                 :       66394 :                 pg_atomic_fetch_add_u32(&StrategyControl->nextVictimBuffer, 1);
      111                 :             : 
      112         [ +  + ]:       66394 :         if (victim >= NBuffers)
      113                 :             :         {
      114                 :           3 :                 uint32          originalVictim = victim;
      115                 :             : 
      116                 :             :                 /* always wrap what we look up in BufferDescriptors */
      117                 :           3 :                 victim = victim % NBuffers;
      118                 :             : 
      119                 :             :                 /*
      120                 :             :                  * If we're the one that just caused a wraparound, force
      121                 :             :                  * completePasses to be incremented while holding the spinlock. We
      122                 :             :                  * need the spinlock so StrategySyncStart() can return a consistent
      123                 :             :                  * value consisting of nextVictimBuffer and completePasses.
      124                 :             :                  */
      125         [ +  - ]:           3 :                 if (victim == 0)
      126                 :             :                 {
      127                 :           3 :                         uint32          expected;
      128                 :           3 :                         uint32          wrapped;
      129                 :           3 :                         bool            success = false;
      130                 :             : 
      131                 :           3 :                         expected = originalVictim + 1;
      132                 :             : 
      133         [ +  + ]:           6 :                         while (!success)
      134                 :             :                         {
      135                 :             :                                 /*
      136                 :             :                                  * Acquire the spinlock while increasing completePasses. That
      137                 :             :                                  * allows other readers to read nextVictimBuffer and
      138                 :             :                                  * completePasses in a consistent manner which is required for
      139                 :             :                                  * StrategySyncStart().  In theory delaying the increment
      140                 :             :                                  * could lead to an overflow of nextVictimBuffers, but that's
      141                 :             :                                  * highly unlikely and wouldn't be particularly harmful.
      142                 :             :                                  */
      143         [ -  + ]:           3 :                                 SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
      144                 :             : 
      145                 :           3 :                                 wrapped = expected % NBuffers;
      146                 :             : 
                          :             :                                 /*
                          :             :                                  * On failure the CAS refreshes "expected" with the current
                          :             :                                  * counter value, so the next iteration retries against it.
                          :             :                                  */
      147                 :           6 :                                 success = pg_atomic_compare_exchange_u32(&StrategyControl->nextVictimBuffer,
      148                 :           3 :                                                                                                                  &expected, wrapped);
      149         [ -  + ]:           3 :                                 if (success)
      150                 :           3 :                                         StrategyControl->completePasses++;
      151                 :           3 :                                 SpinLockRelease(&StrategyControl->buffer_strategy_lock);
      152                 :             :                         }
      153                 :           3 :                 }
      154                 :           3 :         }
      155                 :      132788 :         return victim;
      156                 :       66394 : }
     157                 :             : 
      158                 :             : /*
      159                 :             :  * StrategyGetBuffer
      160                 :             :  *
      161                 :             :  *      Called by the bufmgr to get the next candidate buffer to use in
      162                 :             :  *      GetVictimBuffer(). The only hard requirement GetVictimBuffer() has is that
      163                 :             :  *      the selected buffer must not currently be pinned by anyone.
      164                 :             :  *
      165                 :             :  *      strategy is a BufferAccessStrategy object, or NULL for default strategy.
      166                 :             :  *
      167                 :             :  *      It is the caller's responsibility to ensure the buffer ownership can be
      168                 :             :  *      tracked via TrackNewBufferPin().
      169                 :             :  *
      170                 :             :  *      The buffer is pinned and marked as owned, using TrackNewBufferPin(),
      171                 :             :  *      before returning.
                          :             :  *
                          :             :  *      On return, *buf_state holds the buffer's state as of the pinning CAS and
                          :             :  *      *from_ring reports whether the buffer came from the strategy ring.
      172                 :             :  */
      173                 :             : BufferDesc *
      174                 :       40537 : StrategyGetBuffer(BufferAccessStrategy strategy, uint64 *buf_state, bool *from_ring)
      175                 :             : {
      176                 :       40537 :         BufferDesc *buf;
      177                 :       40537 :         int                     bgwprocno;
      178                 :       40537 :         int                     trycounter;
      179                 :             : 
      180                 :       40537 :         *from_ring = false;
      181                 :             : 
      182                 :             :         /*
      183                 :             :          * If given a strategy object, see whether it can select a buffer. We
      184                 :             :          * assume strategy objects don't need buffer_strategy_lock.
      185                 :             :          */
      186         [ +  + ]:       40537 :         if (strategy != NULL)
      187                 :             :         {
      188                 :        9305 :                 buf = GetBufferFromRing(strategy, buf_state);
      189         [ +  + ]:        9305 :                 if (buf != NULL)
      190                 :             :                 {
      191                 :          17 :                         *from_ring = true;
      192                 :          17 :                         return buf;
      193                 :             :                 }
      194                 :        9288 :         }
      195                 :             : 
      196                 :             :         /*
      197                 :             :          * If asked, we need to waken the bgwriter. Since we don't want to rely on
      198                 :             :          * a spinlock for this we force a read from shared memory once, and then
      199                 :             :          * set the latch based on that value. We need to go through that length
      200                 :             :          * because otherwise bgwprocno might be reset while/after we check because
      201                 :             :          * the compiler might just reread from memory.
      202                 :             :          *
      203                 :             :          * This can possibly set the latch of the wrong process if the bgwriter
      204                 :             :          * dies in the wrong moment. But since PGPROC->procLatch is never
      205                 :             :          * deallocated the worst consequence of that is that we set the latch of
      206                 :             :          * some arbitrary process.
      207                 :             :          */
      208                 :       40520 :         bgwprocno = INT_ACCESS_ONCE(StrategyControl->bgwprocno);
      209         [ +  + ]:       40520 :         if (bgwprocno != -1)
      210                 :             :         {
      211                 :             :                 /* reset bgwprocno first, before setting the latch */
      212                 :           2 :                 StrategyControl->bgwprocno = -1;
      213                 :             : 
      214                 :             :                 /*
      215                 :             :                  * Not acquiring ProcArrayLock here which is slightly icky. It's
      216                 :             :                  * actually fine because procLatch isn't ever freed, so we just can
      217                 :             :                  * potentially set the wrong process' (or no process') latch.
      218                 :             :                  */
      219                 :           2 :                 SetLatch(&GetPGProcByNumber(bgwprocno)->procLatch);
      220                 :           2 :         }
      221                 :             : 
      222                 :             :         /*
      223                 :             :          * We count buffer allocation requests so that the bgwriter can estimate
      224                 :             :          * the rate of buffer consumption.  Note that buffers recycled by a
      225                 :             :          * strategy object are intentionally not counted here.
      226                 :             :          */
      227                 :       40520 :         pg_atomic_fetch_add_u32(&StrategyControl->numBufferAllocs, 1);
      228                 :             : 
      229                 :             :         /* Use the "clock sweep" algorithm to find a free buffer */
      230                 :       40520 :         trycounter = NBuffers;
      231                 :       66394 :         for (;;)
      232                 :             :         {
      233                 :       66394 :                 uint64          old_buf_state;
      234                 :       66394 :                 uint64          local_buf_state;
      235                 :             : 
      236                 :       66394 :                 buf = GetBufferDescriptor(ClockSweepTick());
      237                 :             : 
      238                 :             :                 /*
      239                 :             :                  * Check whether the buffer can be used and pin it if so. Do this
      240                 :             :                  * using a CAS loop, to avoid having to lock the buffer header.
      241                 :             :                  */
      242                 :       66394 :                 old_buf_state = pg_atomic_read_u64(&buf->state);
      243                 :       66394 :                 for (;;)
      244                 :             :                 {
      245                 :       66394 :                         local_buf_state = old_buf_state;
      246                 :             : 
      247                 :             :                         /*
      248                 :             :                          * If the buffer is pinned or has a nonzero usage_count, we cannot
      249                 :             :                          * use it; decrement the usage_count (unless pinned) and keep
      250                 :             :                          * scanning.
      251                 :             :                          */
      252                 :             : 
      253         [ +  + ]:       66394 :                         if (BUF_STATE_GET_REFCOUNT(local_buf_state) != 0)
      254                 :             :                         {
      255         [ +  - ]:           1 :                                 if (--trycounter == 0)
      256                 :             :                                 {
      257                 :             :                                         /*
      258                 :             :                                          * We've scanned all the buffers without making any state
      259                 :             :                                          * changes, so all the buffers are pinned (or were when we
      260                 :             :                                          * looked at them). We could hope that someone will free
      261                 :             :                                          * one eventually, but it's probably better to fail than
      262                 :             :                                          * to risk getting stuck in an infinite loop.
      263                 :             :                                          */
      264   [ #  #  #  # ]:           0 :                                         elog(ERROR, "no unpinned buffers available");
      265                 :           0 :                                 }
      266                 :           1 :                                 break;
      267                 :             :                         }
      268                 :             : 
      269                 :             :                         /* See equivalent code in PinBuffer() */
      270         [ -  + ]:       66393 :                         if (unlikely(local_buf_state & BM_LOCKED))
      271                 :             :                         {
      272                 :           0 :                                 old_buf_state = WaitBufHdrUnlocked(buf);
      273                 :           0 :                                 continue;
      274                 :             :                         }
      275                 :             : 
      276         [ +  + ]:       66393 :                         if (BUF_STATE_GET_USAGECOUNT(local_buf_state) != 0)
      277                 :             :                         {
      278                 :       25873 :                                 local_buf_state -= BUF_USAGECOUNT_ONE;
      279                 :             : 
      280   [ -  +  -  + ]:       51746 :                                 if (pg_atomic_compare_exchange_u64(&buf->state, &old_buf_state,
      281                 :       25873 :                                                                                                    local_buf_state))
      282                 :             :                                 {
                          :             :                                         /* made a state change, so restart the starvation counter */
      283                 :       25873 :                                         trycounter = NBuffers;
      284                 :       25873 :                                         break;
      285                 :             :                                 }
      286                 :           0 :                         }
      287                 :             :                         else
      288                 :             :                         {
      289                 :             :                                 /* pin the buffer if the CAS succeeds */
      290                 :       40520 :                                 local_buf_state += BUF_REFCOUNT_ONE;
      291                 :             : 
      292   [ +  -  +  - ]:       81040 :                                 if (pg_atomic_compare_exchange_u64(&buf->state, &old_buf_state,
      293                 :       40520 :                                                                                                    local_buf_state))
      294                 :             :                                 {
      295                 :             :                                         /* Found a usable buffer */
      296         [ +  + ]:       40520 :                                         if (strategy != NULL)
      297                 :        9288 :                                                 AddBufferToRing(strategy, buf);
      298                 :       40520 :                                         *buf_state = local_buf_state;
      299                 :             : 
      300                 :       40520 :                                         TrackNewBufferPin(BufferDescriptorGetBuffer(buf));
      301                 :             : 
      302                 :       40520 :                                         return buf;
      303                 :             :                                 }
      304                 :             :                         }
      305                 :             :                 }
      306         [ +  + ]:       66394 :         }
      307                 :       40537 : }
     308                 :             : 
      309                 :             : /*
      310                 :             :  * StrategySyncStart -- tell BgBufferSync where to start syncing
      311                 :             :  *
      312                 :             :  * The result is the buffer index of the best buffer to sync first.
      313                 :             :  * BgBufferSync() will proceed circularly around the buffer array from there.
      314                 :             :  *
      315                 :             :  * In addition, we return the completed-pass count (which is effectively
      316                 :             :  * the higher-order bits of nextVictimBuffer) and the count of recent buffer
      317                 :             :  * allocs if non-NULL pointers are passed.  The alloc count is reset after
      318                 :             :  * being read.
      319                 :             :  */
      320                 :             : int
      321                 :          24 : StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
      322                 :             : {
      323                 :          24 :         uint32          nextVictimBuffer;
      324                 :          24 :         int                     result;
      325                 :             : 
                          :             :         /*
                          :             :          * Hold buffer_strategy_lock so that nextVictimBuffer and completePasses
                          :             :          * are read as a consistent pair; cf. ClockSweepTick().
                          :             :          */
      326         [ -  + ]:          24 :         SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
      327                 :          24 :         nextVictimBuffer = pg_atomic_read_u32(&StrategyControl->nextVictimBuffer);
      328                 :          24 :         result = nextVictimBuffer % NBuffers;
      329                 :             : 
      330         [ -  + ]:          24 :         if (complete_passes)
      331                 :             :         {
      332                 :          24 :                 *complete_passes = StrategyControl->completePasses;
      333                 :             : 
      334                 :             :                 /*
      335                 :             :                  * Additionally add the number of wraparounds that happened before
      336                 :             :                  * completePasses could be incremented. C.f. ClockSweepTick().
      337                 :             :                  */
      338                 :          24 :                 *complete_passes += nextVictimBuffer / NBuffers;
      339                 :          24 :         }
      340                 :             : 
      341         [ -  + ]:          24 :         if (num_buf_alloc)
      342                 :             :         {
      343                 :          24 :                 *num_buf_alloc = pg_atomic_exchange_u32(&StrategyControl->numBufferAllocs, 0);
      344                 :          24 :         }
      345                 :          24 :         SpinLockRelease(&StrategyControl->buffer_strategy_lock);
      346                 :          48 :         return result;
      347                 :          24 : }
     348                 :             : 
      349                 :             : /*
      350                 :             :  * StrategyNotifyBgWriter -- set or clear allocation notification latch
      351                 :             :  *
      352                 :             :  * If bgwprocno isn't -1, the next invocation of StrategyGetBuffer will
      353                 :             :  * set that latch.  Pass -1 to clear the pending notification before it
      354                 :             :  * happens.  This feature is used by the bgwriter process to wake itself up
      355                 :             :  * from hibernation, and is not meant for anybody else to use.
                          :             :  *
                          :             :  * bgwprocno is a PGPROC number; StrategyGetBuffer() resolves it to a latch
                          :             :  * via GetPGProcByNumber() when firing the notification.
      356                 :             :  */
      357                 :             : void
      358                 :           6 : StrategyNotifyBgWriter(int bgwprocno)
      359                 :             : {
      360                 :             :         /*
      361                 :             :          * We acquire buffer_strategy_lock just to ensure that the store appears
      362                 :             :          * atomic to StrategyGetBuffer.  The bgwriter should call this rather
      363                 :             :          * infrequently, so there's no performance penalty from being safe.
      364                 :             :          */
      365         [ -  + ]:           6 :         SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
      366                 :           6 :         StrategyControl->bgwprocno = bgwprocno;
      367                 :           6 :         SpinLockRelease(&StrategyControl->buffer_strategy_lock);
      368                 :           6 : }
     369                 :             : 
     370                 :             : 
      371                 :             : /*
      372                 :             :  * StrategyShmemSize
      373                 :             :  *
      374                 :             :  * estimate the size of shared memory used by the freelist-related structures.
      375                 :             :  *
      376                 :             :  * Note: for somewhat historical reasons, the buffer lookup hashtable size
      377                 :             :  * is also determined here.
                          :             :  *
                          :             :  * Returns the number of bytes of shared memory to reserve.
      378                 :             :  */
      379                 :             : Size
      380                 :           9 : StrategyShmemSize(void)
      381                 :             : {
      382                 :           9 :         Size            size = 0;
      383                 :             : 
      384                 :             :         /* size of lookup hash table ... see comment in StrategyInitialize */
      385                 :           9 :         size = add_size(size, BufTableShmemSize(NBuffers + NUM_BUFFER_PARTITIONS));
      386                 :             : 
      387                 :             :         /* size of the shared replacement strategy control block */
      388                 :           9 :         size = add_size(size, MAXALIGN(sizeof(BufferStrategyControl)));
      389                 :             : 
      390                 :          18 :         return size;
      391                 :           9 : }
     392                 :             : 
/*
 * StrategyInitialize -- initialize the buffer cache replacement
 *		strategy.
 *
 * Assumes: All of the buffers are already built into a linked list.
 *		Only called by postmaster and only during initialization.
 */
void
StrategyInitialize(bool init)
{
	bool		found;

	/*
	 * Initialize the shared buffer lookup hashtable.
	 *
	 * Since we can't tolerate running out of lookup table entries, we must be
	 * sure to specify an adequate table size here.  The maximum steady-state
	 * usage is of course NBuffers entries, but BufferAlloc() tries to insert
	 * a new entry before deleting the old.  In principle this could be
	 * happening in each partition concurrently, so we could need as many as
	 * NBuffers + NUM_BUFFER_PARTITIONS entries.
	 */
	InitBufTable(NBuffers + NUM_BUFFER_PARTITIONS);

	/*
	 * Get or create the shared strategy control block.  "found" tells us
	 * whether an earlier process already created and initialized it.
	 */
	StrategyControl = (BufferStrategyControl *)
		ShmemInitStruct("Buffer Strategy Status",
						sizeof(BufferStrategyControl),
						&found);

	if (!found)
	{
		/*
		 * Only done once, usually in postmaster
		 */
		Assert(init);

		SpinLockInit(&StrategyControl->buffer_strategy_lock);

		/* Initialize the clock-sweep pointer */
		pg_atomic_init_u32(&StrategyControl->nextVictimBuffer, 0);

		/* Clear statistics */
		StrategyControl->completePasses = 0;
		pg_atomic_init_u32(&StrategyControl->numBufferAllocs, 0);

		/* No pending notification (no bgwriter registered yet) */
		StrategyControl->bgwprocno = -1;
	}
	else
		Assert(!init);			/* re-attach must not claim first-time init */
}
     447                 :             : 
     448                 :             : 
     449                 :             : /* ----------------------------------------------------------------
     450                 :             :  *                              Backend-private buffer ring management
     451                 :             :  * ----------------------------------------------------------------
     452                 :             :  */
     453                 :             : 
     454                 :             : 
     455                 :             : /*
     456                 :             :  * GetAccessStrategy -- create a BufferAccessStrategy object
     457                 :             :  *
     458                 :             :  * The object is allocated in the current memory context.
     459                 :             :  */
     460                 :             : BufferAccessStrategy
     461                 :        2143 : GetAccessStrategy(BufferAccessStrategyType btype)
     462                 :             : {
     463                 :        2143 :         int                     ring_size_kb;
     464                 :             : 
     465                 :             :         /*
     466                 :             :          * Select ring size to use.  See buffer/README for rationales.
     467                 :             :          *
     468                 :             :          * Note: if you change the ring size for BAS_BULKREAD, see also
     469                 :             :          * SYNC_SCAN_REPORT_INTERVAL in access/heap/syncscan.c.
     470                 :             :          */
     471   [ -  +  +  -  :        2143 :         switch (btype)
                      - ]
     472                 :             :         {
     473                 :             :                 case BAS_NORMAL:
     474                 :             :                         /* if someone asks for NORMAL, just give 'em a "default" object */
     475                 :           0 :                         return NULL;
     476                 :             : 
     477                 :             :                 case BAS_BULKREAD:
     478                 :             :                         {
     479                 :         741 :                                 int                     ring_max_kb;
     480                 :             : 
     481                 :             :                                 /*
     482                 :             :                                  * The ring always needs to be large enough to allow some
     483                 :             :                                  * separation in time between providing a buffer to the user
     484                 :             :                                  * of the strategy and that buffer being reused. Otherwise the
     485                 :             :                                  * user's pin will prevent reuse of the buffer, even without
     486                 :             :                                  * concurrent activity.
     487                 :             :                                  *
     488                 :             :                                  * We also need to ensure the ring always is large enough for
     489                 :             :                                  * SYNC_SCAN_REPORT_INTERVAL, as noted above.
     490                 :             :                                  *
     491                 :             :                                  * Thus we start out a minimal size and increase the size
     492                 :             :                                  * further if appropriate.
     493                 :             :                                  */
     494                 :         741 :                                 ring_size_kb = 256;
     495                 :             : 
     496                 :             :                                 /*
     497                 :             :                                  * There's no point in a larger ring if we won't be allowed to
     498                 :             :                                  * pin sufficiently many buffers.  But we never limit to less
     499                 :             :                                  * than the minimal size above.
     500                 :             :                                  */
     501                 :         741 :                                 ring_max_kb = GetPinLimit() * (BLCKSZ / 1024);
     502         [ -  + ]:         741 :                                 ring_max_kb = Max(ring_size_kb, ring_max_kb);
     503                 :             : 
     504                 :             :                                 /*
     505                 :             :                                  * We would like the ring to additionally have space for the
     506                 :             :                                  * configured degree of IO concurrency. While being read in,
     507                 :             :                                  * buffers can obviously not yet be reused.
     508                 :             :                                  *
     509                 :             :                                  * Each IO can be up to io_combine_limit blocks large, and we
     510                 :             :                                  * want to start up to effective_io_concurrency IOs.
     511                 :             :                                  *
     512                 :             :                                  * Note that effective_io_concurrency may be 0, which disables
     513                 :             :                                  * AIO.
     514                 :             :                                  */
     515                 :        1482 :                                 ring_size_kb += (BLCKSZ / 1024) *
     516                 :        1482 :                                         io_combine_limit * effective_io_concurrency;
     517                 :             : 
     518         [ -  + ]:         741 :                                 if (ring_size_kb > ring_max_kb)
     519                 :         741 :                                         ring_size_kb = ring_max_kb;
     520                 :             :                                 break;
     521                 :         741 :                         }
     522                 :             :                 case BAS_BULKWRITE:
     523                 :        1402 :                         ring_size_kb = 16 * 1024;
     524                 :        1402 :                         break;
     525                 :             :                 case BAS_VACUUM:
     526                 :           0 :                         ring_size_kb = 2048;
     527                 :           0 :                         break;
     528                 :             : 
     529                 :             :                 default:
     530   [ #  #  #  # ]:           0 :                         elog(ERROR, "unrecognized buffer access strategy: %d",
     531                 :             :                                  (int) btype);
     532                 :           0 :                         return NULL;            /* keep compiler quiet */
     533                 :             :         }
     534                 :             : 
     535                 :        2143 :         return GetAccessStrategyWithSize(btype, ring_size_kb);
     536                 :        2143 : }
     537                 :             : 
     538                 :             : /*
     539                 :             :  * GetAccessStrategyWithSize -- create a BufferAccessStrategy object with a
     540                 :             :  *              number of buffers equivalent to the passed in size.
     541                 :             :  *
     542                 :             :  * If the given ring size is 0, no BufferAccessStrategy will be created and
     543                 :             :  * the function will return NULL.  ring_size_kb must not be negative.
     544                 :             :  */
     545                 :             : BufferAccessStrategy
     546                 :        2664 : GetAccessStrategyWithSize(BufferAccessStrategyType btype, int ring_size_kb)
     547                 :             : {
     548                 :        2664 :         int                     ring_buffers;
     549                 :        2664 :         BufferAccessStrategy strategy;
     550                 :             : 
     551         [ +  - ]:        2664 :         Assert(ring_size_kb >= 0);
     552                 :             : 
     553                 :             :         /* Figure out how many buffers ring_size_kb is */
     554                 :        2664 :         ring_buffers = ring_size_kb / (BLCKSZ / 1024);
     555                 :             : 
     556                 :             :         /* 0 means unlimited, so no BufferAccessStrategy required */
     557         [ +  + ]:        2664 :         if (ring_buffers == 0)
     558                 :           2 :                 return NULL;
     559                 :             : 
     560                 :             :         /* Cap to 1/8th of shared_buffers */
     561         [ -  + ]:        2662 :         ring_buffers = Min(NBuffers / 8, ring_buffers);
     562                 :             : 
     563                 :             :         /* NBuffers should never be less than 16, so this shouldn't happen */
     564         [ +  - ]:        2662 :         Assert(ring_buffers > 0);
     565                 :             : 
     566                 :             :         /* Allocate the object and initialize all elements to zeroes */
     567                 :        2662 :         strategy = (BufferAccessStrategy)
     568                 :        2662 :                 palloc0(offsetof(BufferAccessStrategyData, buffers) +
     569                 :        2662 :                                 ring_buffers * sizeof(Buffer));
     570                 :             : 
     571                 :             :         /* Set fields that don't start out zero */
     572                 :        2662 :         strategy->btype = btype;
     573                 :        2662 :         strategy->nbuffers = ring_buffers;
     574                 :             : 
     575                 :        2662 :         return strategy;
     576                 :        2664 : }
     577                 :             : 
     578                 :             : /*
     579                 :             :  * GetAccessStrategyBufferCount -- an accessor for the number of buffers in
     580                 :             :  *              the ring
     581                 :             :  *
     582                 :             :  * Returns 0 on NULL input to match behavior of GetAccessStrategyWithSize()
     583                 :             :  * returning NULL with 0 size.
     584                 :             :  */
     585                 :             : int
     586                 :           6 : GetAccessStrategyBufferCount(BufferAccessStrategy strategy)
     587                 :             : {
     588         [ +  - ]:           6 :         if (strategy == NULL)
     589                 :           0 :                 return 0;
     590                 :             : 
     591                 :           6 :         return strategy->nbuffers;
     592                 :           6 : }
     593                 :             : 
     594                 :             : /*
     595                 :             :  * GetAccessStrategyPinLimit -- get cap of number of buffers that should be pinned
     596                 :             :  *
     597                 :             :  * When pinning extra buffers to look ahead, users of a ring-based strategy are
     598                 :             :  * in danger of pinning too much of the ring at once while performing look-ahead.
     599                 :             :  * For some strategies, that means "escaping" from the ring, and in others it
     600                 :             :  * means forcing dirty data to disk very frequently with associated WAL
     601                 :             :  * flushing.  Since external code has no insight into any of that, allow
     602                 :             :  * individual strategy types to expose a clamp that should be applied when
     603                 :             :  * deciding on a maximum number of buffers to pin at once.
     604                 :             :  *
     605                 :             :  * Callers should combine this number with other relevant limits and take the
     606                 :             :  * minimum.
     607                 :             :  */
     608                 :             : int
     609                 :      347601 : GetAccessStrategyPinLimit(BufferAccessStrategy strategy)
     610                 :             : {
     611         [ +  + ]:      347601 :         if (strategy == NULL)
     612                 :      345007 :                 return NBuffers;
     613                 :             : 
     614         [ +  + ]:        2594 :         switch (strategy->btype)
     615                 :             :         {
     616                 :             :                 case BAS_BULKREAD:
     617                 :             : 
     618                 :             :                         /*
     619                 :             :                          * Since BAS_BULKREAD uses StrategyRejectBuffer(), dirty buffers
     620                 :             :                          * shouldn't be a problem and the caller is free to pin up to the
     621                 :             :                          * entire ring at once.
     622                 :             :                          */
     623                 :         738 :                         return strategy->nbuffers;
     624                 :             : 
     625                 :             :                 default:
     626                 :             : 
     627                 :             :                         /*
     628                 :             :                          * Tell caller not to pin more than half the buffers in the ring.
     629                 :             :                          * This is a trade-off between look ahead distance and deferring
     630                 :             :                          * writeback and associated WAL traffic.
     631                 :             :                          */
     632                 :        1856 :                         return strategy->nbuffers / 2;
     633                 :             :         }
     634                 :      347601 : }
     635                 :             : 
     636                 :             : /*
     637                 :             :  * FreeAccessStrategy -- release a BufferAccessStrategy object
     638                 :             :  *
     639                 :             :  * A simple pfree would do at the moment, but we would prefer that callers
     640                 :             :  * don't assume that much about the representation of BufferAccessStrategy.
     641                 :             :  */
     642                 :             : void
     643                 :        2110 : FreeAccessStrategy(BufferAccessStrategy strategy)
     644                 :             : {
     645                 :             :         /* don't crash if called on a "default" strategy */
     646         [ -  + ]:        2110 :         if (strategy != NULL)
     647                 :        2110 :                 pfree(strategy);
     648                 :        2110 : }
     649                 :             : 
/*
 * GetBufferFromRing -- returns a buffer from the ring, or NULL if the
 *		ring is empty / not usable.
 *
 * The buffer is pinned and marked as owned, using TrackNewBufferPin(), before
 * returning.
 *
 * Returns NULL (without advancing anything further) when the current ring
 * slot is empty or its buffer is in use; the caller then falls back to the
 * normal allocation path and refills the slot via AddBufferToRing().
 */
static BufferDesc *
GetBufferFromRing(BufferAccessStrategy strategy, uint64 *buf_state)
{
	BufferDesc *buf;
	Buffer		bufnum;
	uint64		old_buf_state;
	uint64		local_buf_state;	/* to avoid repeated (de-)referencing */


	/* Advance to next ring slot, wrapping around at nbuffers */
	if (++strategy->current >= strategy->nbuffers)
		strategy->current = 0;

	/*
	 * If the slot hasn't been filled yet, tell the caller to allocate a new
	 * buffer with the normal allocation strategy.  He will then fill this
	 * slot by calling AddBufferToRing with the new buffer.
	 */
	bufnum = strategy->buffers[strategy->current];
	if (bufnum == InvalidBuffer)
		return NULL;

	/* Buffer numbers are 1-based; descriptor array is 0-based */
	buf = GetBufferDescriptor(bufnum - 1);

	/*
	 * Check whether the buffer can be used and pin it if so. Do this using a
	 * CAS loop, to avoid having to lock the buffer header.
	 */
	old_buf_state = pg_atomic_read_u64(&buf->state);
	for (;;)
	{
		local_buf_state = old_buf_state;

		/*
		 * If the buffer is pinned we cannot use it under any circumstances.
		 *
		 * If usage_count is 0 or 1 then the buffer is fair game (we expect 1,
		 * since our own previous usage of the ring element would have left it
		 * there, but it might've been decremented by clock-sweep since then).
		 * A higher usage_count indicates someone else has touched the buffer,
		 * so we shouldn't re-use it.
		 */
		if (BUF_STATE_GET_REFCOUNT(local_buf_state) != 0
			|| BUF_STATE_GET_USAGECOUNT(local_buf_state) > 1)
			break;

		/* See equivalent code in PinBuffer() */
		if (unlikely(local_buf_state & BM_LOCKED))
		{
			/* header is spinlocked; wait and retry with the fresh state */
			old_buf_state = WaitBufHdrUnlocked(buf);
			continue;
		}

		/* pin the buffer if the CAS succeeds */
		local_buf_state += BUF_REFCOUNT_ONE;

		/* on CAS failure, old_buf_state is refreshed and we loop again */
		if (pg_atomic_compare_exchange_u64(&buf->state, &old_buf_state,
										   local_buf_state))
		{
			*buf_state = local_buf_state;

			TrackNewBufferPin(BufferDescriptorGetBuffer(buf));
			return buf;
		}
	}

	/*
	 * Tell caller to allocate a new buffer with the normal allocation
	 * strategy.  He'll then replace this ring element via AddBufferToRing.
	 */
	return NULL;
}
     729                 :             : 
     730                 :             : /*
     731                 :             :  * AddBufferToRing -- add a buffer to the buffer ring
     732                 :             :  *
     733                 :             :  * Caller must hold the buffer header spinlock on the buffer.  Since this
     734                 :             :  * is called with the spinlock held, it had better be quite cheap.
     735                 :             :  */
     736                 :             : static void
     737                 :        9288 : AddBufferToRing(BufferAccessStrategy strategy, BufferDesc *buf)
     738                 :             : {
     739                 :        9288 :         strategy->buffers[strategy->current] = BufferDescriptorGetBuffer(buf);
     740                 :        9288 : }
     741                 :             : 
     742                 :             : /*
     743                 :             :  * Utility function returning the IOContext of a given BufferAccessStrategy's
     744                 :             :  * strategy ring.
     745                 :             :  */
     746                 :             : IOContext
     747                 :    11370285 : IOContextForStrategy(BufferAccessStrategy strategy)
     748                 :             : {
     749         [ +  + ]:    11370285 :         if (!strategy)
     750                 :    11317990 :                 return IOCONTEXT_NORMAL;
     751                 :             : 
     752   [ -  +  +  +  :       52295 :         switch (strategy->btype)
                      - ]
     753                 :             :         {
     754                 :             :                 case BAS_NORMAL:
     755                 :             : 
     756                 :             :                         /*
     757                 :             :                          * Currently, GetAccessStrategy() returns NULL for
     758                 :             :                          * BufferAccessStrategyType BAS_NORMAL, so this case is
     759                 :             :                          * unreachable.
     760                 :             :                          */
     761                 :           0 :                         pg_unreachable();
     762                 :             :                         return IOCONTEXT_NORMAL;
     763                 :             :                 case BAS_BULKREAD:
     764                 :        4018 :                         return IOCONTEXT_BULKREAD;
     765                 :             :                 case BAS_BULKWRITE:
     766                 :       16488 :                         return IOCONTEXT_BULKWRITE;
     767                 :             :                 case BAS_VACUUM:
     768                 :       31789 :                         return IOCONTEXT_VACUUM;
     769                 :             :         }
     770                 :             : 
     771   [ #  #  #  # ]:           0 :         elog(ERROR, "unrecognized BufferAccessStrategyType: %d", strategy->btype);
     772                 :           0 :         pg_unreachable();
     773                 :    11370285 : }
     774                 :             : 
     775                 :             : /*
     776                 :             :  * StrategyRejectBuffer -- consider rejecting a dirty buffer
     777                 :             :  *
     778                 :             :  * When a nondefault strategy is used, the buffer manager calls this function
     779                 :             :  * when it turns out that the buffer selected by StrategyGetBuffer needs to
     780                 :             :  * be written out and doing so would require flushing WAL too.  This gives us
     781                 :             :  * a chance to choose a different victim.
     782                 :             :  *
     783                 :             :  * Returns true if buffer manager should ask for a new victim, and false
     784                 :             :  * if this buffer should be written and re-used.
     785                 :             :  */
     786                 :             : bool
     787                 :           0 : StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool from_ring)
     788                 :             : {
     789                 :             :         /* We only do this in bulkread mode */
     790         [ #  # ]:           0 :         if (strategy->btype != BAS_BULKREAD)
     791                 :           0 :                 return false;
     792                 :             : 
     793                 :             :         /* Don't muck with behavior of normal buffer-replacement strategy */
     794   [ #  #  #  # ]:           0 :         if (!from_ring ||
     795                 :           0 :                 strategy->buffers[strategy->current] != BufferDescriptorGetBuffer(buf))
     796                 :           0 :                 return false;
     797                 :             : 
     798                 :             :         /*
     799                 :             :          * Remove the dirty buffer from the ring; necessary to prevent infinite
     800                 :             :          * loop if all ring members are dirty.
     801                 :             :          */
     802                 :           0 :         strategy->buffers[strategy->current] = InvalidBuffer;
     803                 :             : 
     804                 :           0 :         return true;
     805                 :           0 : }
        

Generated by: LCOV version 2.3.2-1