LCOV - code coverage report
Current view: top level - src/backend/storage/lmgr - s_lock.c (source / functions) Coverage Total Hit
Test: Code coverage Lines: 83.7 % 43 36
Test Date: 2026-01-26 10:56:24 Functions: 83.3 % 6 5
Legend: Lines:     hit not hit
Branches: + taken - not taken # not executed
Branches: 60.7 % 28 17

             Branch data     Line data    Source code
       1                 :             : /*-------------------------------------------------------------------------
       2                 :             :  *
       3                 :             :  * s_lock.c
       4                 :             :  *         Implementation of spinlocks.
       5                 :             :  *
       6                 :             :  * When waiting for a contended spinlock we loop tightly for a while, then
       7                 :             :  * delay using pg_usleep() and try again.  Preferably, "a while" should be a
       8                 :             :  * small multiple of the maximum time we expect a spinlock to be held.  100
       9                 :             :  * iterations seems about right as an initial guess.  However, on a
      10                 :             :  * uniprocessor the loop is a waste of cycles, while in a multi-CPU scenario
      11                 :             :  * it's usually better to spin a bit longer than to call the kernel, so we try
      12                 :             :  * to adapt the spin loop count depending on whether we seem to be in a
      13                 :             :  * uniprocessor or multiprocessor.
      14                 :             :  *
      15                 :             :  * Note: you might think MIN_SPINS_PER_DELAY should be just 1, but you'd
      16                 :             :  * be wrong; there are platforms where that can result in a "stuck
      17                 :             :  * spinlock" failure.  This has been seen particularly on Alphas; it seems
      18                 :             :  * that the first TAS after returning from kernel space will always fail
      19                 :             :  * on that hardware.
      20                 :             :  *
      21                 :             :  * Once we do decide to block, we use randomly increasing pg_usleep()
      22                 :             :  * delays. The first delay is 1 msec, then the delay randomly increases to
      23                 :             :  * about one second, after which we reset to 1 msec and start again.  The
      24                 :             :  * idea here is that in the presence of heavy contention we need to
      25                 :             :  * increase the delay, else the spinlock holder may never get to run and
      26                 :             :  * release the lock.  (Consider situation where spinlock holder has been
      27                 :             :  * nice'd down in priority by the scheduler --- it will not get scheduled
      28                 :             :  * until all would-be acquirers are sleeping, so if we always use a 1-msec
      29                 :             :  * sleep, there is a real possibility of starvation.)  But we can't just
      30                 :             :  * clamp the delay to an upper bound, else it would take a long time to
      31                 :             :  * make a reasonable number of tries.
      32                 :             :  *
      33                 :             :  * We time out and declare error after NUM_DELAYS delays (thus, exactly
      34                 :             :  * that many tries).  With the given settings, this will usually take 2 or
      35                 :             :  * so minutes.  It seems better to fix the total number of tries (and thus
      36                 :             :  * the probability of unintended failure) than to fix the total time
      37                 :             :  * spent.
      38                 :             :  *
      39                 :             :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      40                 :             :  * Portions Copyright (c) 1994, Regents of the University of California
      41                 :             :  *
      42                 :             :  *
      43                 :             :  * IDENTIFICATION
      44                 :             :  *        src/backend/storage/lmgr/s_lock.c
      45                 :             :  *
      46                 :             :  *-------------------------------------------------------------------------
      47                 :             :  */
      48                 :             : #include "postgres.h"
      49                 :             : 
      50                 :             : #include <time.h>
      51                 :             : #include <unistd.h>
      52                 :             : 
      53                 :             : #include "common/pg_prng.h"
      54                 :             : #include "storage/s_lock.h"
      55                 :             : #include "utils/wait_event.h"
      56                 :             : 
      57                 :             : #define MIN_SPINS_PER_DELAY 10
      58                 :             : #define MAX_SPINS_PER_DELAY 1000
      59                 :             : #define NUM_DELAYS                      1000
      60                 :             : #define MIN_DELAY_USEC          1000L
      61                 :             : #define MAX_DELAY_USEC          1000000L
      62                 :             : 
      63                 :             : #ifdef S_LOCK_TEST
      64                 :             : /*
      65                 :             :  * These are needed by pgstat_report_wait_start in the standalone compile of
      66                 :             :  * s_lock_test.
      67                 :             :  */
      68                 :             : static uint32 local_my_wait_event_info;
      69                 :             : uint32     *my_wait_event_info = &local_my_wait_event_info;
      70                 :             : #endif
      71                 :             : 
      72                 :             : static int      spins_per_delay = DEFAULT_SPINS_PER_DELAY;
      73                 :             : 
      74                 :             : 
      75                 :             : /*
      76                 :             :  * s_lock_stuck() - complain about a stuck spinlock
      77                 :             :  */
      78                 :             : static void
      79                 :           0 : s_lock_stuck(const char *file, int line, const char *func)
      80                 :             : {
      81         [ #  # ]:           0 :         if (!func)
      82                 :           0 :                 func = "(unknown)";
      83                 :             : #if defined(S_LOCK_TEST)
      84                 :             :         fprintf(stderr,
      85                 :             :                         "\nStuck spinlock detected at %s, %s:%d.\n",
      86                 :             :                         func, file, line);
      87                 :             :         exit(1);
      88                 :             : #else
      89   [ #  #  #  # ]:           0 :         elog(PANIC, "stuck spinlock detected at %s, %s:%d",
      90                 :             :                  func, file, line);
      91                 :             : #endif
      92                 :           0 : }
      93                 :             : 
      94                 :             : /*
      95                 :             :  * s_lock(lock) - platform-independent portion of waiting for a spinlock.
      96                 :             :  */
      97                 :             : int
      98                 :         133 : s_lock(volatile slock_t *lock, const char *file, int line, const char *func)
      99                 :             : {
     100                 :         133 :         SpinDelayStatus delayStatus;
     101                 :             : 
     102                 :         133 :         init_spin_delay(&delayStatus, file, line, func);
     103                 :             : 
     104   [ +  +  +  + ]:       21122 :         while (TAS_SPIN(lock))
     105                 :             :         {
     106                 :       20989 :                 perform_spin_delay(&delayStatus);
     107                 :             :         }
     108                 :             : 
     109                 :         133 :         finish_spin_delay(&delayStatus);
     110                 :             : 
     111                 :         266 :         return delayStatus.delays;
     112                 :         133 : }
     113                 :             : 
     114                 :             : #ifdef USE_DEFAULT_S_UNLOCK
     115                 :             : void
     116                 :             : s_unlock(volatile slock_t *lock)
     117                 :             : {
     118                 :             :         *lock = 0;
     119                 :             : }
     120                 :             : #endif
     121                 :             : 
     122                 :             : /*
     123                 :             :  * Wait while spinning on a contended spinlock.
     124                 :             :  */
     125                 :             : void
     126                 :       24410 : perform_spin_delay(SpinDelayStatus *status)
     127                 :             : {
     128                 :             :         /* CPU-specific delay each time through the loop */
     129                 :       24410 :         SPIN_DELAY();
     130                 :             : 
     131                 :             :         /* Block the process every spins_per_delay tries */
     132         [ +  + ]:       24410 :         if (++(status->spins) >= spins_per_delay)
     133                 :             :         {
     134         [ +  - ]:          20 :                 if (++(status->delays) > NUM_DELAYS)
     135                 :           0 :                         s_lock_stuck(status->file, status->line, status->func);
     136                 :             : 
     137         [ -  + ]:          20 :                 if (status->cur_delay == 0) /* first time to delay? */
     138                 :          20 :                         status->cur_delay = MIN_DELAY_USEC;
     139                 :             : 
     140                 :             :                 /*
     141                 :             :                  * Once we start sleeping, the overhead of reporting a wait event is
     142                 :             :                  * justified. Actively spinning easily stands out in profilers, but
     143                 :             :                  * sleeping with an exponential backoff is harder to spot...
     144                 :             :                  *
     145                 :             :                  * We might want to report something more granular at some point, but
     146                 :             :                  * this is better than nothing.
     147                 :             :                  */
     148                 :          20 :                 pgstat_report_wait_start(WAIT_EVENT_SPIN_DELAY);
     149                 :          20 :                 pg_usleep(status->cur_delay);
     150                 :          20 :                 pgstat_report_wait_end();
     151                 :             : 
     152                 :             : #if defined(S_LOCK_TEST)
     153                 :             :                 fprintf(stdout, "*");
     154                 :             :                 fflush(stdout);
     155                 :             : #endif
     156                 :             : 
     157                 :             :                 /* scale the delay up by a random factor between 1X and 2X */
     158                 :          40 :                 status->cur_delay += (int) (status->cur_delay *
     159                 :          20 :                                                                         pg_prng_double(&pg_global_prng_state) + 0.5);
     160                 :             :                 /* wrap back to minimum delay when max is exceeded */
     161         [ +  - ]:          20 :                 if (status->cur_delay > MAX_DELAY_USEC)
     162                 :           0 :                         status->cur_delay = MIN_DELAY_USEC;
     163                 :             : 
     164                 :          20 :                 status->spins = 0;
     165                 :          20 :         }
     166                 :       24410 : }
     167                 :             : 
     168                 :             : /*
     169                 :             :  * After acquiring a spinlock, update estimates about how long to loop.
     170                 :             :  *
     171                 :             :  * If we were able to acquire the lock without delaying, it's a good
     172                 :             :  * indication we are in a multiprocessor.  If we had to delay, it's a sign
     173                 :             :  * (but not a sure thing) that we are in a uniprocessor. Hence, we
     174                 :             :  * decrement spins_per_delay slowly when we had to delay, and increase it
     175                 :             :  * rapidly when we didn't.  It's expected that spins_per_delay will
     176                 :             :  * converge to the minimum value on a uniprocessor and to the maximum
     177                 :             :  * value on a multiprocessor.
     178                 :             :  *
     179                 :             :  * Note: spins_per_delay is local within our current process. We want to
     180                 :             :  * average these observations across multiple backends, since it's
     181                 :             :  * relatively rare for this function to even get entered, and so a single
     182                 :             :  * backend might not live long enough to converge on a good value.  That
     183                 :             :  * is handled by the two routines below.
     184                 :             :  */
     185                 :             : void
     186                 :        1017 : finish_spin_delay(SpinDelayStatus *status)
     187                 :             : {
     188         [ +  + ]:        1017 :         if (status->cur_delay == 0)
     189                 :             :         {
     190                 :             :                 /* we never had to delay */
     191         [ +  + ]:         997 :                 if (spins_per_delay < MAX_SPINS_PER_DELAY)
     192         [ +  + ]:         366 :                         spins_per_delay = Min(spins_per_delay + 100, MAX_SPINS_PER_DELAY);
     193                 :         997 :         }
     194                 :             :         else
     195                 :             :         {
     196         [ -  + ]:          20 :                 if (spins_per_delay > MIN_SPINS_PER_DELAY)
     197         [ +  - ]:          20 :                         spins_per_delay = Max(spins_per_delay - 1, MIN_SPINS_PER_DELAY);
     198                 :             :         }
     199                 :        1017 : }
     200                 :             : 
     201                 :             : /*
     202                 :             :  * Set local copy of spins_per_delay during backend startup.
     203                 :             :  *
     204                 :             :  * NB: this has to be pretty fast as it is called while holding a spinlock
     205                 :             :  */
     206                 :             : void
     207                 :         806 : set_spins_per_delay(int shared_spins_per_delay)
     208                 :             : {
     209                 :         806 :         spins_per_delay = shared_spins_per_delay;
     210                 :         806 : }
     211                 :             : 
     212                 :             : /*
     213                 :             :  * Update shared estimate of spins_per_delay during backend exit.
     214                 :             :  *
     215                 :             :  * NB: this has to be pretty fast as it is called while holding a spinlock
     216                 :             :  */
     217                 :             : int
     218                 :         806 : update_spins_per_delay(int shared_spins_per_delay)
     219                 :             : {
     220                 :             :         /*
     221                 :             :          * We use an exponential moving average with a relatively slow adaptation
     222                 :             :          * rate, so that noise in any one backend's result won't affect the shared
     223                 :             :          * value too much.  As long as both inputs are within the allowed range,
     224                 :             :          * the result must be too, so we need not worry about clamping the result.
     225                 :             :          *
     226                 :             :          * We deliberately truncate rather than rounding; this is so that single
     227                 :             :          * adjustments inside a backend can affect the shared estimate (see the
     228                 :             :          * asymmetric adjustment rules above).
     229                 :             :          */
     230                 :         806 :         return (shared_spins_per_delay * 15 + spins_per_delay) / 16;
     231                 :             : }
     232                 :             : 
     233                 :             : 
     234                 :             : /*****************************************************************************/
     235                 :             : #if defined(S_LOCK_TEST)
     236                 :             : 
     237                 :             : /*
     238                 :             :  * test program for verifying a port's spinlock support.
     239                 :             :  */
     240                 :             : 
     241                 :             : struct test_lock_struct
     242                 :             : {
     243                 :             :         char            pad1;
     244                 :             :         slock_t         lock;
     245                 :             :         char            pad2;
     246                 :             : };
     247                 :             : 
     248                 :             : volatile struct test_lock_struct test_lock;
     249                 :             : 
     250                 :             : int
     251                 :             : main()
     252                 :             : {
     253                 :             :         pg_prng_seed(&pg_global_prng_state, (uint64) time(NULL));
     254                 :             : 
     255                 :             :         test_lock.pad1 = test_lock.pad2 = 0x44;
     256                 :             : 
     257                 :             :         S_INIT_LOCK(&test_lock.lock);
     258                 :             : 
     259                 :             :         if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44)
     260                 :             :         {
     261                 :             :                 printf("S_LOCK_TEST: failed, declared datatype is wrong size\n");
     262                 :             :                 return 1;
     263                 :             :         }
     264                 :             : 
     265                 :             :         if (!S_LOCK_FREE(&test_lock.lock))
     266                 :             :         {
     267                 :             :                 printf("S_LOCK_TEST: failed, lock not initialized\n");
     268                 :             :                 return 1;
     269                 :             :         }
     270                 :             : 
     271                 :             :         S_LOCK(&test_lock.lock);
     272                 :             : 
     273                 :             :         if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44)
     274                 :             :         {
     275                 :             :                 printf("S_LOCK_TEST: failed, declared datatype is wrong size\n");
     276                 :             :                 return 1;
     277                 :             :         }
     278                 :             : 
     279                 :             :         if (S_LOCK_FREE(&test_lock.lock))
     280                 :             :         {
     281                 :             :                 printf("S_LOCK_TEST: failed, lock not locked\n");
     282                 :             :                 return 1;
     283                 :             :         }
     284                 :             : 
     285                 :             :         S_UNLOCK(&test_lock.lock);
     286                 :             : 
     287                 :             :         if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44)
     288                 :             :         {
     289                 :             :                 printf("S_LOCK_TEST: failed, declared datatype is wrong size\n");
     290                 :             :                 return 1;
     291                 :             :         }
     292                 :             : 
     293                 :             :         if (!S_LOCK_FREE(&test_lock.lock))
     294                 :             :         {
     295                 :             :                 printf("S_LOCK_TEST: failed, lock not unlocked\n");
     296                 :             :                 return 1;
     297                 :             :         }
     298                 :             : 
     299                 :             :         S_LOCK(&test_lock.lock);
     300                 :             : 
     301                 :             :         if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44)
     302                 :             :         {
     303                 :             :                 printf("S_LOCK_TEST: failed, declared datatype is wrong size\n");
     304                 :             :                 return 1;
     305                 :             :         }
     306                 :             : 
     307                 :             :         if (S_LOCK_FREE(&test_lock.lock))
     308                 :             :         {
     309                 :             :                 printf("S_LOCK_TEST: failed, lock not re-locked\n");
     310                 :             :                 return 1;
     311                 :             :         }
     312                 :             : 
     313                 :             :         printf("S_LOCK_TEST: this will print %d stars and then\n", NUM_DELAYS);
     314                 :             :         printf("             exit with a 'stuck spinlock' message\n");
     315                 :             :         printf("             if S_LOCK() and TAS() are working.\n");
     316                 :             :         fflush(stdout);
     317                 :             : 
     318                 :             :         s_lock(&test_lock.lock, __FILE__, __LINE__, __func__);
     319                 :             : 
     320                 :             :         printf("S_LOCK_TEST: failed, lock not locked\n");
     321                 :             :         return 1;
     322                 :             : }
     323                 :             : 
     324                 :             : #endif                                                  /* S_LOCK_TEST */
        

Generated by: LCOV version 2.3.2-1