Branch data Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * lwlock.c
4 : : * Lightweight lock manager
5 : : *
6 : : * Lightweight locks are intended primarily to provide mutual exclusion of
7 : : * access to shared-memory data structures. Therefore, they offer both
8 : : * exclusive and shared lock modes (to support read/write and read-only
 9                 :            :  * access to a shared object).  There are few other frills.  User-level
10 : : * locking should be done with the full lock manager --- which depends on
11 : : * LWLocks to protect its shared state.
12 : : *
13 : : * In addition to exclusive and shared modes, lightweight locks can be used to
 14                 :            :  * wait until a variable changes value.  Acquiring the lock with
 15                 :            :  * LWLockAcquire does not reset the variable; it keeps the value it had
 16                 :            :  * when the lock was last released, and it can be updated without
 17                 :            :  * releasing the lock by calling LWLockUpdateVar.  LWLockWaitForVar
18 : : * waits for the variable to be updated, or until the lock is free. When
19 : : * releasing the lock with LWLockReleaseClearVar() the value can be set to an
20 : : * appropriate value for a free lock. The meaning of the variable is up to
 21                 :            :  * the caller; the lightweight lock code just assigns and compares it.
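 *
 * For illustration only, a hypothetical lock holder and waiter could use
 * this facility roughly as follows ('progress' and 'seen' are made-up
 * names, not code from this file):
 *
 *		Holder:
 *			LWLockAcquire(lock, LW_EXCLUSIVE);
 *			LWLockUpdateVar(lock, &progress, new_position);
 *			LWLockReleaseClearVar(lock, &progress, 0);
 *
 *		Waiter:
 *			while (!LWLockWaitForVar(lock, &progress, seen, &seen))
 *				;		(still held, but 'progress' advanced past 'seen')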
22 : : *
23 : : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
24 : : * Portions Copyright (c) 1994, Regents of the University of California
25 : : *
26 : : * IDENTIFICATION
27 : : * src/backend/storage/lmgr/lwlock.c
28 : : *
29 : : * NOTES:
30 : : *
 31                 :            :  * This used to be a pretty straightforward reader-writer lock
32 : : * implementation, in which the internal state was protected by a
33 : : * spinlock. Unfortunately the overhead of taking the spinlock proved to be
34 : : * too high for workloads/locks that were taken in shared mode very
35 : : * frequently. Often we were spinning in the (obviously exclusive) spinlock,
36 : : * while trying to acquire a shared lock that was actually free.
37 : : *
38 : : * Thus a new implementation was devised that provides wait-free shared lock
39 : : * acquisition for locks that aren't exclusively locked.
40 : : *
41 : : * The basic idea is to have a single atomic variable 'lockcount' instead of
42 : : * the formerly separate shared and exclusive counters and to use atomic
43 : : * operations to acquire the lock. That's fairly easy to do for plain
44 : : * rw-spinlocks, but a lot harder for something like LWLocks that want to wait
45 : : * in the OS.
46 : : *
47 : : * For lock acquisition we use an atomic compare-and-exchange on the lockcount
48 : : * variable. For exclusive lock we swap in a sentinel value
49 : : * (LW_VAL_EXCLUSIVE), for shared locks we count the number of holders.
50 : : *
 51                 :            :  * To release the lock we use an atomic decrement.  If the
52 : : * new value is zero (we get that atomically), we know we can/have to release
53 : : * waiters.
54 : : *
55 : : * Obviously it is important that the sentinel value for exclusive locks
56 : : * doesn't conflict with the maximum number of possible share lockers -
57 : : * luckily MAX_BACKENDS makes that easily possible.
58 : : *
59 : : *
60 : : * The attentive reader might have noticed that naively doing the above has a
61 : : * glaring race condition: We try to lock using the atomic operations and
62 : : * notice that we have to wait. Unfortunately by the time we have finished
63 : : * queuing, the former locker very well might have already finished its
64 : : * work. That's problematic because we're now stuck waiting inside the OS.
 65                 :            :  *
 66                 :            :  * To mitigate those races we use a two-phased attempt at locking:
67 : : * Phase 1: Try to do it atomically, if we succeed, nice
68 : : * Phase 2: Add ourselves to the waitqueue of the lock
69 : : * Phase 3: Try to grab the lock again, if we succeed, remove ourselves from
70 : : * the queue
71 : : * Phase 4: Sleep till wake-up, goto Phase 1
72 : : *
 73                 :            :  * This protects us against the problem above: nobody can release too
 74                 :            :  * quickly, before we're queued, since after Phase 2 we're already queued.
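 *
 * In simplified code form, using the helper functions defined later in this
 * file, the acquisition loop is roughly this sketch:
 *
 *		for (;;)
 *		{
 *			if (!LWLockAttemptLock(lock, mode))
 *				break;							(Phase 1: got the lock)
 *			LWLockQueueSelf(lock, mode);		(Phase 2)
 *			if (!LWLockAttemptLock(lock, mode))
 *			{
 *				LWLockDequeueSelf(lock);		(Phase 3: got it after all)
 *				break;
 *			}
 *			... sleep on our semaphore until woken, then retry ...	(Phase 4)
 *		}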
75 : : * -------------------------------------------------------------------------
76 : : */
77 : : #include "postgres.h"
78 : :
79 : : #include "miscadmin.h"
80 : : #include "pg_trace.h"
81 : : #include "pgstat.h"
82 : : #include "port/pg_bitutils.h"
83 : : #include "storage/proc.h"
84 : : #include "storage/proclist.h"
85 : : #include "storage/procnumber.h"
86 : : #include "storage/spin.h"
87 : : #include "utils/memutils.h"
88 : :
89 : : #ifdef LWLOCK_STATS
90 : : #include "utils/hsearch.h"
91 : : #endif
92 : :
93 : :
94 : : #define LW_FLAG_HAS_WAITERS ((uint32) 1 << 31)
95 : : #define LW_FLAG_WAKE_IN_PROGRESS ((uint32) 1 << 30)
96 : : #define LW_FLAG_LOCKED ((uint32) 1 << 29)
97 : : #define LW_FLAG_BITS 3
98 : : #define LW_FLAG_MASK (((1<<LW_FLAG_BITS)-1)<<(32-LW_FLAG_BITS))
99 : :
100 : : /* assumes MAX_BACKENDS is a (power of 2) - 1, checked below */
101 : : #define LW_VAL_EXCLUSIVE (MAX_BACKENDS + 1)
102 : : #define LW_VAL_SHARED 1
103 : :
104 : : /* already (power of 2)-1, i.e. suitable for a mask */
105 : : #define LW_SHARED_MASK MAX_BACKENDS
106 : : #define LW_LOCK_MASK (MAX_BACKENDS | LW_VAL_EXCLUSIVE)
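
/*
 * Putting the above together: assuming MAX_BACKENDS is its current value of
 * 0x3FFFF (2^18 - 1), the 32-bit state word is laid out roughly as
 *
 *		bit  31		LW_FLAG_HAS_WAITERS
 *		bit  30		LW_FLAG_WAKE_IN_PROGRESS
 *		bit  29		LW_FLAG_LOCKED (wait-list mutex)
 *		bits 19-28	unused
 *		bit  18		LW_VAL_EXCLUSIVE (an exclusive holder is present)
 *		bits 0-17	number of shared holders (LW_SHARED_MASK)
 */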
107 : :
108 : :
109 : : StaticAssertDecl(((MAX_BACKENDS + 1) & MAX_BACKENDS) == 0,
110 : : "MAX_BACKENDS + 1 needs to be a power of 2");
111 : :
112 : : StaticAssertDecl((MAX_BACKENDS & LW_FLAG_MASK) == 0,
113 : : "MAX_BACKENDS and LW_FLAG_MASK overlap");
114 : :
115 : : StaticAssertDecl((LW_VAL_EXCLUSIVE & LW_FLAG_MASK) == 0,
116 : : "LW_VAL_EXCLUSIVE and LW_FLAG_MASK overlap");
117 : :
118 : : /*
119 : : * There are three sorts of LWLock "tranches":
120 : : *
121 : : * 1. The individually-named locks defined in lwlocklist.h each have their
122 : : * own tranche. We absorb the names of these tranches from there into
123 : : * BuiltinTrancheNames here.
124 : : *
125 : : * 2. There are some predefined tranches for built-in groups of locks defined
126 : : * in lwlocklist.h. We absorb the names of these tranches, too.
127 : : *
128 : : * 3. Extensions can create new tranches, via either RequestNamedLWLockTranche
129 : : * or LWLockNewTrancheId. These names are stored in shared memory and can be
130 : : * accessed via LWLockTrancheNames.
131 : : *
132 : : * All these names are user-visible as wait event names, so choose with care
133 : : * ... and do not forget to update the documentation's list of wait events.
134 : : */
135 : : static const char *const BuiltinTrancheNames[] = {
136 : : #define PG_LWLOCK(id, lockname) [id] = CppAsString(lockname),
137 : : #define PG_LWLOCKTRANCHE(id, lockname) [LWTRANCHE_##id] = CppAsString(lockname),
138 : : #include "storage/lwlocklist.h"
139 : : #undef PG_LWLOCK
140 : : #undef PG_LWLOCKTRANCHE
141 : : };
142 : :
143 : : StaticAssertDecl(lengthof(BuiltinTrancheNames) ==
144 : : LWTRANCHE_FIRST_USER_DEFINED,
145 : : "missing entries in BuiltinTrancheNames[]");
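
/*
 * For example, an extension loaded via shared_preload_libraries could create
 * the third sort of tranche like this (a sketch; the "my_ext" names are
 * hypothetical):
 *
 *		static void
 *		my_ext_shmem_request(void)
 *		{
 *			RequestNamedLWLockTranche("my_ext", 4);
 *		}
 *
 *		static void
 *		my_ext_shmem_startup(void)
 *		{
 *			LWLockPadded *locks = GetNamedLWLockTranche("my_ext");
 *
 *			LWLockAcquire(&locks[0].lock, LW_EXCLUSIVE);
 *			...
 *			LWLockRelease(&locks[0].lock);
 *		}
 */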
146 : :
147 : : /*
148 : : * This is indexed by tranche ID minus LWTRANCHE_FIRST_USER_DEFINED, and
149 : : * points to the shared memory locations of the names of all
150 : : * dynamically-created tranches. Backends inherit the pointer by fork from the
151 : : * postmaster (except in the EXEC_BACKEND case, where we have special measures
152 : : * to pass it down).
153 : : */
154 : : char **LWLockTrancheNames = NULL;
155 : :
156 : : /*
157 : : * This points to the main array of LWLocks in shared memory. Backends inherit
158 : : * the pointer by fork from the postmaster (except in the EXEC_BACKEND case,
159 : : * where we have special measures to pass it down).
160 : : */
161 : : LWLockPadded *MainLWLockArray = NULL;
162 : :
163 : : /*
164 : : * We use this structure to keep track of locked LWLocks for release
165 : : * during error recovery. Normally, only a few will be held at once, but
166 : : * occasionally the number can be much higher.
167 : : */
168 : : #define MAX_SIMUL_LWLOCKS 200
169 : :
170 : : /* struct representing the LWLocks we're holding */
171 : : typedef struct LWLockHandle
172 : : {
173 : : LWLock *lock;
174 : : LWLockMode mode;
175 : : } LWLockHandle;
176 : :
177 : : static int num_held_lwlocks = 0;
178 : : static LWLockHandle held_lwlocks[MAX_SIMUL_LWLOCKS];
179 : :
180 : : /* struct representing the LWLock tranche request for named tranche */
181 : : typedef struct NamedLWLockTrancheRequest
182 : : {
183 : : char tranche_name[NAMEDATALEN];
184 : : int num_lwlocks;
185 : : } NamedLWLockTrancheRequest;
186 : :
187 : : /*
188 : : * NamedLWLockTrancheRequests is the valid length of the request array. These
189 : : * variables are non-static so that launch_backend.c can copy them to child
190 : : * processes in EXEC_BACKEND builds.
191 : : */
192 : : int NamedLWLockTrancheRequests = 0;
193 : : NamedLWLockTrancheRequest *NamedLWLockTrancheRequestArray = NULL;
194 : :
195 : : /* postmaster's local copy of the request array */
196 : : static NamedLWLockTrancheRequest *LocalNamedLWLockTrancheRequestArray = NULL;
197 : :
198 : : /* shared memory counter of registered tranches */
199 : : int *LWLockCounter = NULL;
200 : :
201 : : /* backend-local counter of registered tranches */
202 : : static int LocalLWLockCounter;
203 : :
204 : : #define MAX_NAMED_TRANCHES 256
205 : :
206 : : static void InitializeLWLocks(void);
207 : : static inline void LWLockReportWaitStart(LWLock *lock);
208 : : static inline void LWLockReportWaitEnd(void);
209 : : static const char *GetLWTrancheName(uint16 trancheId);
210 : :
211 : : #define T_NAME(lock) \
212 : : GetLWTrancheName((lock)->tranche)
213 : :
214 : : #ifdef LWLOCK_STATS
215 : : typedef struct lwlock_stats_key
216 : : {
217 : : int tranche;
218 : : void *instance;
219 : : } lwlock_stats_key;
220 : :
221 : : typedef struct lwlock_stats
222 : : {
223 : : lwlock_stats_key key;
224 : : int sh_acquire_count;
225 : : int ex_acquire_count;
226 : : int block_count;
227 : : int dequeue_self_count;
228 : : int spin_delay_count;
229 : : } lwlock_stats;
230 : :
231 : : static HTAB *lwlock_stats_htab;
232 : : static lwlock_stats lwlock_stats_dummy;
233 : : #endif
234 : :
235 : : #ifdef LOCK_DEBUG
236 : : bool Trace_lwlocks = false;
237 : :
238 : : inline static void
239 : : PRINT_LWDEBUG(const char *where, LWLock *lock, LWLockMode mode)
240 : : {
241 : : /* hide statement & context here, otherwise the log is just too verbose */
242 : : if (Trace_lwlocks)
243 : : {
244 : : uint32 state = pg_atomic_read_u32(&lock->state);
245 : :
246 : : ereport(LOG,
247 : : (errhidestmt(true),
248 : : errhidecontext(true),
249 : : errmsg_internal("%d: %s(%s %p): excl %u shared %u haswaiters %u waiters %u waking %d",
250 : : MyProcPid,
251 : : where, T_NAME(lock), lock,
252 : : (state & LW_VAL_EXCLUSIVE) != 0,
253 : : state & LW_SHARED_MASK,
254 : : (state & LW_FLAG_HAS_WAITERS) != 0,
255 : : pg_atomic_read_u32(&lock->nwaiters),
256 : : (state & LW_FLAG_WAKE_IN_PROGRESS) != 0)));
257 : : }
258 : : }
259 : :
260 : : inline static void
261 : : LOG_LWDEBUG(const char *where, LWLock *lock, const char *msg)
262 : : {
263 : : /* hide statement & context here, otherwise the log is just too verbose */
264 : : if (Trace_lwlocks)
265 : : {
266 : : ereport(LOG,
267 : : (errhidestmt(true),
268 : : errhidecontext(true),
269 : : errmsg_internal("%s(%s %p): %s", where,
270 : : T_NAME(lock), lock, msg)));
271 : : }
272 : : }
273 : :
274 : : #else /* not LOCK_DEBUG */
275 : : #define PRINT_LWDEBUG(a,b,c) ((void)0)
276 : : #define LOG_LWDEBUG(a,b,c) ((void)0)
277 : : #endif /* LOCK_DEBUG */
278 : :
279 : : #ifdef LWLOCK_STATS
280 : :
281 : : static void init_lwlock_stats(void);
282 : : static void print_lwlock_stats(int code, Datum arg);
283 : : static lwlock_stats * get_lwlock_stats_entry(LWLock *lock);
284 : :
285 : : static void
286 : : init_lwlock_stats(void)
287 : : {
288 : : HASHCTL ctl;
289 : : static MemoryContext lwlock_stats_cxt = NULL;
290 : : static bool exit_registered = false;
291 : :
292 : : if (lwlock_stats_cxt != NULL)
293 : : MemoryContextDelete(lwlock_stats_cxt);
294 : :
295 : : /*
296 : : * The LWLock stats will be updated within a critical section, which
297 : : * requires allocating new hash entries. Allocations within a critical
298 : : * section are normally not allowed because running out of memory would
299 : : * lead to a PANIC, but LWLOCK_STATS is debugging code that's not normally
300 : : * turned on in production, so that's an acceptable risk. The hash entries
301 : : * are small, so the risk of running out of memory is minimal in practice.
302 : : */
303 : : lwlock_stats_cxt = AllocSetContextCreate(TopMemoryContext,
304 : : "LWLock stats",
305 : : ALLOCSET_DEFAULT_SIZES);
306 : : MemoryContextAllowInCriticalSection(lwlock_stats_cxt, true);
307 : :
308 : : ctl.keysize = sizeof(lwlock_stats_key);
309 : : ctl.entrysize = sizeof(lwlock_stats);
310 : : ctl.hcxt = lwlock_stats_cxt;
311 : : lwlock_stats_htab = hash_create("lwlock stats", 16384, &ctl,
312 : : HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
313 : : if (!exit_registered)
314 : : {
315 : : on_shmem_exit(print_lwlock_stats, 0);
316 : : exit_registered = true;
317 : : }
318 : : }
319 : :
320 : : static void
321 : : print_lwlock_stats(int code, Datum arg)
322 : : {
323 : : HASH_SEQ_STATUS scan;
324 : : lwlock_stats *lwstats;
325 : :
326 : : hash_seq_init(&scan, lwlock_stats_htab);
327 : :
328 : : /* Grab an LWLock to keep different backends from mixing reports */
329 : : LWLockAcquire(&MainLWLockArray[0].lock, LW_EXCLUSIVE);
330 : :
331 : : while ((lwstats = (lwlock_stats *) hash_seq_search(&scan)) != NULL)
332 : : {
333 : : fprintf(stderr,
334 : : "PID %d lwlock %s %p: shacq %u exacq %u blk %u spindelay %u dequeue self %u\n",
335 : : MyProcPid, GetLWTrancheName(lwstats->key.tranche),
336 : : lwstats->key.instance, lwstats->sh_acquire_count,
337 : : lwstats->ex_acquire_count, lwstats->block_count,
338 : : lwstats->spin_delay_count, lwstats->dequeue_self_count);
339 : : }
340 : :
341 : : LWLockRelease(&MainLWLockArray[0].lock);
342 : : }
343 : :
344 : : static lwlock_stats *
345 : : get_lwlock_stats_entry(LWLock *lock)
346 : : {
347 : : lwlock_stats_key key;
348 : : lwlock_stats *lwstats;
349 : : bool found;
350 : :
351 : : /*
352 : : * During shared memory initialization, the hash table doesn't exist yet.
353 : : * Stats of that phase aren't very interesting, so just collect operations
354 : : * on all locks in a single dummy entry.
355 : : */
356 : : if (lwlock_stats_htab == NULL)
357 : : return &lwlock_stats_dummy;
358 : :
359 : : /* Fetch or create the entry. */
360 : : MemSet(&key, 0, sizeof(key));
361 : : key.tranche = lock->tranche;
362 : : key.instance = lock;
363 : : lwstats = hash_search(lwlock_stats_htab, &key, HASH_ENTER, &found);
364 : : if (!found)
365 : : {
366 : : lwstats->sh_acquire_count = 0;
367 : : lwstats->ex_acquire_count = 0;
368 : : lwstats->block_count = 0;
369 : : lwstats->dequeue_self_count = 0;
370 : : lwstats->spin_delay_count = 0;
371 : : }
372 : : return lwstats;
373 : : }
374 : : #endif /* LWLOCK_STATS */
375 : :
376 : :
377 : : /*
378 : : * Compute number of LWLocks required by named tranches. These will be
379 : : * allocated in the main array.
380 : : */
381 : : static int
382 : 15 : NumLWLocksForNamedTranches(void)
383 : : {
384 : 15 : int numLocks = 0;
385 : 15 : int i;
386 : :
387 [ - + ]: 15 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
388 : 0 : numLocks += NamedLWLockTrancheRequestArray[i].num_lwlocks;
389 : :
390 : 30 : return numLocks;
391 : 15 : }
392 : :
393 : : /*
394 : : * Compute shmem space needed for LWLocks and named tranches.
395 : : */
396 : : Size
397 : 15 : LWLockShmemSize(void)
398 : : {
399 : 15 : Size size;
400 : 15 : int numLocks = NUM_FIXED_LWLOCKS;
401 : :
402 : : /*
403 : : * If re-initializing shared memory, the request array will no longer be
404 : : * accessible, so switch to the copy in postmaster's local memory. We'll
405 : : * copy it back into shared memory later when CreateLWLocks() is called
406 : : * again.
407 : : */
408 [ + - ]: 15 : if (LocalNamedLWLockTrancheRequestArray)
409 : 0 : NamedLWLockTrancheRequestArray = LocalNamedLWLockTrancheRequestArray;
410 : :
411 : : /* Calculate total number of locks needed in the main array. */
412 : 15 : numLocks += NumLWLocksForNamedTranches();
413 : :
414 : : /* Space for dynamic allocation counter. */
415 : 15 : size = MAXALIGN(sizeof(int));
416 : :
417 : : /* Space for named tranches. */
418 : 15 : size = add_size(size, mul_size(MAX_NAMED_TRANCHES, sizeof(char *)));
419 : 15 : size = add_size(size, mul_size(MAX_NAMED_TRANCHES, NAMEDATALEN));
420 : :
421 : : /*
422 : : * Make space for named tranche requests. This is done for the benefit of
423 : : * EXEC_BACKEND builds, which otherwise wouldn't be able to call
424 : : * GetNamedLWLockTranche() outside postmaster.
425 : : */
426 : 15 : size = add_size(size, mul_size(NamedLWLockTrancheRequests,
427 : : sizeof(NamedLWLockTrancheRequest)));
428 : :
429 : : /* Space for the LWLock array, plus room for cache line alignment. */
430 : 15 : size = add_size(size, LWLOCK_PADDED_SIZE);
431 : 15 : size = add_size(size, mul_size(numLocks, sizeof(LWLockPadded)));
432 : :
433 : 30 : return size;
434 : 15 : }
435 : :
436 : : /*
437 : : * Allocate shmem space for the main LWLock array and all tranches and
438 : : * initialize it.
439 : : */
440 : : void
441 : 6 : CreateLWLocks(void)
442 : : {
443 [ - + ]: 6 : if (!IsUnderPostmaster)
444 : : {
445 : 6 : Size spaceLocks = LWLockShmemSize();
446 : 6 : char *ptr;
447 : :
448 : : /* Allocate space */
449 : 6 : ptr = (char *) ShmemAlloc(spaceLocks);
450 : :
451 : : /* Initialize the dynamic-allocation counter for tranches */
452 : 6 : LWLockCounter = (int *) ptr;
453 : 6 : *LWLockCounter = LWTRANCHE_FIRST_USER_DEFINED;
454 : 6 : ptr += MAXALIGN(sizeof(int));
455 : :
456 : : /* Initialize tranche names */
457 : 6 : LWLockTrancheNames = (char **) ptr;
458 : 6 : ptr += MAX_NAMED_TRANCHES * sizeof(char *);
459 [ + + ]: 1542 : for (int i = 0; i < MAX_NAMED_TRANCHES; i++)
460 : : {
461 : 1536 : LWLockTrancheNames[i] = ptr;
462 : 1536 : ptr += NAMEDATALEN;
463 : 1536 : }
464 : :
465 : : /*
466 : : * Move named tranche requests to shared memory. This is done for the
467 : : * benefit of EXEC_BACKEND builds, which otherwise wouldn't be able to
468 : : * call GetNamedLWLockTranche() outside postmaster.
469 : : */
470 [ + - ]: 6 : if (NamedLWLockTrancheRequests > 0)
471 : : {
472 : : /*
473 : : * Save the pointer to the request array in postmaster's local
474 : : * memory. We'll need it if we ever need to re-initialize shared
475 : : * memory after a crash.
476 : : */
477 : 0 : LocalNamedLWLockTrancheRequestArray = NamedLWLockTrancheRequestArray;
478 : :
479 : 0 : memcpy(ptr, NamedLWLockTrancheRequestArray,
480 : : NamedLWLockTrancheRequests * sizeof(NamedLWLockTrancheRequest));
481 : 0 : NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *) ptr;
482 : 0 : ptr += NamedLWLockTrancheRequests * sizeof(NamedLWLockTrancheRequest);
483 : 0 : }
484 : :
485 : : /* Ensure desired alignment of LWLock array */
486 : 6 : ptr += LWLOCK_PADDED_SIZE - ((uintptr_t) ptr) % LWLOCK_PADDED_SIZE;
487 : 6 : MainLWLockArray = (LWLockPadded *) ptr;
488 : :
489 : : /* Initialize all LWLocks */
490 : 6 : InitializeLWLocks();
491 : 6 : }
492 : 6 : }
493 : :
494 : : /*
495 : : * Initialize LWLocks that are fixed and those belonging to named tranches.
496 : : */
497 : : static void
498 : 6 : InitializeLWLocks(void)
499 : : {
500 : 6 : int id;
501 : 6 : int i;
502 : 6 : int j;
503 : 6 : LWLockPadded *lock;
504 : :
505 : : /* Initialize all individual LWLocks in main array */
506 [ + + ]: 342 : for (id = 0, lock = MainLWLockArray; id < NUM_INDIVIDUAL_LWLOCKS; id++, lock++)
507 : 336 : LWLockInitialize(&lock->lock, id);
508 : :
509 : : /* Initialize buffer mapping LWLocks in main array */
510 : 6 : lock = MainLWLockArray + BUFFER_MAPPING_LWLOCK_OFFSET;
511 [ + + ]: 774 : for (id = 0; id < NUM_BUFFER_PARTITIONS; id++, lock++)
512 : 768 : LWLockInitialize(&lock->lock, LWTRANCHE_BUFFER_MAPPING);
513 : :
514 : : /* Initialize lmgrs' LWLocks in main array */
515 : 6 : lock = MainLWLockArray + LOCK_MANAGER_LWLOCK_OFFSET;
516 [ + + ]: 102 : for (id = 0; id < NUM_LOCK_PARTITIONS; id++, lock++)
517 : 96 : LWLockInitialize(&lock->lock, LWTRANCHE_LOCK_MANAGER);
518 : :
519 : : /* Initialize predicate lmgrs' LWLocks in main array */
520 : 6 : lock = MainLWLockArray + PREDICATELOCK_MANAGER_LWLOCK_OFFSET;
521 [ + + ]: 102 : for (id = 0; id < NUM_PREDICATELOCK_PARTITIONS; id++, lock++)
522 : 96 : LWLockInitialize(&lock->lock, LWTRANCHE_PREDICATE_LOCK_MANAGER);
523 : :
524 : : /*
525 : : * Copy the info about any named tranches into shared memory (so that
526 : : * other processes can see it), and initialize the requested LWLocks.
527 : : */
528 [ + - ]: 6 : if (NamedLWLockTrancheRequests > 0)
529 : : {
530 : 0 : lock = &MainLWLockArray[NUM_FIXED_LWLOCKS];
531 : :
532 [ # # ]: 0 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
533 : : {
534 : 0 : NamedLWLockTrancheRequest *request;
535 : 0 : int tranche;
536 : :
537 : 0 : request = &NamedLWLockTrancheRequestArray[i];
538 : 0 : tranche = LWLockNewTrancheId(request->tranche_name);
539 : :
540 [ # # ]: 0 : for (j = 0; j < request->num_lwlocks; j++, lock++)
541 : 0 : LWLockInitialize(&lock->lock, tranche);
542 : 0 : }
543 : 0 : }
544 : 6 : }
545 : :
546 : : /*
547 : : * InitLWLockAccess - initialize backend-local state needed to hold LWLocks
548 : : */
549 : : void
550 : 806 : InitLWLockAccess(void)
551 : : {
552 : : #ifdef LWLOCK_STATS
553 : : init_lwlock_stats();
554 : : #endif
555 : 806 : }
556 : :
557 : : /*
 558                 :            :  * GetNamedLWLockTranche - returns the base address of the LWLocks
 559                 :            :  * belonging to the specified tranche.
 560                 :            :  *
 561                 :            :  * The caller is expected to retrieve the requested number of LWLocks
 562                 :            :  * starting from the base lock address returned by this function.  This
 563                 :            :  * can be used for tranches requested via RequestNamedLWLockTranche().
564 : : */
565 : : LWLockPadded *
566 : 0 : GetNamedLWLockTranche(const char *tranche_name)
567 : : {
568 : 0 : int lock_pos;
569 : 0 : int i;
570 : :
571 : : /*
 572                 :            :          * Locate the base address of the LWLocks belonging to the requested
 573                 :            :          * tranche_name in MainLWLockArray.  LWLocks for named tranches are
 574                 :            :          * placed in MainLWLockArray after the fixed locks.
575 : : */
576 : 0 : lock_pos = NUM_FIXED_LWLOCKS;
577 [ # # ]: 0 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
578 : : {
579 : 0 : if (strcmp(NamedLWLockTrancheRequestArray[i].tranche_name,
580 [ # # # # ]: 0 : tranche_name) == 0)
581 : 0 : return &MainLWLockArray[lock_pos];
582 : :
583 : 0 : lock_pos += NamedLWLockTrancheRequestArray[i].num_lwlocks;
584 : 0 : }
585 : :
586 [ # # # # ]: 0 : elog(ERROR, "requested tranche is not registered");
587 : :
588 : : /* just to keep compiler quiet */
589 : 0 : return NULL;
590 : 0 : }
591 : :
592 : : /*
593 : : * Allocate a new tranche ID with the provided name.
594 : : */
595 : : int
596 : 2 : LWLockNewTrancheId(const char *name)
597 : : {
598 : 2 : int result;
599 : :
600 [ + - ]: 2 : if (!name)
601 [ # # # # ]: 0 : ereport(ERROR,
602 : : (errcode(ERRCODE_INVALID_NAME),
603 : : errmsg("tranche name cannot be NULL")));
604 : :
605 [ + - ]: 2 : if (strlen(name) >= NAMEDATALEN)
606 [ # # # # ]: 0 : ereport(ERROR,
607 : : (errcode(ERRCODE_NAME_TOO_LONG),
608 : : errmsg("tranche name too long"),
609 : : errdetail("LWLock tranche names must be no longer than %d bytes.",
610 : : NAMEDATALEN - 1)));
611 : :
612 : : /*
613 : : * We use the ShmemLock spinlock to protect LWLockCounter and
614 : : * LWLockTrancheNames.
615 : : */
616 [ - + ]: 2 : SpinLockAcquire(ShmemLock);
617 : :
618 [ + - ]: 2 : if (*LWLockCounter - LWTRANCHE_FIRST_USER_DEFINED >= MAX_NAMED_TRANCHES)
619 : : {
620 : 0 : SpinLockRelease(ShmemLock);
621 [ # # # # ]: 0 : ereport(ERROR,
622 : : (errmsg("maximum number of tranches already registered"),
623 : : errdetail("No more than %d tranches may be registered.",
624 : : MAX_NAMED_TRANCHES)));
625 : 0 : }
626 : :
627 : 2 : result = (*LWLockCounter)++;
628 : 2 : LocalLWLockCounter = *LWLockCounter;
629 : 2 : strlcpy(LWLockTrancheNames[result - LWTRANCHE_FIRST_USER_DEFINED], name, NAMEDATALEN);
630 : :
631 : 2 : SpinLockRelease(ShmemLock);
632 : :
633 : 4 : return result;
634 : 2 : }
635 : :
636 : : /*
637 : : * RequestNamedLWLockTranche
638 : : * Request that extra LWLocks be allocated during postmaster
639 : : * startup.
640 : : *
641 : : * This may only be called via the shmem_request_hook of a library that is
642 : : * loaded into the postmaster via shared_preload_libraries. Calls from
643 : : * elsewhere will fail.
644 : : *
645 : : * The tranche name will be user-visible as a wait event name, so try to
646 : : * use a name that fits the style for those.
647 : : */
648 : : void
649 : 0 : RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks)
650 : : {
651 : 0 : NamedLWLockTrancheRequest *request;
652 : : static int NamedLWLockTrancheRequestsAllocated;
653 : :
654 [ # # ]: 0 : if (!process_shmem_requests_in_progress)
655 [ # # # # ]: 0 : elog(FATAL, "cannot request additional LWLocks outside shmem_request_hook");
656 : :
657 [ # # ]: 0 : if (!tranche_name)
658 [ # # # # ]: 0 : ereport(ERROR,
659 : : (errcode(ERRCODE_INVALID_NAME),
660 : : errmsg("tranche name cannot be NULL")));
661 : :
662 [ # # ]: 0 : if (strlen(tranche_name) >= NAMEDATALEN)
663 [ # # # # ]: 0 : ereport(ERROR,
664 : : (errcode(ERRCODE_NAME_TOO_LONG),
665 : : errmsg("tranche name too long"),
666 : : errdetail("LWLock tranche names must be no longer than %d bytes.",
667 : : NAMEDATALEN - 1)));
668 : :
669 [ # # ]: 0 : if (NamedLWLockTrancheRequestArray == NULL)
670 : : {
671 : 0 : NamedLWLockTrancheRequestsAllocated = 16;
672 : 0 : NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
673 : 0 : MemoryContextAlloc(TopMemoryContext,
674 : 0 : NamedLWLockTrancheRequestsAllocated
675 : 0 : * sizeof(NamedLWLockTrancheRequest));
676 : 0 : }
677 : :
678 [ # # ]: 0 : if (NamedLWLockTrancheRequests >= NamedLWLockTrancheRequestsAllocated)
679 : : {
680 : 0 : int i = pg_nextpower2_32(NamedLWLockTrancheRequests + 1);
681 : :
682 : 0 : NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
683 : 0 : repalloc(NamedLWLockTrancheRequestArray,
684 : 0 : i * sizeof(NamedLWLockTrancheRequest));
685 : 0 : NamedLWLockTrancheRequestsAllocated = i;
686 : 0 : }
687 : :
688 : 0 : request = &NamedLWLockTrancheRequestArray[NamedLWLockTrancheRequests];
689 : 0 : strlcpy(request->tranche_name, tranche_name, NAMEDATALEN);
690 : 0 : request->num_lwlocks = num_lwlocks;
691 : 0 : NamedLWLockTrancheRequests++;
692 : 0 : }
693 : :
694 : : /*
695 : : * LWLockInitialize - initialize a new lwlock; it's initially unlocked
696 : : */
697 : : void
698 : 37190 : LWLockInitialize(LWLock *lock, int tranche_id)
699 : : {
700 : : /* verify the tranche_id is valid */
701 : 37190 : (void) GetLWTrancheName(tranche_id);
702 : :
703 : 37190 : pg_atomic_init_u32(&lock->state, 0);
704 : : #ifdef LOCK_DEBUG
705 : : pg_atomic_init_u32(&lock->nwaiters, 0);
706 : : #endif
707 : 37190 : lock->tranche = tranche_id;
708 : 37190 : proclist_init(&lock->waiters);
709 : 37190 : }
710 : :
711 : : /*
712 : : * Report start of wait event for light-weight locks.
713 : : *
 714                 :            :  * This function is used by all the lightweight lock operations that
 715                 :            :  * need to wait to acquire the lock.  It distinguishes the wait event
 716                 :            :  * based on the tranche and lock id.
717 : : */
718 : : static inline void
719 : 4221 : LWLockReportWaitStart(LWLock *lock)
720 : : {
721 : 4221 : pgstat_report_wait_start(PG_WAIT_LWLOCK | lock->tranche);
722 : 4221 : }
723 : :
724 : : /*
725 : : * Report end of wait event for light-weight locks.
726 : : */
727 : : static inline void
728 : 4221 : LWLockReportWaitEnd(void)
729 : : {
730 : 4221 : pgstat_report_wait_end();
731 : 4221 : }
732 : :
733 : : /*
734 : : * Return the name of an LWLock tranche.
735 : : */
736 : : static const char *
737 : 37191 : GetLWTrancheName(uint16 trancheId)
738 : : {
739 : : /* Built-in tranche or individual LWLock? */
740 [ + + ]: 37191 : if (trancheId < LWTRANCHE_FIRST_USER_DEFINED)
741 : 37151 : return BuiltinTrancheNames[trancheId];
742 : :
743 : : /*
744 : : * We only ever add new entries to LWLockTrancheNames, so most lookups can
745 : : * avoid taking the spinlock as long as the backend-local counter
746 : : * (LocalLWLockCounter) is greater than the requested tranche ID. Else,
747 : : * we need to first update the backend-local counter with ShmemLock held
748 : : * before attempting the lookup again. In practice, the latter case is
749 : : * probably rare.
750 : : */
751 [ + - ]: 40 : if (trancheId >= LocalLWLockCounter)
752 : : {
753 [ # # ]: 0 : SpinLockAcquire(ShmemLock);
754 : 0 : LocalLWLockCounter = *LWLockCounter;
755 : 0 : SpinLockRelease(ShmemLock);
756 : :
757 [ # # ]: 0 : if (trancheId >= LocalLWLockCounter)
758 [ # # # # ]: 0 : elog(ERROR, "tranche %d is not registered", trancheId);
759 : 0 : }
760 : :
761 : : /*
762 : : * It's an extension tranche, so look in LWLockTrancheNames.
763 : : */
764 : 40 : trancheId -= LWTRANCHE_FIRST_USER_DEFINED;
765 : :
766 : 40 : return LWLockTrancheNames[trancheId];
767 : 37191 : }
768 : :
769 : : /*
770 : : * Return an identifier for an LWLock based on the wait class and event.
771 : : */
772 : : const char *
773 : 1 : GetLWLockIdentifier(uint32 classId, uint16 eventId)
774 : : {
775 [ + - ]: 1 : Assert(classId == PG_WAIT_LWLOCK);
776 : : /* The event IDs are just tranche numbers. */
777 : 1 : return GetLWTrancheName(eventId);
778 : : }
779 : :
780 : : /*
781 : : * Internal function that tries to atomically acquire the lwlock in the passed
782 : : * in mode.
783 : : *
784 : : * This function will not block waiting for a lock to become free - that's the
785 : : * caller's job.
786 : : *
787 : : * Returns true if the lock isn't free and we need to wait.
788 : : */
789 : : static bool
790 : 27256348 : LWLockAttemptLock(LWLock *lock, LWLockMode mode)
791 : : {
792 : 27256348 : uint32 old_state;
793 : :
794 [ + + + - ]: 27256348 : Assert(mode == LW_EXCLUSIVE || mode == LW_SHARED);
795 : :
796 : : /*
 797                 :            :          * Read once outside the loop; later iterations will get the newer value
798 : : * via compare & exchange.
799 : : */
800 : 27256348 : old_state = pg_atomic_read_u32(&lock->state);
801 : :
802 : : /* loop until we've determined whether we could acquire the lock or not */
803 : 27262628 : while (true)
804 : : {
805 : 27262628 : uint32 desired_state;
806 : 27262628 : bool lock_free;
807 : :
808 : 27262628 : desired_state = old_state;
809 : :
810 [ + + ]: 27262628 : if (mode == LW_EXCLUSIVE)
811 : : {
812 : 13232498 : lock_free = (old_state & LW_LOCK_MASK) == 0;
813 [ + + ]: 13232498 : if (lock_free)
814 : 13222912 : desired_state += LW_VAL_EXCLUSIVE;
815 : 13232498 : }
816 : : else
817 : : {
818 : 14030130 : lock_free = (old_state & LW_VAL_EXCLUSIVE) == 0;
819 [ + + ]: 14030130 : if (lock_free)
820 : 14029827 : desired_state += LW_VAL_SHARED;
821 : : }
822 : :
823 : : /*
 824                 :            :          * Attempt to swap in the state we are expecting.  If we didn't see the
 825                 :            :          * lock as free, that's just the old value.  If we saw it as free,
 826                 :            :          * we'll attempt to mark it acquired.  The reason that we always swap
 827                 :            :          * in the value is that this doubles as a memory barrier.  We could try
 828                 :            :          * to be smarter and only swap in values if we saw the lock as free,
 829                 :            :          * but benchmarks haven't shown that to be beneficial so far.
830 : : *
831 : : * Retry if the value changed since we last looked at it.
832 : : */
833 [ + + + + ]: 54525256 : if (pg_atomic_compare_exchange_u32(&lock->state,
834 : 27262628 : &old_state, desired_state))
835 : : {
836 [ + + ]: 27256348 : if (lock_free)
837 : : {
838 : : /* Great! Got the lock. */
839 : : #ifdef LOCK_DEBUG
840 : : if (mode == LW_EXCLUSIVE)
841 : : lock->owner = MyProc;
842 : : #endif
843 : 27248281 : return false;
844 : : }
845 : : else
846 : 8067 : return true; /* somebody else has the lock */
847 : : }
848 [ + + ]: 27262628 : }
849 : : pg_unreachable();
850 : 27256348 : }
851 : :
852 : : /*
853 : : * Lock the LWLock's wait list against concurrent activity.
854 : : *
855 : : * NB: even though the wait list is locked, non-conflicting lock operations
856 : : * may still happen concurrently.
857 : : *
 858                 :            :  * Time spent holding the mutex should be short!
859 : : */
860 : : static void
861 : 40836 : LWLockWaitListLock(LWLock *lock)
862 : : {
863 : 40836 : uint32 old_state;
864 : : #ifdef LWLOCK_STATS
865 : : lwlock_stats *lwstats;
866 : : uint32 delays = 0;
867 : :
868 : : lwstats = get_lwlock_stats_entry(lock);
869 : : #endif
870 : :
871 : 41362 : while (true)
872 : : {
873 : : /*
874 : : * Always try once to acquire the lock directly, without setting up
875 : : * the spin-delay infrastructure. The work necessary for that shows up
876 : : * in profiles and is rarely necessary.
877 : : */
878 : 41362 : old_state = pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_LOCKED);
879 [ + + ]: 41362 : if (likely(!(old_state & LW_FLAG_LOCKED)))
880 : 40836 : break; /* got lock */
881 : :
882 : : /* and then spin without atomic operations until lock is released */
883 : : {
884 : 526 : SpinDelayStatus delayStatus;
885 : :
886 : 526 : init_local_spin_delay(&delayStatus);
887 : :
888 [ + + ]: 2407 : while (old_state & LW_FLAG_LOCKED)
889 : : {
890 : 1881 : perform_spin_delay(&delayStatus);
891 : 1881 : old_state = pg_atomic_read_u32(&lock->state);
892 : : }
893 : : #ifdef LWLOCK_STATS
894 : : delays += delayStatus.delays;
895 : : #endif
896 : 526 : finish_spin_delay(&delayStatus);
897 : 526 : }
898 : :
899 : : /*
 900                 :            :          * Retry.  The lock might well have been re-acquired by the time
 901                 :            :          * we attempt to get it again.
902 : : */
903 : : }
904 : :
905 : : #ifdef LWLOCK_STATS
906 : : lwstats->spin_delay_count += delays;
907 : : #endif
908 : 40836 : }
909 : :
910 : : /*
911 : : * Unlock the LWLock's wait list.
912 : : *
913 : : * Note that it can be more efficient to manipulate flags and release the
914 : : * locks in a single atomic operation.
915 : : */
916 : : static void
917 : 36477 : LWLockWaitListUnlock(LWLock *lock)
918 : : {
919 : 36477 : uint32 old_state PG_USED_FOR_ASSERTS_ONLY;
920 : :
921 : 36477 : old_state = pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_LOCKED);
922 : :
923 [ + - ]: 36477 : Assert(old_state & LW_FLAG_LOCKED);
924 : 36477 : }
925 : :
926 : : /*
 927                 :            :  * Wake up all the lockers that currently have a chance to acquire the lock.
928 : : */
929 : : static void
930 : 4359 : LWLockWakeup(LWLock *lock)
931 : : {
932 : 4359 : bool new_wake_in_progress = false;
933 : 4359 : bool wokeup_somebody = false;
934 : 4359 : proclist_head wakeup;
935 : 4359 : proclist_mutable_iter iter;
936 : :
937 : 4359 : proclist_init(&wakeup);
938 : :
939 : : /* lock wait list while collecting backends to wake up */
940 : 4359 : LWLockWaitListLock(lock);
941 : :
942 [ + + + + : 5483 : proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
+ + ]
943 : : {
944 : 4350 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
945 : :
946 [ + + + + ]: 4350 : if (wokeup_somebody && waiter->lwWaitMode == LW_EXCLUSIVE)
947 : 2 : continue;
948 : :
949 : 4348 : proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
950 : 4348 : proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
951 : :
952 [ + + ]: 4348 : if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
953 : : {
954 : : /*
 955                 :            :                          * Prevent additional wakeups until the retryer gets to run.  Backends
956 : : * that are just waiting for the lock to become free don't retry
957 : : * automatically.
958 : : */
959 : 3342 : new_wake_in_progress = true;
960 : :
961 : : /*
 962                 :            :                          * Don't wake up (further) exclusive lockers.
963 : : */
964 : 3342 : wokeup_somebody = true;
965 : 3342 : }
966 : :
967 : : /*
968 : : * Signal that the process isn't on the wait list anymore. This allows
969 : : * LWLockDequeueSelf() to remove itself of the waitlist with a
970 : : * proclist_delete(), rather than having to check if it has been
971 : : * removed from the list.
972 : : */
973 [ - + ]: 4348 : Assert(waiter->lwWaiting == LW_WS_WAITING);
974 : 4348 : waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
975 : :
976 : : /*
 977                 :            :                  * Once we've woken up an exclusive locker, there's no point in waking
978 : : * up anybody else.
979 : : */
980 [ + + ]: 4348 : if (waiter->lwWaitMode == LW_EXCLUSIVE)
981 : 3226 : break;
982 [ - + + + ]: 4350 : }
983 : :
984 [ + + + - ]: 4359 : Assert(proclist_is_empty(&wakeup) || pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS);
985 : :
986 : : /* unset required flags, and release lock, in one fell swoop */
987 : : {
988 : 4359 : uint32 old_state;
989 : 4359 : uint32 desired_state;
990 : :
991 : 4359 : old_state = pg_atomic_read_u32(&lock->state);
992 : 4423 : while (true)
993 : : {
994 : 4423 : desired_state = old_state;
995 : :
996 : : /* compute desired flags */
997 : :
998 [ + + ]: 4423 : if (new_wake_in_progress)
999 : 3356 : desired_state |= LW_FLAG_WAKE_IN_PROGRESS;
1000 : : else
1001 : 1067 : desired_state &= ~LW_FLAG_WAKE_IN_PROGRESS;
1002 : :
1003 [ + + ]: 4423 : if (proclist_is_empty(&lock->waiters))
1004 : 4155 : desired_state &= ~LW_FLAG_HAS_WAITERS;
1005 : :
1006 : 4423 : desired_state &= ~LW_FLAG_LOCKED; /* release lock */
1007 : :
1008 [ + + + + ]: 8846 : if (pg_atomic_compare_exchange_u32(&lock->state, &old_state,
1009 : 4423 : desired_state))
1010 : 4359 : break;
1011 : : }
1012 : 4359 : }
1013 : :
1014 : : /* Awaken any waiters I removed from the queue. */
1015 [ + + + + : 8707 : proclist_foreach_modify(iter, &wakeup, lwWaitLink)
+ + ]
1016 : : {
1017 : 4348 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
1018 : :
1019 : : LOG_LWDEBUG("LWLockRelease", lock, "release waiter");
1020 : 4348 : proclist_delete(&wakeup, iter.cur, lwWaitLink);
1021 : :
1022 : : /*
1023 : : * Guarantee that lwWaiting being unset only becomes visible once the
 1024                 :            :                  * unlink from the list has completed.  Otherwise the target backend
 1025                 :            :                  * could be woken up for some other reason and enqueue for a new lock -
 1026                 :            :                  * if that happens before the list unlink happens, the list would end up
1027 : : * being corrupted.
1028 : : *
1029 : : * The barrier pairs with the LWLockWaitListLock() when enqueuing for
1030 : : * another lock.
1031 : : */
1032 : 4348 : pg_write_barrier();
1033 : 4348 : waiter->lwWaiting = LW_WS_NOT_WAITING;
1034 : 4348 : PGSemaphoreUnlock(waiter->sem);
1035 : 4348 : }
1036 : 4359 : }
1037 : :
1038 : : /*
1039 : : * Add ourselves to the end of the queue.
1040 : : *
1041 : : * NB: Mode can be LW_WAIT_UNTIL_FREE here!
1042 : : */
1043 : : static void
1044 : 5257 : LWLockQueueSelf(LWLock *lock, LWLockMode mode)
1045 : : {
1046 : : /*
1047 : : * If we don't have a PGPROC structure, there's no way to wait. This
1048 : : * should never occur, since MyProc should only be null during shared
1049 : : * memory initialization.
1050 : : */
1051 [ + - ]: 5257 : if (MyProc == NULL)
1052 [ # # # # ]: 0 : elog(PANIC, "cannot wait without a PGPROC structure");
1053 : :
1054 [ + - ]: 5257 : if (MyProc->lwWaiting != LW_WS_NOT_WAITING)
1055 [ # # # # ]: 0 : elog(PANIC, "queueing for lock while waiting on another one");
1056 : :
1057 : 5257 : LWLockWaitListLock(lock);
1058 : :
1059 : : /* setting the flag is protected by the spinlock */
1060 : 5257 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_HAS_WAITERS);
1061 : :
1062 : 5257 : MyProc->lwWaiting = LW_WS_WAITING;
1063 : 5257 : MyProc->lwWaitMode = mode;
1064 : :
1065 : : /* LW_WAIT_UNTIL_FREE waiters are always at the front of the queue */
1066 [ + + ]: 5257 : if (mode == LW_WAIT_UNTIL_FREE)
1067 : 1150 : proclist_push_head(&lock->waiters, MyProcNumber, lwWaitLink);
1068 : : else
1069 : 4107 : proclist_push_tail(&lock->waiters, MyProcNumber, lwWaitLink);
1070 : :
1071 : : /* Can release the mutex now */
1072 : 5257 : LWLockWaitListUnlock(lock);
1073 : :
1074 : : #ifdef LOCK_DEBUG
1075 : : pg_atomic_fetch_add_u32(&lock->nwaiters, 1);
1076 : : #endif
1077 : 5257 : }
1078 : :
1079 : : /*
1080 : : * Remove ourselves from the waitlist.
1081 : : *
1082 : : * This is used if we queued ourselves because we thought we needed to sleep
1083 : : * but, after further checking, we discovered that we don't actually need to
1084 : : * do so.
1085 : : */
1086 : : static void
1087 : 1036 : LWLockDequeueSelf(LWLock *lock)
1088 : : {
1089 : 1036 : bool on_waitlist;
1090 : :
1091 : : #ifdef LWLOCK_STATS
1092 : : lwlock_stats *lwstats;
1093 : :
1094 : : lwstats = get_lwlock_stats_entry(lock);
1095 : :
1096 : : lwstats->dequeue_self_count++;
1097 : : #endif
1098 : :
1099 : 1036 : LWLockWaitListLock(lock);
1100 : :
1101 : : /*
1102 : : * Remove ourselves from the waitlist, unless we've already been removed.
1103 : : * The removal happens with the wait list lock held, so there's no race in
1104 : : * this check.
1105 : : */
1106 : 1036 : on_waitlist = MyProc->lwWaiting == LW_WS_WAITING;
1107 [ + + ]: 1036 : if (on_waitlist)
1108 : 898 : proclist_delete(&lock->waiters, MyProcNumber, lwWaitLink);
1109 : :
1110 [ + + + + ]: 1036 : if (proclist_is_empty(&lock->waiters) &&
1111 : 1005 : (pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS) != 0)
1112 : : {
1113 : 871 : pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_HAS_WAITERS);
1114 : 871 : }
1115 : :
1116 : : /* XXX: combine with fetch_and above? */
1117 : 1036 : LWLockWaitListUnlock(lock);
1118 : :
1119 : : /* clear waiting state again, nice for debugging */
1120 [ + + ]: 1036 : if (on_waitlist)
1121 : 898 : MyProc->lwWaiting = LW_WS_NOT_WAITING;
1122 : : else
1123 : : {
1124 : 138 : int extraWaits = 0;
1125 : :
1126 : : /*
1127 : : * Somebody else dequeued us and has or will wake us up. Deal with the
1128 : : * superfluous absorption of a wakeup.
1129 : : */
1130 : :
1131 : : /*
1132 : : * Clear LW_FLAG_WAKE_IN_PROGRESS if somebody woke us before we
1133 : : * removed ourselves - they'll have set it.
1134 : : */
1135 : 138 : pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_WAKE_IN_PROGRESS);
1136 : :
1137 : : /*
1138 : : * Now wait for the scheduled wakeup, otherwise our ->lwWaiting would
1139 : : * get reset at some inconvenient point later. Most of the time this
1140 : : * will immediately return.
1141 : : */
1142 : 138 : for (;;)
1143 : : {
1144 : 138 : PGSemaphoreLock(MyProc->sem);
1145 [ + - ]: 138 : if (MyProc->lwWaiting == LW_WS_NOT_WAITING)
1146 : 138 : break;
1147 : 0 : extraWaits++;
1148 : : }
1149 : :
1150 : : /*
1151 : : * Fix the process wait semaphore's count for any absorbed wakeups.
1152 : : */
1153 [ - + ]: 138 : while (extraWaits-- > 0)
1154 : 0 : PGSemaphoreUnlock(MyProc->sem);
1155 : 138 : }
1156 : :
1157 : : #ifdef LOCK_DEBUG
1158 : : {
1159 : : /* not waiting anymore */
1160 : : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1161 : :
1162 : : Assert(nwaiters < MAX_BACKENDS);
1163 : : }
1164 : : #endif
1165 : 1036 : }
1166 : :
1167 : : /*
1168 : : * LWLockAcquire - acquire a lightweight lock in the specified mode
1169 : : *
1170 : : * If the lock is not available, sleep until it is. Returns true if the lock
1171 : : * was available immediately, false if we had to sleep.
1172 : : *
1173 : : * Side effect: cancel/die interrupts are held off until lock release.
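 *
 * Canonical usage, for illustration:
 *
 *		LWLockAcquire(lock, LW_SHARED);
 *		... examine the protected shared data structure ...
 *		LWLockRelease(lock);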
1174 : : */
1175 : : bool
1176 : 27175387 : LWLockAcquire(LWLock *lock, LWLockMode mode)
1177 : : {
1178 : 27175387 : PGPROC *proc = MyProc;
1179 : 27175387 : bool result = true;
1180 : 27175387 : int extraWaits = 0;
1181 : : #ifdef LWLOCK_STATS
1182 : : lwlock_stats *lwstats;
1183 : :
1184 : : lwstats = get_lwlock_stats_entry(lock);
1185 : : #endif
1186 : :
1187 [ + + + - ]: 27175387 : Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1188 : :
1189 : : PRINT_LWDEBUG("LWLockAcquire", lock, mode);
1190 : :
1191 : : #ifdef LWLOCK_STATS
1192 : : /* Count lock acquisition attempts */
1193 : : if (mode == LW_EXCLUSIVE)
1194 : : lwstats->ex_acquire_count++;
1195 : : else
1196 : : lwstats->sh_acquire_count++;
1197 : : #endif /* LWLOCK_STATS */
1198 : :
1199 : : /*
1200 : : * We can't wait if we haven't got a PGPROC. This should only occur
1201 : : * during bootstrap or shared memory initialization. Put an Assert here
1202 : : * to catch unsafe coding practices.
1203 : : */
1204 [ + + + - ]: 27175387 : Assert(!(proc == NULL && IsUnderPostmaster));
1205 : :
1206 : : /* Ensure we will have room to remember the lock */
1207 [ + - ]: 27175387 : if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
1208 [ # # # # ]: 0 : elog(ERROR, "too many LWLocks taken");
1209 : :
1210 : : /*
1211 : : * Lock out cancel/die interrupts until we exit the code section protected
1212 : : * by the LWLock. This ensures that interrupts will not interfere with
1213 : : * manipulations of data structures in shared memory.
1214 : : */
1215 : 27175387 : HOLD_INTERRUPTS();
1216 : :
1217 : : /*
1218 : : * Loop here to try to acquire lock after each time we are signaled by
1219 : : * LWLockRelease.
1220 : : *
1221 : : * NOTE: it might seem better to have LWLockRelease actually grant us the
1222 : : * lock, rather than retrying and possibly having to go back to sleep. But
1223 : : * in practice that is no good because it means a process swap for every
1224 : : * lock acquisition when two or more processes are contending for the same
1225 : : * lock. Since LWLocks are normally used to protect not-very-long
1226 : : * sections of computation, a process needs to be able to acquire and
1227 : : * release the same lock many times during a single CPU time slice, even
1228 : : * in the presence of contention. The efficiency of being able to do that
1229 : : * outweighs the inefficiency of sometimes wasting a process dispatch
1230 : : * cycle because the lock is not free when a released waiter finally gets
1231 : : * to run. See pgsql-hackers archives for 29-Dec-01.
1232 : : */
1233 : 27178603 : for (;;)
1234 : : {
1235 : 27178603 : bool mustwait;
1236 : :
1237 : : /*
 1238                 :            :                  * Try to grab the lock the first time; we're not in the waitqueue
1239 : : * yet/anymore.
1240 : : */
1241 : 27178603 : mustwait = LWLockAttemptLock(lock, mode);
1242 : :
1243 [ + + ]: 27178603 : if (!mustwait)
1244 : : {
1245 : : LOG_LWDEBUG("LWLockAcquire", lock, "immediately acquired lock");
1246 : 27174496 : break; /* got the lock */
1247 : : }
1248 : :
1249 : : /*
1250 : : * Ok, at this point we couldn't grab the lock on the first try. We
1251 : : * cannot simply queue ourselves to the end of the list and wait to be
1252 : : * woken up because by now the lock could long have been released.
1253 : : * Instead add us to the queue and try to grab the lock again. If we
1254 : : * succeed we need to revert the queuing and be happy, otherwise we
1255 : : * recheck the lock. If we still couldn't grab it, we know that the
1256 : : * other locker will see our queue entries when releasing since they
1257 : : * existed before we checked for the lock.
1258 : : */
1259 : :
1260 : : /* add to the queue */
1261 : 4107 : LWLockQueueSelf(lock, mode);
1262 : :
1263 : : /* we're now guaranteed to be woken up if necessary */
1264 : 4107 : mustwait = LWLockAttemptLock(lock, mode);
1265 : :
1266 : : /* ok, grabbed the lock the second time round, need to undo queueing */
1267 [ + + ]: 4107 : if (!mustwait)
1268 : : {
1269 : : LOG_LWDEBUG("LWLockAcquire", lock, "acquired, undoing queue");
1270 : :
1271 : 891 : LWLockDequeueSelf(lock);
1272 : 891 : break;
1273 : : }
1274 : :
1275 : : /*
1276 : : * Wait until awakened.
1277 : : *
1278 : : * It is possible that we get awakened for a reason other than being
1279 : : * signaled by LWLockRelease. If so, loop back and wait again. Once
1280 : : * we've gotten the LWLock, re-increment the sema by the number of
1281 : : * additional signals received.
1282 : : */
1283 : : LOG_LWDEBUG("LWLockAcquire", lock, "waiting");
1284 : :
1285 : : #ifdef LWLOCK_STATS
1286 : : lwstats->block_count++;
1287 : : #endif
1288 : :
1289 : 3216 : LWLockReportWaitStart(lock);
1290 : : if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
1291 : : TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
1292 : :
1293 : 3216 : for (;;)
1294 : : {
1295 : 3216 : PGSemaphoreLock(proc->sem);
1296 [ - + ]: 3216 : if (proc->lwWaiting == LW_WS_NOT_WAITING)
1297 : 3216 : break;
1298 : 0 : extraWaits++;
1299 : : }
1300 : :
1301 : : /* Retrying, allow LWLockRelease to release waiters again. */
1302 : 3216 : pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_WAKE_IN_PROGRESS);
1303 : :
1304 : : #ifdef LOCK_DEBUG
1305 : : {
1306 : : /* not waiting anymore */
1307 : : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1308 : :
1309 : : Assert(nwaiters < MAX_BACKENDS);
1310 : : }
1311 : : #endif
1312 : :
1313 : : if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
1314 : : TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
1315 : 3216 : LWLockReportWaitEnd();
1316 : :
1317 : : LOG_LWDEBUG("LWLockAcquire", lock, "awakened");
1318 : :
1319 : : /* Now loop back and try to acquire lock again. */
1320 : 3216 : result = false;
1321 [ - + + ]: 27178603 : }
1322 : :
1323 : : if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_ENABLED())
1324 : : TRACE_POSTGRESQL_LWLOCK_ACQUIRE(T_NAME(lock), mode);
1325 : :
1326 : : /* Add lock to list of locks held by this backend */
1327 : 27175387 : held_lwlocks[num_held_lwlocks].lock = lock;
1328 : 27175387 : held_lwlocks[num_held_lwlocks++].mode = mode;
1329 : :
1330 : : /*
1331 : : * Fix the process wait semaphore's count for any absorbed wakeups.
1332 : : */
1333 [ - + ]: 27175387 : while (extraWaits-- > 0)
1334 : 0 : PGSemaphoreUnlock(proc->sem);
1335 : :
1336 : 54350774 : return result;
1337 : 27175387 : }
1338 : :
1339 : : /*
1340 : : * LWLockConditionalAcquire - acquire a lightweight lock in the specified mode
1341 : : *
1342 : : * If the lock is not available, return false with no side-effects.
1343 : : *
1344 : : * If successful, cancel/die interrupts are held off until lock release.
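 *
 * A typical caller looks like this illustrative sketch:
 *
 *		if (LWLockConditionalAcquire(lock, LW_EXCLUSIVE))
 *		{
 *			... do the optional work ...
 *			LWLockRelease(lock);
 *		}
 *		else
 *			... skip the work, or retry later, without blocking ...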
1345 : : */
1346 : : bool
1347 : 53053 : LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
1348 : : {
1349 : 53053 : bool mustwait;
1350 : :
1351 [ + - + - ]: 53053 : Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1352 : :
1353 : : PRINT_LWDEBUG("LWLockConditionalAcquire", lock, mode);
1354 : :
1355 : : /* Ensure we will have room to remember the lock */
1356 [ + - ]: 53053 : if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
1357 [ # # # # ]: 0 : elog(ERROR, "too many LWLocks taken");
1358 : :
1359 : : /*
1360 : : * Lock out cancel/die interrupts until we exit the code section protected
1361 : : * by the LWLock. This ensures that interrupts will not interfere with
1362 : : * manipulations of data structures in shared memory.
1363 : : */
1364 : 53053 : HOLD_INTERRUPTS();
1365 : :
1366 : : /* Check for the lock */
1367 : 53053 : mustwait = LWLockAttemptLock(lock, mode);
1368 : :
1369 [ + + ]: 53053 : if (mustwait)
1370 : : {
1371 : : /* Failed to get lock, so release interrupt holdoff */
1372 [ + - ]: 46 : RESUME_INTERRUPTS();
1373 : :
1374 : : LOG_LWDEBUG("LWLockConditionalAcquire", lock, "failed");
1375 : : if (TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL_ENABLED())
1376 : : TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL(T_NAME(lock), mode);
1377 : 46 : }
1378 : : else
1379 : : {
1380 : : /* Add lock to list of locks held by this backend */
1381 : 53007 : held_lwlocks[num_held_lwlocks].lock = lock;
1382 : 53007 : held_lwlocks[num_held_lwlocks++].mode = mode;
1383 : : if (TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_ENABLED())
1384 : : TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE(T_NAME(lock), mode);
1385 : : }
1386 : 106106 : return !mustwait;
1387 : 53053 : }
1388 : :
1389 : : /*
1390 : : * LWLockAcquireOrWait - Acquire lock, or wait until it's free
1391 : : *
1392 : : * The semantics of this function are a bit funky. If the lock is currently
1393 : : * free, it is acquired in the given mode, and the function returns true. If
1394 : : * the lock isn't immediately free, the function waits until it is released
1395 : : * and returns false, but does not acquire the lock.
1396 : : *
1397 : : * This is currently used for WALWriteLock: when a backend flushes the WAL,
1398 : : * holding WALWriteLock, it can flush the commit records of many other
1399 : : * backends as a side-effect. Those other backends need to wait until the
1400 : : * flush finishes, but don't need to acquire the lock anymore. They can just
1401 : : * wake up, observe that their records have already been flushed, and return.
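 *
 * Such a caller looks roughly like this (a sketch, not the actual WAL code):
 *
 *		if (LWLockAcquireOrWait(WALWriteLock, LW_EXCLUSIVE))
 *		{
 *			... flush WAL through the requested position ...
 *			LWLockRelease(WALWriteLock);
 *		}
 *		else
 *			... recheck: somebody else has probably flushed our records ...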
1402 : : */
1403 : : bool
1404 : 20235 : LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
1405 : : {
1406 : 20235 : PGPROC *proc = MyProc;
1407 : 20235 : bool mustwait;
1408 : 20235 : int extraWaits = 0;
1409 : : #ifdef LWLOCK_STATS
1410 : : lwlock_stats *lwstats;
1411 : :
1412 : : lwstats = get_lwlock_stats_entry(lock);
1413 : : #endif
1414 : :
1415 [ + - + - ]: 20235 : Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1416 : :
1417 : : PRINT_LWDEBUG("LWLockAcquireOrWait", lock, mode);
1418 : :
1419 : : /* Ensure we will have room to remember the lock */
1420 [ + - ]: 20235 : if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
1421 [ # # # # ]: 0 : elog(ERROR, "too many LWLocks taken");
1422 : :
1423 : : /*
1424 : : * Lock out cancel/die interrupts until we exit the code section protected
1425 : : * by the LWLock. This ensures that interrupts will not interfere with
1426 : : * manipulations of data structures in shared memory.
1427 : : */
1428 : 20235 : HOLD_INTERRUPTS();
1429 : :
1430 : : /*
1431 : : * NB: We're using nearly the same twice-in-a-row lock acquisition
1432 : : * protocol as LWLockAcquire(). Check its comments for details.
1433 : : */
1434 : 20235 : mustwait = LWLockAttemptLock(lock, mode);
1435 : :
1436 [ + + ]: 20235 : if (mustwait)
1437 : : {
1438 : 350 : LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
1439 : :
1440 : 350 : mustwait = LWLockAttemptLock(lock, mode);
1441 : :
1442 [ + + ]: 350 : if (mustwait)
1443 : : {
1444 : : /*
1445 : : * Wait until awakened. Like in LWLockAcquire, be prepared for
1446 : : * bogus wakeups.
1447 : : */
1448 : : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "waiting");
1449 : :
1450 : : #ifdef LWLOCK_STATS
1451 : : lwstats->block_count++;
1452 : : #endif
1453 : :
1454 : 348 : LWLockReportWaitStart(lock);
1455 : : if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
1456 : : TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
1457 : :
1458 : 348 : for (;;)
1459 : : {
1460 : 348 : PGSemaphoreLock(proc->sem);
1461 [ + - ]: 348 : if (proc->lwWaiting == LW_WS_NOT_WAITING)
1462 : 348 : break;
1463 : 0 : extraWaits++;
1464 : : }
1465 : :
1466 : : #ifdef LOCK_DEBUG
1467 : : {
1468 : : /* not waiting anymore */
1469 : : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1470 : :
1471 : : Assert(nwaiters < MAX_BACKENDS);
1472 : : }
1473 : : #endif
1474 : : if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
1475 : : TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
1476 : 348 : LWLockReportWaitEnd();
1477 : :
1478 : : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "awakened");
1479 : 348 : }
1480 : : else
1481 : : {
1482 : : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "acquired, undoing queue");
1483 : :
1484 : : /*
1485 : : * Got lock in the second attempt, undo queueing. We need to treat
1486 : : * this as having successfully acquired the lock, otherwise we'd
1487 : : * not necessarily wake up people we've prevented from acquiring
1488 : : * the lock.
1489 : : */
1490 : 2 : LWLockDequeueSelf(lock);
1491 : : }
1492 : 350 : }
1493 : :
1494 : : /*
1495 : : * Fix the process wait semaphore's count for any absorbed wakeups.
1496 : : */
1497 [ - + ]: 20235 : while (extraWaits-- > 0)
1498 : 0 : PGSemaphoreUnlock(proc->sem);
1499 : :
1500 [ + + ]: 20235 : if (mustwait)
1501 : : {
1502 : : /* Failed to get lock, so release interrupt holdoff */
1503 [ + - ]: 348 : RESUME_INTERRUPTS();
1504 : : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "failed");
1505 : : if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL_ENABLED())
1506 : : TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL(T_NAME(lock), mode);
1507 : 348 : }
1508 : : else
1509 : : {
1510 : : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "succeeded");
1511 : : /* Add lock to list of locks held by this backend */
1512 : 19887 : held_lwlocks[num_held_lwlocks].lock = lock;
1513 : 19887 : held_lwlocks[num_held_lwlocks++].mode = mode;
1514 : : if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_ENABLED())
1515 : : TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT(T_NAME(lock), mode);
1516 : : }
1517 : :
1518 : 40470 : return !mustwait;
1519 : 20235 : }
1520 : :
1521 : : /*
 1522                 :            :  * Given the lwlock's current state, does the caller need to wait for the
 1523                 :            :  * variable value to change?
1524 : : *
1525 : : * If we don't need to wait, and it's because the value of the variable has
 1526                 :            :  * changed, store the current value in *newval.
1527 : : *
1528 : : * *result is set to true if the lock was free, and false otherwise.
1529 : : */
1530 : : static bool
1531 : 162421 : LWLockConflictsWithVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval,
1532 : : uint64 *newval, bool *result)
1533 : : {
1534 : 162421 : bool mustwait;
1535 : 162421 : uint64 value;
1536 : :
1537 : : /*
1538 : : * Test first to see if the lock is free right now.
1539 : : *
1540 : : * XXX: the unique caller of this routine, WaitXLogInsertionsToFinish()
1541 : : * via LWLockWaitForVar(), uses an implied barrier with a spinlock before
1542 : : * this, so we don't need a memory barrier here as far as the current
1543 : : * usage is concerned. But that might not be safe in general.
1544 : : */
1545 : 162421 : mustwait = (pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE) != 0;
1546 : :
1547 [ + + ]: 162421 : if (!mustwait)
1548 : : {
1549 : 160184 : *result = true;
1550 : 160184 : return false;
1551 : : }
1552 : :
1553 : 2237 : *result = false;
1554 : :
1555 : : /*
1556 : : * pg_atomic_read_u64() guarantees an untorn read even on platforms
1557 : : * where a plain uint64 load could observe a torn value.
1558 : : */
1559 : 2237 : value = pg_atomic_read_u64(valptr);
1560 : :
1561 [ + + ]: 2237 : if (value != oldval)
1562 : : {
1563 : 780 : mustwait = false;
1564 : 780 : *newval = value;
1565 : 780 : }
1566 : : else
1567 : : {
1568 : 1457 : mustwait = true;
1569 : : }
1570 : :
1571 : 2237 : return mustwait;
1572 : 162421 : }
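/*
 * Summary of the three possible outcomes above (editor's addition):
 *
 *   lock state       *valptr vs oldval   returns   *result   *newval
 *   ---------------  ------------------  --------  --------  ---------
 *   not exclusive    (not examined)      false     true      unchanged
 *   exclusive        != oldval           false     false     new value
 *   exclusive        == oldval           true      false     unchanged
 */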
1573 : :
1574 : : /*
1575 : : * LWLockWaitForVar - Wait until lock is free, or a variable is updated.
1576 : : *
1577 : : * If the lock is held and *valptr equals oldval, waits until the lock is
1578 : : * either freed, or the lock holder updates *valptr by calling
1579 : : * LWLockUpdateVar. If the lock is free on exit (immediately or after
1580 : : * waiting), returns true. If the lock is still held, but *valptr no longer
1581 : : * matches oldval, returns false and sets *newval to the current value in
1582 : : * *valptr.
1583 : : *
1584 : : * Note: this function ignores shared lock holders; if the lock is held
1585 : : * in shared mode, returns 'true'.
1586 : : *
1587 : : * Be aware that LWLockConflictsWithVar() does not include a memory barrier,
1588 : : * hence the caller of this function may want to rely on an explicit barrier or
1589 : : * an implied barrier via spinlock or LWLock to avoid memory ordering issues.
1590 : : */
1591 : : bool
1592 : 160964 : LWLockWaitForVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval,
1593 : : uint64 *newval)
1594 : : {
1595 : 160964 : PGPROC *proc = MyProc;
1596 : 160964 : int extraWaits = 0;
1597 : 160964 : bool result = false;
1598 : : #ifdef LWLOCK_STATS
1599 : : lwlock_stats *lwstats;
1600 : :
1601 : : lwstats = get_lwlock_stats_entry(lock);
1602 : : #endif
1603 : :
1604 : : PRINT_LWDEBUG("LWLockWaitForVar", lock, LW_WAIT_UNTIL_FREE);
1605 : :
1606 : : /*
1607 : : * Lock out cancel/die interrupts while we sleep on the lock. There is no
1608 : : * cleanup mechanism to remove us from the wait queue if we got
1609 : : * interrupted.
1610 : : */
1611 : 160964 : HOLD_INTERRUPTS();
1612 : :
1613 : : /*
1614 : : * Loop here to check the lock's status after each time we are signaled.
1615 : : */
1616 : 161621 : for (;;)
1617 : : {
1618 : 161621 : bool mustwait;
1619 : :
1620 : 161621 : mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
1621 : : &result);
1622 : :
1623 [ + + ]: 161621 : if (!mustwait)
1624 : 160821 : break; /* the lock was free or value didn't match */
1625 : :
1626 : : /*
1627 : : * Add myself to the wait queue. Note that this is racy: somebody else
1628 : : * could wake us up before we're finished queuing. NB: We're using nearly
1629 : : * the same twice-in-a-row lock acquisition protocol as
1630 : : * LWLockAcquire(). Check its comments for details. The only
1631 : : * difference is that we also have to check the variable's values when
1632 : : * checking the state of the lock.
1633 : : */
1634 : 800 : LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
1635 : :
1636 : : /*
1637 : : * Clear LW_FLAG_WAKE_IN_PROGRESS flag, to make sure we get woken up
1638 : : * as soon as the lock is released.
1639 : : */
1640 : 800 : pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_WAKE_IN_PROGRESS);
1641 : :
1642 : : /*
1643 : : * We're now guaranteed to be woken up if necessary. Recheck the lock
1644 : : * and the variable's state.
1645 : : */
1646 : 800 : mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
1647 : : &result);
1648 : :
1649 : : /* Ok, no conflict after we queued ourselves. Undo queueing. */
1650 [ + + ]: 800 : if (!mustwait)
1651 : : {
1652 : : LOG_LWDEBUG("LWLockWaitForVar", lock, "free, undoing queue");
1653 : :
1654 : 143 : LWLockDequeueSelf(lock);
1655 : 143 : break;
1656 : : }
1657 : :
1658 : : /*
1659 : : * Wait until awakened.
1660 : : *
1661 : : * It is possible that we get awakened for a reason other than being
1662 : : * signaled by LWLockRelease. If so, loop back and wait again. Once
1663 : : * we've gotten the LWLock, re-increment the sema by the number of
1664 : : * additional signals received.
1665 : : */
1666 : : LOG_LWDEBUG("LWLockWaitForVar", lock, "waiting");
1667 : :
1668 : : #ifdef LWLOCK_STATS
1669 : : lwstats->block_count++;
1670 : : #endif
1671 : :
1672 : 657 : LWLockReportWaitStart(lock);
1673 : : if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
1674 : : TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), LW_EXCLUSIVE);
1675 : :
1676 : 657 : for (;;)
1677 : : {
1678 : 657 : PGSemaphoreLock(proc->sem);
1679 [ - + ]: 657 : if (proc->lwWaiting == LW_WS_NOT_WAITING)
1680 : 657 : break;
1681 : 0 : extraWaits++;
1682 : : }
1683 : :
1684 : : #ifdef LOCK_DEBUG
1685 : : {
1686 : : /* not waiting anymore */
1687 : : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1688 : :
1689 : : Assert(nwaiters < MAX_BACKENDS);
1690 : : }
1691 : : #endif
1692 : :
1693 : : if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
1694 : : TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), LW_EXCLUSIVE);
1695 : 657 : LWLockReportWaitEnd();
1696 : :
1697 : : LOG_LWDEBUG("LWLockWaitForVar", lock, "awakened");
1698 : :
1699 : : /* Now loop back and check the status of the lock again. */
1700 [ - + + ]: 161621 : }
1701 : :
1702 : : /*
1703 : : * Fix the process wait semaphore's count for any absorbed wakeups.
1704 : : */
1705 [ - + ]: 160964 : while (extraWaits-- > 0)
1706 : 0 : PGSemaphoreUnlock(proc->sem);
1707 : :
1708 : : /*
1709 : : * Now okay to allow cancel/die interrupts.
1710 : : */
1711 [ + - ]: 160964 : RESUME_INTERRUPTS();
1712 : :
1713 : 321928 : return result;
1714 : 160964 : }
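/*
 * Illustrative sketch (editor's addition): the waiter side of the
 * variable-wait protocol, modeled loosely on how WaitXLogInsertionsToFinish()
 * polls the WAL insertion locks.  The 'progress' variable and the
 * zero-initial-value convention are assumptions for illustration.
 */
static uint64
wait_for_progress(LWLock *lock, pg_atomic_uint64 *progress)
{
	uint64		seen = 0;		/* last value observed */

	/*
	 * Each false return means the holder merely advanced the variable;
	 * 'seen' is refreshed so we sleep again only until a further change.
	 * A true return means the lock itself became free (in which case
	 * 'seen' keeps the last value observed).
	 */
	while (!LWLockWaitForVar(lock, progress, seen, &seen))
		;

	return seen;
}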
1715 : :
1716 : :
1717 : : /*
1718 : : * LWLockUpdateVar - Update a variable and wake up waiters atomically
1719 : : *
1720 : : * Sets *valptr to 'val', and wakes up all processes waiting for us with
1721 : : * LWLockWaitForVar(). It first sets the value atomically and then wakes up
1722 : : * waiting processes so that any process calling LWLockWaitForVar() on the same
1723 : : * lock is guaranteed to see the new value, and act accordingly.
1724 : : *
1725 : : * The caller must be holding the lock in exclusive mode.
1726 : : */
1727 : : void
1728 : 30184 : LWLockUpdateVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
1729 : : {
1730 : 30184 : proclist_head wakeup;
1731 : 30184 : proclist_mutable_iter iter;
1732 : :
1733 : : PRINT_LWDEBUG("LWLockUpdateVar", lock, LW_EXCLUSIVE);
1734 : :
1735 : : /*
1736 : : * Note that pg_atomic_exchange_u64 is a full barrier, so we're guaranteed
1737 : : * that the variable is updated before waking up waiters.
1738 : : */
1739 : 30184 : pg_atomic_exchange_u64(valptr, val);
1740 : :
1741 : 30184 : proclist_init(&wakeup);
1742 : :
1743 : 30184 : LWLockWaitListLock(lock);
1744 : :
1745 [ + - ]: 30184 : Assert(pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE);
1746 : :
1747 : : /*
1748 : : * See if there are any LW_WAIT_UNTIL_FREE waiters that need to be woken
1749 : : * up. They are always at the front of the queue.
1750 : : */
1751 [ + + + + : 30190 : proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
+ + ]
1752 : : {
1753 : 262 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
1754 : :
1755 [ + + ]: 262 : if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
1756 : 256 : break;
1757 : :
1758 : 6 : proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
1759 : 6 : proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
1760 : :
1761 : : /* see LWLockWakeup() */
1762 [ - + ]: 6 : Assert(waiter->lwWaiting == LW_WS_WAITING);
1763 : 6 : waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
1764 [ - + + ]: 262 : }
1765 : :
1766 : : /* We are done updating shared state of the lock itself. */
1767 : 30184 : LWLockWaitListUnlock(lock);
1768 : :
1769 : : /*
1770 : : * Awaken any waiters I removed from the queue.
1771 : : */
1772 [ + + + + : 30190 : proclist_foreach_modify(iter, &wakeup, lwWaitLink)
+ + ]
1773 : : {
1774 : 6 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
1775 : :
1776 : 6 : proclist_delete(&wakeup, iter.cur, lwWaitLink);
1777 : : /* check comment in LWLockWakeup() about this barrier */
1778 : 6 : pg_write_barrier();
1779 : 6 : waiter->lwWaiting = LW_WS_NOT_WAITING;
1780 : 6 : PGSemaphoreUnlock(waiter->sem);
1781 : 6 : }
1782 : 30184 : }
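/*
 * Illustrative sketch (editor's addition): the matching holder side.  An
 * exclusive holder publishes intermediate progress with LWLockUpdateVar()
 * so that wait_for_progress() callers can proceed before the lock itself
 * is released.  do_chunk_of_work() and the zero-means-free convention are
 * assumptions for illustration.
 */
static void
produce(LWLock *lock, pg_atomic_uint64 *progress, uint64 upto)
{
	uint64		pos;

	LWLockAcquire(lock, LW_EXCLUSIVE);
	for (pos = 1; pos <= upto; pos++)
	{
		do_chunk_of_work(pos);					/* hypothetical */
		LWLockUpdateVar(lock, progress, pos);	/* wakes variable waiters */
	}
	/* Reset the variable to its "lock is free" value while releasing. */
	LWLockReleaseClearVar(lock, progress, 0);
}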
1783 : :
1784 : :
1785 : : /*
1786 : : * LWLockRelease - release a previously acquired lock
1787 : : *
1788 : : * NB: This will leave lock->owner pointing to the current backend (if
1789 : : * LOCK_DEBUG is set). This is somewhat intentional, as it makes it easier to
1790 : : * debug cases of missing wakeups during lock release.
1791 : : */
1792 : : void
1793 : 27248281 : LWLockRelease(LWLock *lock)
1794 : : {
1795 : 27248281 : LWLockMode mode;
1796 : 27248281 : uint32 oldstate;
1797 : 27248281 : bool check_waiters;
1798 : 27248281 : int i;
1799 : :
1800 : : /*
1801 : : * Remove lock from list of locks held. Usually, but not always, it will
1802 : : * be the latest-acquired lock; so search array backwards.
1803 : : */
1804 [ - + ]: 27412743 : for (i = num_held_lwlocks; --i >= 0;)
1805 [ + + ]: 27412743 : if (lock == held_lwlocks[i].lock)
1806 : 27248281 : break;
1807 : :
1808 [ + - ]: 27248281 : if (i < 0)
1809 [ # # # # ]: 0 : elog(ERROR, "lock %s is not held", T_NAME(lock));
1810 : :
1811 : 27248281 : mode = held_lwlocks[i].mode;
1812 : :
1813 : 27248281 : num_held_lwlocks--;
1814 [ + + ]: 27412743 : for (; i < num_held_lwlocks; i++)
1815 : 164462 : held_lwlocks[i] = held_lwlocks[i + 1];
1816 : :
1817 : : PRINT_LWDEBUG("LWLockRelease", lock, mode);
1818 : :
1819 : : /*
1820 : : * Release my hold on lock, after that it can immediately be acquired by
1821 : : * others, even if we still have to wakeup other waiters.
1822 : : */
1823 [ + + ]: 27248281 : if (mode == LW_EXCLUSIVE)
1824 : 13221951 : oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_EXCLUSIVE);
1825 : : else
1826 : 14026330 : oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_SHARED);
1827 : :
1828 : : /* nobody else can have that kind of lock */
1829 [ + - ]: 27248281 : Assert(!(oldstate & LW_VAL_EXCLUSIVE));
1830 : :
1831 : : if (TRACE_POSTGRESQL_LWLOCK_RELEASE_ENABLED())
1832 : : TRACE_POSTGRESQL_LWLOCK_RELEASE(T_NAME(lock));
1833 : :
1834 : : /*
1835 : : * Check whether we're still waiting for backends to get scheduled; if
1836 : : * so, don't wake them up again.
1837 : : */
1838 [ + + ]: 27248281 : if ((oldstate & LW_FLAG_HAS_WAITERS) &&
1839 [ + + + + ]: 17936 : !(oldstate & LW_FLAG_WAKE_IN_PROGRESS) &&
1840 : 4469 : (oldstate & LW_LOCK_MASK) == 0)
1841 : 4359 : check_waiters = true;
1842 : : else
1843 : 27243922 : check_waiters = false;
1844 : :
1845 : : /*
1846 : : * As waking up waiters requires the spinlock to be acquired, only do so
1847 : : * if necessary.
1848 : : */
1849 [ + + ]: 27248281 : if (check_waiters)
1850 : : {
1851 : : /* XXX: remove before commit? */
1852 : : LOG_LWDEBUG("LWLockRelease", lock, "releasing waiters");
1853 : 4359 : LWLockWakeup(lock);
1854 : 4359 : }
1855 : :
1856 : : /*
1857 : : * Now okay to allow cancel/die interrupts.
1858 : : */
1859 [ + - ]: 27248281 : RESUME_INTERRUPTS();
1860 : 27248281 : }
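/*
 * Minimal usage sketch (editor's addition): the canonical pairing.  Every
 * successful acquisition is matched by exactly one LWLockRelease() on the
 * same lock, normally within the same function.
 */
static void
bump_shared_counter(LWLock *lock, uint64 *shared_counter)
{
	LWLockAcquire(lock, LW_EXCLUSIVE);
	(*shared_counter)++;			/* protected update */
	LWLockRelease(lock);			/* may wake queued waiters */
}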
1861 : :
1862 : : /*
1863 : : * LWLockReleaseClearVar - release a previously acquired lock, reset variable
1864 : : */
1865 : : void
1866 : 2527085 : LWLockReleaseClearVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
1867 : : {
1868 : : /*
1869 : : * Note that pg_atomic_exchange_u64 is a full barrier, so we're guaranteed
1870 : : * that the variable is updated before releasing the lock.
1871 : : */
1872 : 2527085 : pg_atomic_exchange_u64(valptr, val);
1873 : :
1874 : 2527085 : LWLockRelease(lock);
1875 : 2527085 : }
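/*
 * Editor's clarification: LWLockReleaseClearVar(lock, v, val) behaves like
 * the sequence below, except for ordering -- the pg_atomic_exchange_u64()
 * full barrier guarantees the store is visible before the lock appears
 * released, which a plain store would not:
 *
 *		pg_atomic_write_u64(v, val);	-- plain store: not sufficient
 *		LWLockRelease(lock);
 */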
1876 : :
1877 : :
1878 : : /*
1879 : : * LWLockReleaseAll - release all currently-held locks
1880 : : *
1881 : : * Used to clean up after ereport(ERROR). An important difference between this
1882 : : * function and retail LWLockRelease calls is that InterruptHoldoffCount is
1883 : : * unchanged by this operation. This is necessary since InterruptHoldoffCount
1884 : : * has been set to an appropriate level earlier in error recovery. We could
1885 : : * decrement it below zero if we allowed it to drop for each released lock!
1886 : : *
1887 : : * Note that this function must be safe to call even before the LWLock
1888 : : * subsystem has been initialized (e.g., during early startup failures).
1889 : : * In that case, num_held_lwlocks will be 0 and we do nothing.
1890 : : */
1891 : : void
1892 : 10633 : LWLockReleaseAll(void)
1893 : : {
1894 [ + + ]: 10636 : while (num_held_lwlocks > 0)
1895 : : {
1896 : 3 : HOLD_INTERRUPTS(); /* match the upcoming RESUME_INTERRUPTS */
1897 : :
1898 : 3 : LWLockRelease(held_lwlocks[num_held_lwlocks - 1].lock);
1899 : : }
1900 : :
1901 [ + - ]: 10633 : Assert(num_held_lwlocks == 0);
1902 : 10633 : }
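/*
 * Editor's clarification: the HOLD_INTERRUPTS() in the loop above exists
 * only to balance the RESUME_INTERRUPTS() at the end of LWLockRelease();
 * pairing them per iteration leaves InterruptHoldoffCount exactly where
 * error recovery already set it, as the function's header requires.
 */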
1903 : :
1904 : :
1905 : : /*
1906 : : * LWLockHeldByMe - test whether my process holds a lock in any mode
1907 : : *
1908 : : * This is meant as debug support only.
1909 : : */
1910 : : bool
1911 : 858265 : LWLockHeldByMe(LWLock *lock)
1912 : : {
1913 : 858265 : int i;
1914 : :
1915 [ + + ]: 1088847 : for (i = 0; i < num_held_lwlocks; i++)
1916 : : {
1917 [ + + ]: 1039308 : if (held_lwlocks[i].lock == lock)
1918 : 808726 : return true;
1919 : 230582 : }
1920 : 49539 : return false;
1921 : 858265 : }
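/*
 * Typical use (sketched): assertions that document the locking protocol,
 * e.g. before touching a structure protected by a lock:
 *
 *		Assert(LWLockHeldByMe(SomeSharedStateLock));
 *
 * SomeSharedStateLock is a placeholder name, not a lock in this file.
 */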
1922 : :
1923 : : /*
1924 : : * LWLockAnyHeldByMe - test whether my process holds any of an array of locks
1925 : : *
1926 : : * This is meant as debug support only.
1927 : : */
1928 : : bool
1929 : 85618 : LWLockAnyHeldByMe(LWLock *lock, int nlocks, size_t stride)
1930 : : {
1931 : 85618 : char *held_lock_addr;
1932 : 85618 : char *begin;
1933 : 85618 : char *end;
1934 : 85618 : int i;
1935 : :
1936 : 85618 : begin = (char *) lock;
1937 : 85618 : end = begin + nlocks * stride;
1938 [ + + ]: 85622 : for (i = 0; i < num_held_lwlocks; i++)
1939 : : {
1940 : 4 : held_lock_addr = (char *) held_lwlocks[i].lock;
1941 [ + - ]: 4 : if (held_lock_addr >= begin &&
1942 [ - + # # ]: 4 : held_lock_addr < end &&
1943 : 0 : (held_lock_addr - begin) % stride == 0)
1944 : 0 : return true;
1945 : 4 : }
1946 : 85618 : return false;
1947 : 85618 : }
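/*
 * Sketched call (names assumed): verifying that no lock embedded in an
 * array of structs is held.  Each element of the hypothetical 'slots'
 * array embeds an LWLock at the same offset, so consecutive locks are
 * sizeof(SlotType) bytes apart -- that spacing is the 'stride' argument:
 *
 *		Assert(!LWLockAnyHeldByMe(&slots[0].lock, NUM_SLOTS,
 *								  sizeof(SlotType)));
 */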
1948 : :
1949 : : /*
1950 : : * LWLockHeldByMeInMode - test whether my process holds a lock in given mode
1951 : : *
1952 : : * This is meant as debug support only.
1953 : : */
1954 : : bool
1955 : 107631 : LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
1956 : : {
1957 : 107631 : int i;
1958 : :
1959 [ + - ]: 107647 : for (i = 0; i < num_held_lwlocks; i++)
1960 : : {
1961 [ + + + - ]: 107647 : if (held_lwlocks[i].lock == lock && held_lwlocks[i].mode == mode)
1962 : 107631 : return true;
1963 : 16 : }
1964 : 0 : return false;
1965 : 107631 : }
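/*
 * Sketched use: a stronger assertion than LWLockHeldByMe(), also checking
 * the mode, e.g. before modifying (not merely reading) protected state:
 *
 *		Assert(LWLockHeldByMeInMode(SomeSharedStateLock, LW_EXCLUSIVE));
 */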