Branch data Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * sysv_sema.c
4 : : * Implement PGSemaphores using SysV semaphore facilities
5 : : *
6 : : *
7 : : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
8 : : * Portions Copyright (c) 1994, Regents of the University of California
9 : : *
10 : : * IDENTIFICATION
11 : : * src/backend/port/sysv_sema.c
12 : : *
13 : : *-------------------------------------------------------------------------
14 : : */
15 : : #include "postgres.h"
16 : :
17 : : #include <signal.h>
18 : : #include <unistd.h>
19 : : #include <sys/file.h>
20 : : #include <sys/ipc.h>
21 : : #include <sys/sem.h>
22 : : #include <sys/stat.h>
23 : :
24 : : #include "miscadmin.h"
25 : : #include "storage/ipc.h"
26 : : #include "storage/pg_sema.h"
27 : : #include "storage/shmem.h"
28 : :
29 : :
30 : : typedef struct PGSemaphoreData
31 : : {
32 : : int semId; /* ID of the semaphore set that holds this semaphore */
33 : : int semNum; /* index of this semaphore within that set */
34 : : } PGSemaphoreData;
35 : :
36 : : #ifndef HAVE_UNION_SEMUN /* declare the semctl() argument union ourselves when this macro is not defined */
37 : : union semun
38 : : {
39 : : int val; /* integer argument; this file sets it for SETVAL and as a placeholder otherwise */
40 : : struct semid_ds *buf; /* not referenced by the code visible in this file */
41 : : unsigned short *array; /* not referenced by the code visible in this file */
42 : : };
43 : : #endif /* !HAVE_UNION_SEMUN */
44 : :
45 : : typedef key_t IpcSemaphoreKey; /* semaphore key passed to semget(2) */
46 : : typedef int IpcSemaphoreId; /* semaphore set ID returned by semget(2) */
47 : :
48 : : /*
49 : : * SEMAS_PER_SET is the number of useful semaphores in each semaphore set
50 : : * we allocate. It must be *less than* your kernel's SEMMSL (max semaphores
51 : : * per set) parameter, which is often around 25. (Less than, because we
52 : : * allocate one extra sema in each set for identification purposes.)
53 : : */
54 : : #define SEMAS_PER_SET 16
55 : :
56 : : #define IPCProtection (0600) /* permission bits for new sets: access/modify by owning user only */
57 : :
58 : : #define PGSemaMagic 537 /* value left in each set's extra semaphore to mark the set as ours; must be less than SEMVMX */
59 : :
60 : :
61 : : static PGSemaphore sharedSemas; /* array of PGSemaphoreData in shared memory (from ShmemAlloc) */
62 : : static int numSharedSemas; /* number of PGSemaphoreDatas handed out so far */
63 : : static int maxSharedSemas; /* allocated size of PGSemaphoreData array */
64 : : static IpcSemaphoreId *mySemaSets; /* malloc'd array: IDs of sema sets acquired so far */
65 : : static int numSemaSets; /* number of sema sets acquired so far */
66 : : static int maxSemaSets; /* allocated size of mySemaSets array */
67 : : static IpcSemaphoreKey nextSemaKey; /* next key to try using; seeded from the data directory's inode */
68 : : static int nextSemaNumber; /* next free sem num in last sema set; SEMAS_PER_SET means "set is full" */
69 : :
70 : :
71 : : static IpcSemaphoreId InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey,
72 : : int numSems, bool retry_ok); /* one semget() attempt; -1 on a retryable collision */
73 : : static void IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum,
74 : : int value); /* SETVAL on one semaphore, FATAL on failure */
75 : : static void IpcSemaphoreKill(IpcSemaphoreId semId); /* IPC_RMID on a whole set, logs on failure */
76 : : static int IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum); /* GETVAL */
77 : : static pid_t IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum); /* GETPID */
78 : : static IpcSemaphoreId IpcSemaphoreCreate(int numSems); /* find a usable key and create a marked set */
79 : : static void ReleaseSemaphores(int status, Datum arg); /* on_shmem_exit callback */
80 : :
81 : :
82 : : /*
83 : : * InternalIpcSemaphoreCreate
84 : : *
85 : : * Attempt to create a new semaphore set with the specified key.
86 : : * Will fail (return -1) if such a set already exists.
87 : : *
88 : : * If we fail with a failure code other than collision-with-existing-set,
89 : : * print out an error and abort. Other types of errors suggest nonrecoverable
90 : : * problems.
91 : : *
92 : : * Unfortunately, it's sometimes hard to tell whether errors are
93 : : * nonrecoverable. Our caller keeps track of whether continuing to retry
94 : : * is sane or not; if not, we abort on failure regardless of the errno.
95 : : */
96 : : static IpcSemaphoreId
97 : 66 : InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey, int numSems, bool retry_ok)
98 : : {
99 : 66 : int semId;
100 : :
101 : 66 : semId = semget(semKey, numSems, IPC_CREAT | IPC_EXCL | IPCProtection);
102 : :
103 [ + - ]: 66 : if (semId < 0)
104 : : {
105 : 0 : int saved_errno = errno;
106 : :
107 : : /*
108 : : * Fail quietly if error suggests a collision with an existing set and
109 : : * our caller has not lost patience.
110 : : *
111 : : * One would expect EEXIST, given that we said IPC_EXCL, but perhaps
112 : : * we could get a permission violation instead. On some platforms
113 : : * EINVAL will be reported if the existing set has too few semaphores.
114 : : * Also, EIDRM might occur if an old set is slated for destruction but
115 : : * not gone yet.
116 : : *
117 : : * EINVAL is the key reason why we need the caller-level loop limit,
118 : : * as it can also mean that the platform's SEMMSL is less than
119 : : * numSems, and that condition can't be fixed by trying another key.
120 : : */
121 [ # # ]: 0 : if (retry_ok &&
122 : 0 : (saved_errno == EEXIST
123 [ # # ]: 0 : || saved_errno == EACCES
124 [ # # ]: 0 : || saved_errno == EINVAL
125 : : #ifdef EIDRM
126 [ # # ]: 0 : || saved_errno == EIDRM
127 : : #endif
128 : : ))
129 : 0 : return -1;
130 : :
131 : : /*
132 : : * Else complain and abort
133 : : */
134 [ # # # # : 0 : ereport(FATAL,
# # ]
135 : : (errmsg("could not create semaphores: %m"),
136 : : errdetail("Failed system call was semget(%lu, %d, 0%o).",
137 : : (unsigned long) semKey, numSems,
138 : : IPC_CREAT | IPC_EXCL | IPCProtection),
139 : : (saved_errno == ENOSPC) ?
140 : : errhint("This error does *not* mean that you have run out of disk space. "
141 : : "It occurs when either the system limit for the maximum number of "
142 : : "semaphore sets (SEMMNI), or the system wide maximum number of "
143 : : "semaphores (SEMMNS), would be exceeded. You need to raise the "
144 : : "respective kernel parameter. Alternatively, reduce PostgreSQL's "
145 : : "consumption of semaphores by reducing its \"max_connections\" parameter.\n"
146 : : "The PostgreSQL documentation contains more information about "
147 : : "configuring your system for PostgreSQL.") : 0));
148 [ # # ]: 0 : }
149 : :
150 : 66 : return semId;
151 : 66 : }
152 : :
153 : : /*
154 : : * Initialize a semaphore to the specified value.
155 : : */
156 : : static void
157 : 1906 : IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum, int value)
158 : : {
159 : 1906 : union semun semun;
160 : :
161 : 1906 : semun.val = value;
162 [ + - ]: 1906 : if (semctl(semId, semNum, SETVAL, semun) < 0)
163 : : {
164 : 0 : int saved_errno = errno;
165 : :
166 [ # # # # : 0 : ereport(FATAL,
# # ]
167 : : (errmsg_internal("semctl(%d, %d, SETVAL, %d) failed: %m",
168 : : semId, semNum, value),
169 : : (saved_errno == ERANGE) ?
170 : : errhint("You possibly need to raise your kernel's SEMVMX value to be at least "
171 : : "%d. Look into the PostgreSQL documentation for details.",
172 : : value) : 0));
173 : 0 : }
174 : 1906 : }
175 : :
176 : : /*
177 : : * IpcSemaphoreKill(semId) - removes a semaphore set
178 : : */
179 : : static void
180 : 66 : IpcSemaphoreKill(IpcSemaphoreId semId)
181 : : {
182 : 66 : union semun semun;
183 : :
184 : 66 : semun.val = 0; /* unused, but keep compiler quiet */
185 : :
186 [ + - ]: 66 : if (semctl(semId, 0, IPC_RMID, semun) < 0)
187 [ # # # # ]: 0 : elog(LOG, "semctl(%d, 0, IPC_RMID, ...) failed: %m", semId);
188 : 66 : }
189 : :
190 : : /* Get the current value (semval) of the semaphore */
191 : : static int
192 : 0 : IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum)
193 : : {
194 : 0 : union semun dummy; /* for Solaris */
195 : :
196 : 0 : dummy.val = 0; /* unused */
197 : :
198 : 0 : return semctl(semId, semNum, GETVAL, dummy);
199 : 0 : }
200 : :
201 : : /* Get the PID of the last process to do semop() on the semaphore */
202 : : static pid_t
203 : 0 : IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum)
204 : : {
205 : 0 : union semun dummy; /* for Solaris */
206 : :
207 : 0 : dummy.val = 0; /* unused */
208 : :
209 : 0 : return semctl(semId, semNum, GETPID, dummy);
210 : 0 : }
211 : :
212 : :
213 : : /*
214 : : * Create a semaphore set with the given number of useful semaphores
215 : : * (an additional sema is actually allocated to serve as identifier).
216 : : * Dead Postgres sema sets are recycled if found, but we do not fail
217 : : * upon collision with non-Postgres sema sets.
218 : : *
219 : : * The idea here is to detect and re-use keys that may have been assigned
220 : : * by a crashed postmaster or backend.
221 : : */
222 : : static IpcSemaphoreId
223 : 66 : IpcSemaphoreCreate(int numSems)
224 : : {
225 : 66 : int num_tries = 0;
226 : 66 : IpcSemaphoreId semId;
227 : 66 : union semun semun;
228 : 66 : PGSemaphoreData mysema;
229 : :
230 : : /* Loop till we find a free IPC key */
231 : 66 : for (nextSemaKey++;; nextSemaKey++, num_tries++)
232 : : {
233 : 66 : pid_t creatorPID;
234 : :
235 : : /*
236 : : * Try to create new semaphore set. Give up after trying 1000
237 : : * distinct IPC keys.
238 : : */
239 : 132 : semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1,
240 : 66 : num_tries < 1000);
241 [ + - ]: 66 : if (semId >= 0)
242 : 66 : break; /* successful create */
243 : :
244 : : /* See if it looks to be leftover from a dead Postgres process */
245 : 0 : semId = semget(nextSemaKey, numSems + 1, 0);
246 [ # # ]: 0 : if (semId < 0)
247 : 0 : continue; /* failed: must be some other app's */
248 [ # # ]: 0 : if (IpcSemaphoreGetValue(semId, numSems) != PGSemaMagic)
249 : 0 : continue; /* sema belongs to a non-Postgres app */
250 : :
251 : : /*
252 : : * If the creator PID is my own PID or does not belong to any extant
253 : : * process, it's safe to zap it.
254 : : */
255 : 0 : creatorPID = IpcSemaphoreGetLastPID(semId, numSems);
256 [ # # ]: 0 : if (creatorPID <= 0)
257 : 0 : continue; /* oops, GETPID failed */
258 [ # # ]: 0 : if (creatorPID != getpid())
259 : : {
260 [ # # # # ]: 0 : if (kill(creatorPID, 0) == 0 || errno != ESRCH)
261 : 0 : continue; /* sema belongs to a live process */
262 : 0 : }
263 : :
264 : : /*
265 : : * The sema set appears to be from a dead Postgres process, or from a
266 : : * previous cycle of life in this same process. Zap it, if possible.
267 : : * This probably shouldn't fail, but if it does, assume the sema set
268 : : * belongs to someone else after all, and continue quietly.
269 : : */
270 : 0 : semun.val = 0; /* unused, but keep compiler quiet */
271 [ # # ]: 0 : if (semctl(semId, 0, IPC_RMID, semun) < 0)
272 : 0 : continue;
273 : :
274 : : /*
275 : : * Now try again to create the sema set.
276 : : */
277 : 0 : semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1, true);
278 [ # # ]: 0 : if (semId >= 0)
279 : 0 : break; /* successful create */
280 : :
281 : : /*
282 : : * Can only get here if some other process managed to create the same
283 : : * sema key before we did. Let him have that one, loop around to try
284 : : * next key.
285 : : */
286 [ - - - + ]: 66 : }
287 : :
288 : : /*
289 : : * OK, we created a new sema set. Mark it as created by this process. We
290 : : * do this by setting the spare semaphore to PGSemaMagic-1 and then
291 : : * incrementing it with semop(). That leaves it with value PGSemaMagic
292 : : * and sempid referencing this process.
293 : : */
294 : 66 : IpcSemaphoreInitialize(semId, numSems, PGSemaMagic - 1);
295 : 66 : mysema.semId = semId;
296 : 66 : mysema.semNum = numSems;
297 : 66 : PGSemaphoreUnlock(&mysema);
298 : :
299 : 132 : return semId;
300 : 66 : }
301 : :
302 : :
303 : : /*
304 : : * Report amount of shared memory needed for semaphores
305 : : */
306 : : Size
307 : 15 : PGSemaphoreShmemSize(int maxSemas)
308 : : {
309 : 15 : return mul_size(maxSemas, sizeof(PGSemaphoreData));
310 : : }
311 : :
312 : : /*
313 : : * PGReserveSemaphores --- initialize semaphore support
314 : : *
315 : : * This is called during postmaster start or shared memory reinitialization.
316 : : * It should do whatever is needed to be able to support up to maxSemas
317 : : * subsequent PGSemaphoreCreate calls. Also, if any system resources
318 : : * are acquired here or in PGSemaphoreCreate, register an on_shmem_exit
319 : : * callback to release them.
320 : : *
321 : : * In the SysV implementation, we acquire semaphore sets on-demand; the
322 : : * maxSemas parameter is just used to size the arrays. There is an array
323 : : * of PGSemaphoreData structs in shared memory, and a postmaster-local array
324 : : * with one entry per SysV semaphore set, which we use for releasing the
325 : : * semaphore sets when done. (This design ensures that postmaster shutdown
326 : : * doesn't rely on the contents of shared memory, which a failed backend might
327 : : * have clobbered.)
328 : : */
329 : : void
330 : 6 : PGReserveSemaphores(int maxSemas)
331 : : {
332 : 6 : struct stat statbuf;
333 : :
334 : : /*
335 : : * We use the data directory's inode number to seed the search for free
336 : : * semaphore keys. This minimizes the odds of collision with other
337 : : * postmasters, while maximizing the odds that we will detect and clean up
338 : : * semaphores left over from a crashed postmaster in our own directory.
339 : : */
340 [ + - ]: 6 : if (stat(DataDir, &statbuf) < 0)
341 [ # # # # ]: 0 : ereport(FATAL,
342 : : (errcode_for_file_access(),
343 : : errmsg("could not stat data directory \"%s\": %m",
344 : : DataDir)));
345 : :
346 : 6 : sharedSemas = (PGSemaphore)
347 : 6 : ShmemAlloc(PGSemaphoreShmemSize(maxSemas));
348 : 6 : numSharedSemas = 0;
349 : 6 : maxSharedSemas = maxSemas;
350 : :
351 : 6 : maxSemaSets = (maxSemas + SEMAS_PER_SET - 1) / SEMAS_PER_SET;
352 : 6 : mySemaSets = (IpcSemaphoreId *)
353 : 6 : malloc(maxSemaSets * sizeof(IpcSemaphoreId));
354 [ + - ]: 6 : if (mySemaSets == NULL)
355 [ # # # # ]: 0 : elog(PANIC, "out of memory");
356 : 6 : numSemaSets = 0;
357 : 6 : nextSemaKey = statbuf.st_ino;
358 : 6 : nextSemaNumber = SEMAS_PER_SET; /* force sema set alloc on 1st call */
359 : :
360 : 6 : on_shmem_exit(ReleaseSemaphores, 0);
361 : 6 : }
362 : :
363 : : /*
364 : : * Release semaphores at shutdown or shmem reinitialization
365 : : *
366 : : * (called as an on_shmem_exit callback, hence funny argument list)
367 : : */
368 : : static void
369 : 6 : ReleaseSemaphores(int status, Datum arg)
370 : : {
371 : 6 : int i;
372 : :
373 [ + + ]: 72 : for (i = 0; i < numSemaSets; i++)
374 : 66 : IpcSemaphoreKill(mySemaSets[i]);
375 : 6 : free(mySemaSets);
376 : 6 : }
377 : :
378 : : /*
379 : : * PGSemaphoreCreate
380 : : *
381 : : * Allocate a PGSemaphore structure with initial count 1
382 : : */
383 : : PGSemaphore
384 : 1034 : PGSemaphoreCreate(void)
385 : : {
386 : 1034 : PGSemaphore sema;
387 : :
388 : : /* Can't do this in a backend, because static state is postmaster's */
389 [ + - ]: 1034 : Assert(!IsUnderPostmaster);
390 : :
391 [ + + ]: 1034 : if (nextSemaNumber >= SEMAS_PER_SET)
392 : : {
393 : : /* Time to allocate another semaphore set */
394 [ + - ]: 66 : if (numSemaSets >= maxSemaSets)
395 [ # # # # ]: 0 : elog(PANIC, "too many semaphores created");
396 : 66 : mySemaSets[numSemaSets] = IpcSemaphoreCreate(SEMAS_PER_SET);
397 : 66 : numSemaSets++;
398 : 66 : nextSemaNumber = 0;
399 : 66 : }
400 : : /* Use the next shared PGSemaphoreData */
401 [ + - ]: 1034 : if (numSharedSemas >= maxSharedSemas)
402 [ # # # # ]: 0 : elog(PANIC, "too many semaphores created");
403 : 1034 : sema = &sharedSemas[numSharedSemas++];
404 : : /* Assign the next free semaphore in the current set */
405 : 1034 : sema->semId = mySemaSets[numSemaSets - 1];
406 : 1034 : sema->semNum = nextSemaNumber++;
407 : : /* Initialize it to count 1 */
408 : 1034 : IpcSemaphoreInitialize(sema->semId, sema->semNum, 1);
409 : :
410 : 2068 : return sema;
411 : 1034 : }
412 : :
413 : : /*
414 : : * PGSemaphoreReset
415 : : *
416 : : * Reset a previously-initialized PGSemaphore to have count 0
417 : : */
418 : : void
419 : 806 : PGSemaphoreReset(PGSemaphore sema)
420 : : {
421 : 806 : IpcSemaphoreInitialize(sema->semId, sema->semNum, 0);
422 : 806 : }
423 : :
424 : : /*
425 : : * PGSemaphoreLock
426 : : *
427 : : * Lock a semaphore (decrement count), blocking if count would be < 0
428 : : */
429 : : void
430 : 6181 : PGSemaphoreLock(PGSemaphore sema)
431 : : {
432 : 6181 : int errStatus;
433 : 6181 : struct sembuf sops;
434 : :
435 : 6181 : sops.sem_op = -1; /* decrement */
436 : 6181 : sops.sem_flg = 0;
437 : 6181 : sops.sem_num = sema->semNum;
438 : :
439 : : /*
440 : : * Note: if errStatus is -1 and errno == EINTR then it means we returned
441 : : * from the operation prematurely because we were sent a signal. So we
442 : : * try and lock the semaphore again.
443 : : *
444 : : * We used to check interrupts here, but that required servicing
445 : : * interrupts directly from signal handlers. Which is hard to do safely
446 : : * and portably.
447 : : */
448 : 6181 : do
449 : : {
450 : 6186 : errStatus = semop(sema->semId, &sops, 1);
451 [ + + + + ]: 6186 : } while (errStatus < 0 && errno == EINTR);
452 : :
453 [ + - ]: 6181 : if (errStatus < 0)
454 [ # # # # ]: 0 : elog(FATAL, "semop(id=%d) failed: %m", sema->semId);
455 : 6181 : }
456 : :
457 : : /*
458 : : * PGSemaphoreUnlock
459 : : *
460 : : * Unlock a semaphore (increment count)
461 : : */
462 : : void
463 : 6241 : PGSemaphoreUnlock(PGSemaphore sema)
464 : : {
465 : 6241 : int errStatus;
466 : 6241 : struct sembuf sops;
467 : :
468 : 6241 : sops.sem_op = 1; /* increment */
469 : 6241 : sops.sem_flg = 0;
470 : 6241 : sops.sem_num = sema->semNum;
471 : :
472 : : /*
473 : : * Note: if errStatus is -1 and errno == EINTR then it means we returned
474 : : * from the operation prematurely because we were sent a signal. So we
475 : : * try and unlock the semaphore again. Not clear this can really happen,
476 : : * but might as well cope.
477 : : */
478 : 6241 : do
479 : : {
480 : 6241 : errStatus = semop(sema->semId, &sops, 1);
481 [ + - - + ]: 6241 : } while (errStatus < 0 && errno == EINTR);
482 : :
483 [ + - ]: 6241 : if (errStatus < 0)
484 [ # # # # ]: 0 : elog(FATAL, "semop(id=%d) failed: %m", sema->semId);
485 : 6241 : }
486 : :
487 : : /*
488 : : * PGSemaphoreTryLock
489 : : *
490 : : * Lock a semaphore only if able to do so without blocking
491 : : */
492 : : bool
493 : 0 : PGSemaphoreTryLock(PGSemaphore sema)
494 : : {
495 : 0 : int errStatus;
496 : 0 : struct sembuf sops;
497 : :
498 : 0 : sops.sem_op = -1; /* decrement */
499 : 0 : sops.sem_flg = IPC_NOWAIT; /* but don't block */
500 : 0 : sops.sem_num = sema->semNum;
501 : :
502 : : /*
503 : : * Note: if errStatus is -1 and errno == EINTR then it means we returned
504 : : * from the operation prematurely because we were sent a signal. So we
505 : : * try and lock the semaphore again.
506 : : */
507 : 0 : do
508 : : {
509 : 0 : errStatus = semop(sema->semId, &sops, 1);
510 [ # # # # ]: 0 : } while (errStatus < 0 && errno == EINTR);
511 : :
512 [ # # ]: 0 : if (errStatus < 0)
513 : : {
514 : : /* Expect EAGAIN or EWOULDBLOCK (platform-dependent) */
515 : : #ifdef EAGAIN
516 [ # # ]: 0 : if (errno == EAGAIN)
517 : 0 : return false; /* failed to lock it */
518 : : #endif
519 : : #if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
520 : : if (errno == EWOULDBLOCK)
521 : : return false; /* failed to lock it */
522 : : #endif
523 : : /* Otherwise we got trouble */
524 [ # # # # ]: 0 : elog(FATAL, "semop(id=%d) failed: %m", sema->semId);
525 : 0 : }
526 : :
527 : 0 : return true;
528 : 0 : }
|