Branch data Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * snapmgr.c
4 : : * PostgreSQL snapshot manager
5 : : *
6 : : * The following functions return an MVCC snapshot that can be used in tuple
7 : : * visibility checks:
8 : : *
9 : : * - GetTransactionSnapshot
10 : : * - GetLatestSnapshot
11 : : * - GetCatalogSnapshot
12 : : * - GetNonHistoricCatalogSnapshot
13 : : *
14 : : * Each of these functions returns a reference to a statically allocated
15 : : * snapshot. The statically allocated snapshot is subject to change on any
16 : : * snapshot-related function call, and should not be used directly. Instead,
17 : : * call PushActiveSnapshot() or RegisterSnapshot() to create a longer-lived
18 : : * copy and use that.
19 : : *
20 : : * We keep track of snapshots in two ways: those "registered" by resowner.c,
21 : : * and the "active snapshot" stack. All snapshots in either of them live in
22 : : * persistent memory. When a snapshot is no longer in any of these lists
23 : : * (tracked by separate refcounts on each snapshot), its memory can be freed.
24 : : *
25 : : * In addition to the above-mentioned MVCC snapshots, there are some special
26 : : * snapshots like SnapshotSelf, SnapshotAny, and "dirty" snapshots. They can
27 : : * only be used in limited contexts and cannot be registered or pushed to the
28 : : * active stack.
29 : : *
30 : : * ActiveSnapshot stack
31 : : * --------------------
32 : : *
33 : : * Most visibility checks use the current "active snapshot" returned by
34 : : * GetActiveSnapshot(). When running normal queries, the active snapshot is
35 : : * set when query execution begins based on the transaction isolation level.
36 : : *
37 : : * The active snapshot is tracked in a stack so that the currently active one
38 : : * is at the top of the stack. It mirrors the process call stack: whenever we
39 : : * recurse or switch context to fetch rows from a different portal for
40 : : * example, the appropriate snapshot is pushed to become the active snapshot,
41 : : * and popped on return. Once upon a time, ActiveSnapshot was just a global
42 : : * variable that was saved and restored similar to CurrentMemoryContext, but
43 : : * nowadays it's managed as a separate data structure so that we can keep
44 : : * track of which snapshots are in use and reset MyProc->xmin when there is no
45 : : * active snapshot.
46 : : *
47 : : * However, there are a couple of exceptions where the active snapshot stack
48 : : * does not strictly mirror the call stack:
49 : : *
50 : : * - VACUUM and a few other utility commands manage their own transactions,
51 : : * which take their own snapshots. They are called with an active snapshot
52 : : * set, like most utility commands, but they pop the active snapshot that
53 : : * was pushed by the caller. PortalRunUtility knows about the possibility
54 : : * that the snapshot it pushed is no longer active on return.
55 : : *
56 : : * - When COMMIT or ROLLBACK is executed within a procedure or DO-block, the
57 : : * active snapshot stack is destroyed, and re-established later when
58 : : * subsequent statements in the procedure are executed. There are many
59 : : * limitations on when in-procedure COMMIT/ROLLBACK is allowed; one such
60 : : * limitation is that all the snapshots on the active snapshot stack are
61 : : * known to portals that are being executed, which makes it safe to reset
62 : : * the stack. See EnsurePortalSnapshotExists().
63 : : *
64 : : * Registered snapshots
65 : : * --------------------
66 : : *
67 : : * In addition to snapshots pushed to the active snapshot stack, a snapshot
68 : : * can be registered with a resource owner.
69 : : *
70 : : * The FirstXactSnapshot, if any, is treated a bit specially: we increment its
71 : : * regd_count and list it in RegisteredSnapshots, but this reference is not
72 : : * tracked by a resource owner. We used to use the TopTransactionResourceOwner
73 : : * to track this snapshot reference, but that introduces logical circularity
74 : : * and thus makes it impossible to clean up in a sane fashion. It's better to
75 : : * handle this reference as an internally-tracked registration, so that this
76 : : * module is entirely lower-level than ResourceOwners.
77 : : *
78 : : * Likewise, any snapshots that have been exported by pg_export_snapshot
79 : : * have regd_count = 1 and are listed in RegisteredSnapshots, but are not
80 : : * tracked by any resource owner.
81 : : *
82 : : * Likewise, the CatalogSnapshot is listed in RegisteredSnapshots when it
83 : : * is valid, but is not tracked by any resource owner.
84 : : *
85 : : * The same is true for historic snapshots used during logical decoding;
86 : : * their lifetime is managed separately (as they live longer than one xact.c
87 : : * transaction).
88 : : *
89 : : * These arrangements let us reset MyProc->xmin when there are no snapshots
90 : : * referenced by this transaction, and advance it when the one with oldest
91 : : * Xmin is no longer referenced. For simplicity however, only registered
92 : : * snapshots, not active snapshots, participate in tracking which one is oldest;
93 : : * we don't try to change MyProc->xmin except when the active-snapshot
94 : : * stack is empty.
95 : : *
96 : : *
97 : : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
98 : : * Portions Copyright (c) 1994, Regents of the University of California
99 : : *
100 : : * IDENTIFICATION
101 : : * src/backend/utils/time/snapmgr.c
102 : : *
103 : : *-------------------------------------------------------------------------
104 : : */
105 : : #include "postgres.h"
106 : :
107 : : #include <sys/stat.h>
108 : : #include <unistd.h>
109 : :
110 : : #include "access/subtrans.h"
111 : : #include "access/transam.h"
112 : : #include "access/xact.h"
113 : : #include "datatype/timestamp.h"
114 : : #include "lib/pairingheap.h"
115 : : #include "miscadmin.h"
116 : : #include "port/pg_lfind.h"
117 : : #include "storage/fd.h"
118 : : #include "storage/predicate.h"
119 : : #include "storage/proc.h"
120 : : #include "storage/procarray.h"
121 : : #include "utils/builtins.h"
122 : : #include "utils/injection_point.h"
123 : : #include "utils/memutils.h"
124 : : #include "utils/resowner.h"
125 : : #include "utils/snapmgr.h"
126 : : #include "utils/syscache.h"
127 : :
128 : :
129 : : /*
130 : : * CurrentSnapshot points to the only snapshot taken in transaction-snapshot
131 : : * mode, and to the latest one taken in a read-committed transaction.
132 : : * SecondarySnapshot is a snapshot that's always up-to-date as of the current
133 : : * instant, even in transaction-snapshot mode. It should only be used for
134 : : * special-purpose code (say, RI checking.) CatalogSnapshot points to an
135 : : * MVCC snapshot intended to be used for catalog scans; we must invalidate it
136 : : * whenever a system catalog change occurs.
137 : : *
138 : : * These SnapshotData structs are static to simplify memory allocation
139 : : * (see the hack in GetSnapshotData to avoid repeated malloc/free).
140 : : */
141 : : static SnapshotData CurrentSnapshotData = {SNAPSHOT_MVCC};
142 : : static SnapshotData SecondarySnapshotData = {SNAPSHOT_MVCC};
143 : : static SnapshotData CatalogSnapshotData = {SNAPSHOT_MVCC};
144 : : SnapshotData SnapshotSelfData = {SNAPSHOT_SELF};
145 : : SnapshotData SnapshotAnyData = {SNAPSHOT_ANY};
146 : : SnapshotData SnapshotToastData = {SNAPSHOT_TOAST};
147 : :
148 : : /* Pointers to valid snapshots */
149 : : static Snapshot CurrentSnapshot = NULL;
150 : : static Snapshot SecondarySnapshot = NULL;
151 : : static Snapshot CatalogSnapshot = NULL;
152 : : static Snapshot HistoricSnapshot = NULL;
153 : :
154 : : /*
155 : : * These are updated by GetSnapshotData. We initialize them this way
156 : : * for the convenience of TransactionIdIsInProgress: even in bootstrap
157 : : * mode, we don't want it to say that BootstrapTransactionId is in progress.
158 : : */
159 : : TransactionId TransactionXmin = FirstNormalTransactionId;
160 : : TransactionId RecentXmin = FirstNormalTransactionId;
161 : :
162 : : /* (table, ctid) => (cmin, cmax) mapping during timetravel */
163 : : static HTAB *tuplecid_data = NULL;
164 : :
165 : : /*
166 : : * Elements of the active snapshot stack.
167 : : *
168 : : * Each element here accounts for exactly one active_count on SnapshotData.
169 : : *
170 : : * NB: the code assumes that elements in this list are in non-increasing
171 : : * order of as_level; also, the list must be NULL-terminated.
172 : : */
173 : : typedef struct ActiveSnapshotElt
174 : : {
175 : : Snapshot as_snap;
176 : : int as_level;
177 : : struct ActiveSnapshotElt *as_next;
178 : : } ActiveSnapshotElt;
179 : :
180 : : /* Top of the stack of active snapshots */
181 : : static ActiveSnapshotElt *ActiveSnapshot = NULL;
182 : :
183 : : /*
184 : : * Currently registered Snapshots. Ordered in a heap by xmin, so that we can
185 : : * quickly find the one with lowest xmin, to advance our MyProc->xmin.
186 : : */
187 : : static int xmin_cmp(const pairingheap_node *a, const pairingheap_node *b,
188 : : void *arg);
189 : :
190 : : static pairingheap RegisteredSnapshots = {&xmin_cmp, NULL, NULL};
191 : :
192 : : /* first GetTransactionSnapshot call in a transaction? */
193 : : bool FirstSnapshotSet = false;
194 : :
195 : : /*
196 : : * Remember the serializable transaction snapshot, if any. We cannot trust
197 : : * FirstSnapshotSet in combination with IsolationUsesXactSnapshot(), because
198 : : * GUC may be reset before us, changing the value of IsolationUsesXactSnapshot.
199 : : */
200 : : static Snapshot FirstXactSnapshot = NULL;
201 : :
202 : : /* Define pathname of exported-snapshot files */
203 : : #define SNAPSHOT_EXPORT_DIR "pg_snapshots"
204 : :
205 : : /* Structure holding info about exported snapshot. */
206 : : typedef struct ExportedSnapshot
207 : : {
208 : : char *snapfile;
209 : : Snapshot snapshot;
210 : : } ExportedSnapshot;
211 : :
212 : : /* Current xact's exported snapshots (a list of ExportedSnapshot structs) */
213 : : static List *exportedSnapshots = NIL;
214 : :
215 : : /* Prototypes for local functions */
216 : : static Snapshot CopySnapshot(Snapshot snapshot);
217 : : static void UnregisterSnapshotNoOwner(Snapshot snapshot);
218 : : static void FreeSnapshot(Snapshot snapshot);
219 : : static void SnapshotResetXmin(void);
220 : :
221 : : /* ResourceOwner callbacks to track snapshot references */
222 : : static void ResOwnerReleaseSnapshot(Datum res);
223 : :
224 : : static const ResourceOwnerDesc snapshot_resowner_desc =
225 : : {
226 : : .name = "snapshot reference",
227 : : .release_phase = RESOURCE_RELEASE_AFTER_LOCKS,
228 : : .release_priority = RELEASE_PRIO_SNAPSHOT_REFS,
229 : : .ReleaseResource = ResOwnerReleaseSnapshot,
230 : : .DebugPrint = NULL /* the default message is fine */
231 : : };
232 : :
233 : : /* Convenience wrappers over ResourceOwnerRemember/Forget */
234 : : static inline void
235 : 1789772 : ResourceOwnerRememberSnapshot(ResourceOwner owner, Snapshot snap)
236 : : {
237 : 1789772 : ResourceOwnerRemember(owner, PointerGetDatum(snap), &snapshot_resowner_desc);
238 : 1789772 : }
239 : : static inline void
240 : 1775888 : ResourceOwnerForgetSnapshot(ResourceOwner owner, Snapshot snap)
241 : : {
242 : 1775888 : ResourceOwnerForget(owner, PointerGetDatum(snap), &snapshot_resowner_desc);
243 : 1775888 : }
244 : :
245 : : /*
246 : : * Snapshot fields to be serialized.
247 : : *
248 : : * Only these fields need to be sent to the cooperating backend; the
249 : : * remaining ones can (and must) be set by the receiver upon restore.
250 : : */
251 : : typedef struct SerializedSnapshotData
252 : : {
253 : : TransactionId xmin;
254 : : TransactionId xmax;
255 : : uint32 xcnt;
256 : : int32 subxcnt;
257 : : bool suboverflowed;
258 : : bool takenDuringRecovery;
259 : : CommandId curcid;
260 : : } SerializedSnapshotData;
261 : :
262 : : /*
263 : : * GetTransactionSnapshot
264 : : * Get the appropriate snapshot for a new query in a transaction.
265 : : *
266 : : * Note that the return value points at static storage that will be modified
267 : : * by future calls and by CommandCounterIncrement(). Callers must call
268 : : * RegisterSnapshot or PushActiveSnapshot on the returned snap before doing
269 : : * any other non-trivial work that could invalidate it.
270 : : */
271 : : Snapshot
272 : 520991 : GetTransactionSnapshot(void)
273 : : {
274 : : /*
275 : : * Return historic snapshot if doing logical decoding.
276 : : *
277 : : * Historic snapshots are only usable for catalog access, not for
278 : : * general-purpose queries. The caller is responsible for ensuring that
279 : : * the snapshot is used correctly! (PostgreSQL code never calls this
280 : : * during logical decoding, but extensions can do it.)
281 : : */
282 [ + - ]: 520991 : if (HistoricSnapshotActive())
283 : : {
284 : : /*
285 : : * We'll never need a non-historic transaction snapshot in this
286 : : * (sub-)transaction, so there's no need to be careful to set one up
287 : : * for later calls to GetTransactionSnapshot().
288 : : */
289 [ # # ]: 0 : Assert(!FirstSnapshotSet);
290 : 0 : return HistoricSnapshot;
291 : : }
292 : :
293 : : /* First call in transaction? */
294 [ + + ]: 520991 : if (!FirstSnapshotSet)
295 : : {
296 : : /*
297 : : * Don't allow catalog snapshot to be older than xact snapshot. Must
298 : : * do this first to allow the empty-heap Assert to succeed.
299 : : */
300 : 53152 : InvalidateCatalogSnapshot();
301 : :
302 [ + - ]: 53152 : Assert(pairingheap_is_empty(&RegisteredSnapshots));
303 [ + - ]: 53152 : Assert(FirstXactSnapshot == NULL);
304 : :
305 [ + - ]: 53152 : if (IsInParallelMode())
306 [ # # # # ]: 0 : elog(ERROR,
307 : : "cannot take query snapshot during a parallel operation");
308 : :
309 : : /*
310 : : * In transaction-snapshot mode, the first snapshot must live until
311 : : * end of xact regardless of what the caller does with it, so we must
312 : : * make a copy of it rather than returning CurrentSnapshotData
313 : : * directly. Furthermore, if we're running in serializable mode,
314 : : * predicate.c needs to wrap the snapshot fetch in its own processing.
315 : : */
316 [ + + ]: 53152 : if (IsolationUsesXactSnapshot())
317 : : {
318 : : /* First, create the snapshot in CurrentSnapshotData */
319 [ + + ]: 32 : if (IsolationIsSerializable())
320 : 21 : CurrentSnapshot = GetSerializableTransactionSnapshot(&CurrentSnapshotData);
321 : : else
322 : 11 : CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
323 : : /* Make a saved copy */
324 : 32 : CurrentSnapshot = CopySnapshot(CurrentSnapshot);
325 : 32 : FirstXactSnapshot = CurrentSnapshot;
326 : : /* Mark it as "registered" in FirstXactSnapshot */
327 : 32 : FirstXactSnapshot->regd_count++;
328 : 32 : pairingheap_add(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
329 : 32 : }
330 : : else
331 : 53120 : CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
332 : :
333 : 53152 : FirstSnapshotSet = true;
334 : 53152 : return CurrentSnapshot;
335 : : }
336 : :
337 [ + + ]: 467839 : if (IsolationUsesXactSnapshot())
338 : 58 : return CurrentSnapshot;
339 : :
340 : : /* Don't allow catalog snapshot to be older than xact snapshot. */
341 : 467781 : InvalidateCatalogSnapshot();
342 : :
343 : 467781 : CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
344 : :
345 : 467781 : return CurrentSnapshot;
346 : 520991 : }
347 : :
348 : : /*
349 : : * GetLatestSnapshot
350 : : * Get a snapshot that is up-to-date as of the current instant,
351 : : * even if we are executing in transaction-snapshot mode.
352 : : */
353 : : Snapshot
354 : 1144 : GetLatestSnapshot(void)
355 : : {
356 : : /*
357 : : * We might be able to relax this, but nothing that could otherwise work
358 : : * needs it.
359 : : */
360 [ + - ]: 1144 : if (IsInParallelMode())
361 [ # # # # ]: 0 : elog(ERROR,
362 : : "cannot update SecondarySnapshot during a parallel operation");
363 : :
364 : : /*
365 : : * So far there are no cases requiring support for GetLatestSnapshot()
366 : : * during logical decoding, but it wouldn't be hard to add if required.
367 : : */
368 [ + - ]: 1144 : Assert(!HistoricSnapshotActive());
369 : :
370 : : /* If first call in transaction, go ahead and set the xact snapshot */
371 [ + + ]: 1144 : if (!FirstSnapshotSet)
372 : 1 : return GetTransactionSnapshot();
373 : :
374 : 1143 : SecondarySnapshot = GetSnapshotData(&SecondarySnapshotData);
375 : :
376 : 1143 : return SecondarySnapshot;
377 : 1144 : }
378 : :
379 : : /*
380 : : * GetCatalogSnapshot
381 : : * Get a snapshot that is sufficiently up-to-date for scan of the
382 : : * system catalog with the specified OID.
383 : : */
384 : : Snapshot
385 : 874727 : GetCatalogSnapshot(Oid relid)
386 : : {
387 : : /*
388 : : * Return historic snapshot while we're doing logical decoding, so we can
389 : : * see the appropriate state of the catalog.
390 : : *
391 : : * This is the primary reason for needing to reset the system caches after
392 : : * finishing decoding.
393 : : */
394 [ - + ]: 874727 : if (HistoricSnapshotActive())
395 : 0 : return HistoricSnapshot;
396 : :
397 : 874727 : return GetNonHistoricCatalogSnapshot(relid);
398 : 874727 : }
399 : :
400 : : /*
401 : : * GetNonHistoricCatalogSnapshot
402 : : * Get a snapshot that is sufficiently up-to-date for scan of the system
403 : : * catalog with the specified OID, even while historic snapshots are set
404 : : * up.
405 : : */
406 : : Snapshot
407 : 874727 : GetNonHistoricCatalogSnapshot(Oid relid)
408 : : {
409 : : /*
410 : : * If the caller is trying to scan a relation that has no syscache, no
411 : : * catcache invalidations will be sent when it is updated. For a few key
412 : : * relations, snapshot invalidations are sent instead. If we're trying to
413 : : * scan a relation for which neither catcache nor snapshot invalidations
414 : : * are sent, we must refresh the snapshot every time.
415 : : */
416 [ + + ]: 874727 : if (CatalogSnapshot &&
417 [ + + + + ]: 751588 : !RelationInvalidatesSnapshotsOnly(relid) &&
418 : 579656 : !RelationHasSysCache(relid))
419 : 59995 : InvalidateCatalogSnapshot();
420 : :
421 [ + + ]: 874727 : if (CatalogSnapshot == NULL)
422 : : {
423 : : /* Get new snapshot. */
424 : 183134 : CatalogSnapshot = GetSnapshotData(&CatalogSnapshotData);
425 : :
426 : : /*
427 : : * Make sure the catalog snapshot will be accounted for in decisions
428 : : * about advancing PGPROC->xmin. We could apply RegisterSnapshot, but
429 : : * that would result in making a physical copy, which is overkill; and
430 : : * it would also create a dependency on some resource owner, which we
431 : : * do not want for reasons explained at the head of this file. Instead
432 : : * just shove the CatalogSnapshot into the pairing heap manually. This
433 : : * has to be reversed in InvalidateCatalogSnapshot, of course.
434 : : *
435 : : * NB: it had better be impossible for this to throw error, since the
436 : : * CatalogSnapshot pointer is already valid.
437 : : */
438 : 183134 : pairingheap_add(&RegisteredSnapshots, &CatalogSnapshot->ph_node);
439 : 183134 : }
440 : :
441 : 874727 : return CatalogSnapshot;
442 : : }
443 : :
444 : : /*
445 : : * InvalidateCatalogSnapshot
446 : : * Mark the current catalog snapshot, if any, as invalid
447 : : *
448 : : * We could change this API to allow the caller to provide more fine-grained
449 : : * invalidation details, so that a change to relation A wouldn't prevent us
450 : : * from using our cached snapshot to scan relation B, but so far there's no
451 : : * evidence that the CPU cycles we spent tracking such fine details would be
452 : : * well-spent.
453 : : */
454 : : void
455 : 3340072 : InvalidateCatalogSnapshot(void)
456 : : {
457 [ + + ]: 3340072 : if (CatalogSnapshot)
458 : : {
459 : 183134 : pairingheap_remove(&RegisteredSnapshots, &CatalogSnapshot->ph_node);
460 : 183134 : CatalogSnapshot = NULL;
461 : 183134 : SnapshotResetXmin();
462 : : INJECTION_POINT("invalidate-catalog-snapshot-end", NULL);
463 : 183134 : }
464 : 3340072 : }
465 : :
466 : : /*
467 : : * InvalidateCatalogSnapshotConditionally
468 : : * Drop catalog snapshot if it's the only one we have
469 : : *
470 : : * This is called when we are about to wait for client input, so we don't
471 : : * want to continue holding the catalog snapshot if it might mean that the
472 : : * global xmin horizon can't advance. However, if there are other snapshots
473 : : * still active or registered, the catalog snapshot isn't likely to be the
474 : : * oldest one, so we might as well keep it.
475 : : */
476 : : void
477 : 59905 : InvalidateCatalogSnapshotConditionally(void)
478 : : {
479 [ + + ]: 59905 : if (CatalogSnapshot &&
480 [ + + + + ]: 2176 : ActiveSnapshot == NULL &&
481 [ + - ]: 1078 : pairingheap_is_singular(&RegisteredSnapshots))
482 : 983 : InvalidateCatalogSnapshot();
483 : 59905 : }
484 : :
485 : : /*
486 : : * SnapshotSetCommandId
487 : : * Propagate CommandCounterIncrement into the static snapshots, if set
488 : : */
489 : : void
490 : 469537 : SnapshotSetCommandId(CommandId curcid)
491 : : {
492 [ + + ]: 469537 : if (!FirstSnapshotSet)
493 : 194 : return;
494 : :
495 [ - + ]: 469343 : if (CurrentSnapshot)
496 : 469343 : CurrentSnapshot->curcid = curcid;
497 [ + + ]: 469343 : if (SecondarySnapshot)
498 : 2197 : SecondarySnapshot->curcid = curcid;
499 : : /* Should we do the same with CatalogSnapshot? */
500 : 469537 : }
501 : :
502 : : /*
503 : : * SetTransactionSnapshot
504 : : * Set the transaction's snapshot from an imported MVCC snapshot.
505 : : *
506 : : * Note that this is very closely tied to GetTransactionSnapshot --- it
507 : : * must take care of all the same considerations as the first-snapshot case
508 : : * in GetTransactionSnapshot.
509 : : */
510 : : static void
511 : 477 : SetTransactionSnapshot(Snapshot sourcesnap, VirtualTransactionId *sourcevxid,
512 : : int sourcepid, PGPROC *sourceproc)
513 : : {
514 : : /* Caller should have checked this already */
515 [ + - ]: 477 : Assert(!FirstSnapshotSet);
516 : :
517 : : /* Better do this to ensure following Assert succeeds. */
518 : 477 : InvalidateCatalogSnapshot();
519 : :
520 [ + - ]: 477 : Assert(pairingheap_is_empty(&RegisteredSnapshots));
521 [ + - ]: 477 : Assert(FirstXactSnapshot == NULL);
522 [ + - ]: 477 : Assert(!HistoricSnapshotActive());
523 : :
524 : : /*
525 : : * Even though we are not going to use the snapshot it computes, we must
526 : : * call GetSnapshotData, for two reasons: (1) to be sure that
527 : : * CurrentSnapshotData's XID arrays have been allocated, and (2) to update
528 : : * the state for GlobalVis*.
529 : : */
530 : 477 : CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
531 : :
532 : : /*
533 : : * Now copy appropriate fields from the source snapshot.
534 : : */
535 : 477 : CurrentSnapshot->xmin = sourcesnap->xmin;
536 : 477 : CurrentSnapshot->xmax = sourcesnap->xmax;
537 : 477 : CurrentSnapshot->xcnt = sourcesnap->xcnt;
538 [ + - ]: 477 : Assert(sourcesnap->xcnt <= GetMaxSnapshotXidCount());
539 [ + + ]: 477 : if (sourcesnap->xcnt > 0)
540 : 94 : memcpy(CurrentSnapshot->xip, sourcesnap->xip,
541 : : sourcesnap->xcnt * sizeof(TransactionId));
542 : 477 : CurrentSnapshot->subxcnt = sourcesnap->subxcnt;
543 [ + - ]: 477 : Assert(sourcesnap->subxcnt <= GetMaxSnapshotSubxidCount());
544 [ + - ]: 477 : if (sourcesnap->subxcnt > 0)
545 : 0 : memcpy(CurrentSnapshot->subxip, sourcesnap->subxip,
546 : : sourcesnap->subxcnt * sizeof(TransactionId));
547 : 477 : CurrentSnapshot->suboverflowed = sourcesnap->suboverflowed;
548 : 477 : CurrentSnapshot->takenDuringRecovery = sourcesnap->takenDuringRecovery;
549 : : /* NB: curcid should NOT be copied, it's a local matter */
550 : :
551 : 477 : CurrentSnapshot->snapXactCompletionCount = 0;
552 : :
553 : : /*
554 : : * Now we have to fix what GetSnapshotData did with MyProc->xmin and
555 : : * TransactionXmin. There is a race condition: to make sure we are not
556 : : * causing the global xmin to go backwards, we have to test that the
557 : : * source transaction is still running, and that has to be done
558 : : * atomically. So let procarray.c do it.
559 : : *
560 : : * Note: in serializable mode, predicate.c will do this a second time. It
561 : : * doesn't seem worth contorting the logic here to avoid two calls,
562 : : * especially since it's not clear that predicate.c *must* do this.
563 : : */
564 [ + - ]: 477 : if (sourceproc != NULL)
565 : : {
566 [ + - ]: 477 : if (!ProcArrayInstallRestoredXmin(CurrentSnapshot->xmin, sourceproc))
567 [ # # # # ]: 0 : ereport(ERROR,
568 : : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
569 : : errmsg("could not import the requested snapshot"),
570 : : errdetail("The source transaction is not running anymore.")));
571 : 477 : }
572 [ # # ]: 0 : else if (!ProcArrayInstallImportedXmin(CurrentSnapshot->xmin, sourcevxid))
573 [ # # # # ]: 0 : ereport(ERROR,
574 : : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
575 : : errmsg("could not import the requested snapshot"),
576 : : errdetail("The source process with PID %d is not running anymore.",
577 : : sourcepid)));
578 : :
579 : : /*
580 : : * In transaction-snapshot mode, the first snapshot must live until end of
581 : : * xact, so we must make a copy of it. Furthermore, if we're running in
582 : : * serializable mode, predicate.c needs to do its own processing.
583 : : */
584 [ + - ]: 477 : if (IsolationUsesXactSnapshot())
585 : : {
586 [ # # ]: 0 : if (IsolationIsSerializable())
587 : 0 : SetSerializableTransactionSnapshot(CurrentSnapshot, sourcevxid,
588 : 0 : sourcepid);
589 : : /* Make a saved copy */
590 : 0 : CurrentSnapshot = CopySnapshot(CurrentSnapshot);
591 : 0 : FirstXactSnapshot = CurrentSnapshot;
592 : : /* Mark it as "registered" in FirstXactSnapshot */
593 : 0 : FirstXactSnapshot->regd_count++;
594 : 0 : pairingheap_add(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
595 : 0 : }
596 : :
597 : 477 : FirstSnapshotSet = true;
598 : 477 : }
599 : :
600 : : /*
601 : : * CopySnapshot
602 : : * Copy the given snapshot.
603 : : *
604 : : * The copy is palloc'd in TopTransactionContext and has initial refcounts set
605 : : * to 0. The returned snapshot has the copied flag set.
606 : : */
607 : : static Snapshot
608 : 1399525 : CopySnapshot(Snapshot snapshot)
609 : : {
610 : 1399525 : Snapshot newsnap;
611 : 1399525 : Size subxipoff;
612 : 1399525 : Size size;
613 : :
614 [ + - ]: 1399525 : Assert(snapshot != InvalidSnapshot);
615 : :
616 : : /* We allocate any XID arrays needed in the same palloc block. */
617 : 1399525 : size = subxipoff = sizeof(SnapshotData) +
618 : 1399525 : snapshot->xcnt * sizeof(TransactionId);
619 [ + + ]: 1399525 : if (snapshot->subxcnt > 0)
620 : 14829 : size += snapshot->subxcnt * sizeof(TransactionId);
621 : :
622 : 1399525 : newsnap = (Snapshot) MemoryContextAlloc(TopTransactionContext, size);
623 : 1399525 : memcpy(newsnap, snapshot, sizeof(SnapshotData));
624 : :
625 : 1399525 : newsnap->regd_count = 0;
626 : 1399525 : newsnap->active_count = 0;
627 : 1399525 : newsnap->copied = true;
628 : 1399525 : newsnap->snapXactCompletionCount = 0;
629 : :
630 : : /* setup XID array */
631 [ + + ]: 1399525 : if (snapshot->xcnt > 0)
632 : : {
633 : 522592 : newsnap->xip = (TransactionId *) (newsnap + 1);
634 : 522592 : memcpy(newsnap->xip, snapshot->xip,
635 : : snapshot->xcnt * sizeof(TransactionId));
636 : 522592 : }
637 : : else
638 : 876933 : newsnap->xip = NULL;
639 : :
640 : : /*
641 : : * Setup subXID array. Don't bother to copy it if it had overflowed,
642 : : * though, because it's not used anywhere in that case. Except if it's a
643 : : * snapshot taken during recovery; all the top-level XIDs are in subxip as
644 : : * well in that case, so we mustn't lose them.
645 : : */
646 [ + + # # ]: 1399525 : if (snapshot->subxcnt > 0 &&
647 [ - + ]: 14829 : (!snapshot->suboverflowed || snapshot->takenDuringRecovery))
648 : : {
649 : 14829 : newsnap->subxip = (TransactionId *) ((char *) newsnap + subxipoff);
650 : 14829 : memcpy(newsnap->subxip, snapshot->subxip,
651 : : snapshot->subxcnt * sizeof(TransactionId));
652 : 14829 : }
653 : : else
654 : 1384696 : newsnap->subxip = NULL;
655 : :
656 : 2799050 : return newsnap;
657 : 1399525 : }
658 : :
659 : : /*
660 : : * FreeSnapshot
661 : : * Free the memory associated with a snapshot.
662 : : */
663 : : static void
664 : 1390485 : FreeSnapshot(Snapshot snapshot)
665 : : {
666 [ + - ]: 1390485 : Assert(snapshot->regd_count == 0);
667 [ + - ]: 1390485 : Assert(snapshot->active_count == 0);
668 [ + - ]: 1390485 : Assert(snapshot->copied);
669 : :
670 : 1390485 : pfree(snapshot);
671 : 1390485 : }
672 : :
673 : : /*
674 : : * PushActiveSnapshot
675 : : * Set the given snapshot as the current active snapshot
676 : : *
677 : : * If the passed snapshot is a statically-allocated one, or it is possibly
678 : : * subject to a future command counter update, create a new long-lived copy
679 : : * with active refcount=1. Otherwise, only increment the refcount.
680 : : */
681 : : void
682 : 527744 : PushActiveSnapshot(Snapshot snapshot)
683 : : {
684 : 527744 : PushActiveSnapshotWithLevel(snapshot, GetCurrentTransactionNestLevel());
685 : 527744 : }
686 : :
687 : : /*
688 : : * PushActiveSnapshotWithLevel
689 : : * Set the given snapshot as the current active snapshot
690 : : *
691 : : * Same as PushActiveSnapshot except that caller can specify the
692 : : * transaction nesting level that "owns" the snapshot. This level
693 : : * must not be deeper than the current top of the snapshot stack.
694 : : */
695 : : void
696 : 550489 : PushActiveSnapshotWithLevel(Snapshot snapshot, int snap_level)
697 : : {
698 : 550489 : ActiveSnapshotElt *newactive;
699 : :
700 [ + - ]: 550489 : Assert(snapshot != InvalidSnapshot);
701 [ + + + - ]: 550489 : Assert(ActiveSnapshot == NULL || snap_level >= ActiveSnapshot->as_level);
702 : :
703 : 550489 : newactive = MemoryContextAlloc(TopTransactionContext, sizeof(ActiveSnapshotElt));
704 : :
705 : : /*
706 : : * Checking SecondarySnapshot is probably useless here, but it seems
707 : : * better to be sure.
708 : : */
709 [ + + + + : 550489 : if (snapshot == CurrentSnapshot || snapshot == SecondarySnapshot ||
- + ]
710 : 41944 : !snapshot->copied)
711 : 508545 : newactive->as_snap = CopySnapshot(snapshot);
712 : : else
713 : 41944 : newactive->as_snap = snapshot;
714 : :
715 : 550489 : newactive->as_next = ActiveSnapshot;
716 : 550489 : newactive->as_level = snap_level;
717 : :
718 : 550489 : newactive->as_snap->active_count++;
719 : :
720 : 550489 : ActiveSnapshot = newactive;
721 : 550489 : }
722 : :
723 : : /*
724 : : * PushCopiedSnapshot
725 : : * As above, except forcibly copy the presented snapshot.
726 : : *
727 : : * This should be used when the ActiveSnapshot has to be modifiable, for
728 : : * example if the caller intends to call UpdateActiveSnapshotCommandId.
729 : : * The new snapshot will be released when popped from the stack.
730 : : */
731 : : void
732 : 10398 : PushCopiedSnapshot(Snapshot snapshot)
733 : : {
734 : 10398 : PushActiveSnapshot(CopySnapshot(snapshot));
735 : 10398 : }
736 : :
737 : : /*
738 : : * UpdateActiveSnapshotCommandId
739 : : *
740 : : * Update the current CID of the active snapshot. This can only be applied
741 : : * to a snapshot that is not referenced elsewhere.
742 : : */
743 : : void
744 : 415177 : UpdateActiveSnapshotCommandId(void)
745 : : {
746 : 415177 : CommandId save_curcid,
747 : : curcid;
748 : :
749 [ + - ]: 415177 : Assert(ActiveSnapshot != NULL);
750 [ + - ]: 415177 : Assert(ActiveSnapshot->as_snap->active_count == 1);
751 [ + - ]: 415177 : Assert(ActiveSnapshot->as_snap->regd_count == 0);
752 : :
753 : : /*
754 : : * Don't allow modification of the active snapshot during parallel
755 : : * operation. We share the snapshot to worker backends at the beginning
756 : : * of parallel operation, so any change to the snapshot can lead to
757 : : * inconsistencies. We have other defenses against
758 : : * CommandCounterIncrement, but there are a few places that call this
759 : : * directly, so we put an additional guard here.
760 : : */
761 : 415177 : save_curcid = ActiveSnapshot->as_snap->curcid;
762 : 415177 : curcid = GetCurrentCommandId(false);
763 [ + + + - ]: 415177 : if (IsInParallelMode() && save_curcid != curcid)
764 [ # # # # ]: 0 : elog(ERROR, "cannot modify commandid in active snapshot during a parallel operation");
765 : 415177 : ActiveSnapshot->as_snap->curcid = curcid;
766 : 415177 : }
767 : :
768 : : /*
769 : : * PopActiveSnapshot
770 : : *
771 : : * Remove the topmost snapshot from the active snapshot stack, decrementing the
772 : : * reference count, and free it if this was the last reference.
773 : : */
774 : : void
775 : 539382 : PopActiveSnapshot(void)
776 : : {
777 : 539382 : ActiveSnapshotElt *newstack;
778 : :
779 : 539382 : newstack = ActiveSnapshot->as_next;
780 : :
781 [ + - ]: 539382 : Assert(ActiveSnapshot->as_snap->active_count > 0);
782 : :
783 : 539382 : ActiveSnapshot->as_snap->active_count--;
784 : :
785 [ + + + + ]: 539382 : if (ActiveSnapshot->as_snap->active_count == 0 &&
786 : 538492 : ActiveSnapshot->as_snap->regd_count == 0)
787 : 484559 : FreeSnapshot(ActiveSnapshot->as_snap);
788 : :
789 : 539382 : pfree(ActiveSnapshot);
790 : 539382 : ActiveSnapshot = newstack;
791 : :
792 : 539382 : SnapshotResetXmin();
793 : 539382 : }
794 : :
795 : : /*
796 : : * GetActiveSnapshot
797 : : * Return the topmost snapshot in the Active stack.
798 : : */
799 : : Snapshot
800 : 1408647 : GetActiveSnapshot(void)
801 : : {
802 [ + - ]: 1408647 : Assert(ActiveSnapshot != NULL);
803 : :
804 : 1408647 : return ActiveSnapshot->as_snap;
805 : : }
806 : :
807 : : /*
808 : : * ActiveSnapshotSet
809 : : * Return whether there is at least one snapshot in the Active stack
810 : : */
811 : : bool
812 : 949140 : ActiveSnapshotSet(void)
813 : : {
814 : 949140 : return ActiveSnapshot != NULL;
815 : : }
816 : :
817 : : /*
818 : : * RegisterSnapshot
819 : : * Register a snapshot as being in use by the current resource owner
820 : : *
821 : : * If InvalidSnapshot is passed, it is not registered.
822 : : */
823 : : Snapshot
824 : 2698288 : RegisterSnapshot(Snapshot snapshot)
825 : : {
826 [ + + ]: 2698288 : if (snapshot == InvalidSnapshot)
827 : 908526 : return InvalidSnapshot;
828 : :
829 : 1789762 : return RegisterSnapshotOnOwner(snapshot, CurrentResourceOwner);
830 : 2698288 : }
831 : :
832 : : /*
833 : : * RegisterSnapshotOnOwner
834 : : * As above, but use the specified resource owner
835 : : */
836 : : Snapshot
837 : 1789772 : RegisterSnapshotOnOwner(Snapshot snapshot, ResourceOwner owner)
838 : : {
839 : 1789772 : Snapshot snap;
840 : :
841 [ + - ]: 1789772 : if (snapshot == InvalidSnapshot)
842 : 0 : return InvalidSnapshot;
843 : :
844 : : /* Static snapshot? Create a persistent copy */
845 [ + + ]: 1789772 : snap = snapshot->copied ? snapshot : CopySnapshot(snapshot);
846 : :
847 : : /* and tell resowner.c about it */
848 : 1789772 : ResourceOwnerEnlarge(owner);
849 : 1789772 : snap->regd_count++;
850 : 1789772 : ResourceOwnerRememberSnapshot(owner, snap);
851 : :
852 [ + + ]: 1789772 : if (snap->regd_count == 1)
853 : 1333518 : pairingheap_add(&RegisteredSnapshots, &snap->ph_node);
854 : :
855 : 1789772 : return snap;
856 : 1789772 : }
857 : :
858 : : /*
859 : : * UnregisterSnapshot
860 : : *
861 : : * Decrement the reference count of a snapshot, remove the corresponding
862 : : * reference from CurrentResourceOwner, and free the snapshot if no more
863 : : * references remain.
864 : : */
865 : : void
866 : 2666745 : UnregisterSnapshot(Snapshot snapshot)
867 : : {
868 [ + + ]: 2666745 : if (snapshot == NULL)
869 : 894911 : return;
870 : :
871 : 1771834 : UnregisterSnapshotFromOwner(snapshot, CurrentResourceOwner);
872 : 2666745 : }
873 : :
874 : : /*
875 : : * UnregisterSnapshotFromOwner
876 : : * As above, but use the specified resource owner
877 : : */
878 : : void
879 : 1775888 : UnregisterSnapshotFromOwner(Snapshot snapshot, ResourceOwner owner)
880 : : {
881 [ + - ]: 1775888 : if (snapshot == NULL)
882 : 0 : return;
883 : :
884 : 1775888 : ResourceOwnerForgetSnapshot(owner, snapshot);
885 : 1775888 : UnregisterSnapshotNoOwner(snapshot);
886 : 1775888 : }
887 : :
888 : : static void
889 : 1789772 : UnregisterSnapshotNoOwner(Snapshot snapshot)
890 : : {
891 [ + - ]: 1789772 : Assert(snapshot->regd_count > 0);
892 [ + - ]: 1789772 : Assert(!pairingheap_is_empty(&RegisteredSnapshots));
893 : :
894 : 1789772 : snapshot->regd_count--;
895 [ + + ]: 1789772 : if (snapshot->regd_count == 0)
896 : 1333518 : pairingheap_remove(&RegisteredSnapshots, &snapshot->ph_node);
897 : :
898 [ + + + + ]: 1789772 : if (snapshot->regd_count == 0 && snapshot->active_count == 0)
899 : : {
900 : 904990 : FreeSnapshot(snapshot);
901 : 904990 : SnapshotResetXmin();
902 : 904990 : }
903 : 1789772 : }
904 : :
905 : : /*
906 : : * Comparison function for RegisteredSnapshots heap. Snapshots are ordered
907 : : * by xmin, so that the snapshot with smallest xmin is at the top.
908 : : */
909 : : static int
910 : 1326157 : xmin_cmp(const pairingheap_node *a, const pairingheap_node *b, void *arg)
911 : : {
912 : 1326157 : const SnapshotData *asnap = pairingheap_const_container(SnapshotData, ph_node, a);
913 : 1326157 : const SnapshotData *bsnap = pairingheap_const_container(SnapshotData, ph_node, b);
914 : :
915 [ + + ]: 1326157 : if (TransactionIdPrecedes(asnap->xmin, bsnap->xmin))
916 : 18250 : return 1;
917 [ + + ]: 1307907 : else if (TransactionIdFollows(asnap->xmin, bsnap->xmin))
918 : 100 : return -1;
919 : : else
920 : 1307807 : return 0;
921 : 1326157 : }
922 : :
923 : : /*
924 : : * SnapshotResetXmin
925 : : *
926 : : * If there are no more snapshots, we can reset our PGPROC->xmin to
927 : : * InvalidTransactionId. Note we can do this without locking because we assume
928 : : * that storing an Xid is atomic.
929 : : *
930 : : * Even if there are some remaining snapshots, we may be able to advance our
931 : : * PGPROC->xmin to some degree. This typically happens when a portal is
932 : : * dropped. For efficiency, we only consider recomputing PGPROC->xmin when
933 : : * the active snapshot stack is empty; this allows us not to need to track
934 : : * which active snapshot is oldest.
935 : : */
936 : : static void
937 : 1635705 : SnapshotResetXmin(void)
938 : : {
939 : 1635705 : Snapshot minSnapshot;
940 : :
941 [ + + ]: 1635705 : if (ActiveSnapshot != NULL)
942 : 1436111 : return;
943 : :
944 [ + + ]: 199594 : if (pairingheap_is_empty(&RegisteredSnapshots))
945 : : {
946 : 96889 : MyProc->xmin = TransactionXmin = InvalidTransactionId;
947 : 96889 : return;
948 : : }
949 : :
950 : 102705 : minSnapshot = pairingheap_container(SnapshotData, ph_node,
951 : : pairingheap_first(&RegisteredSnapshots));
952 : :
953 [ + + ]: 102705 : if (TransactionIdPrecedes(MyProc->xmin, minSnapshot->xmin))
954 : 1262 : MyProc->xmin = TransactionXmin = minSnapshot->xmin;
955 [ - + ]: 1635705 : }
956 : :
957 : : /*
958 : : * AtSubCommit_Snapshot
959 : : */
960 : : void
961 : 482 : AtSubCommit_Snapshot(int level)
962 : : {
963 : 482 : ActiveSnapshotElt *active;
964 : :
965 : : /*
966 : : * Relabel the active snapshots set in this subtransaction as though they
967 : : * are owned by the parent subxact.
968 : : */
969 [ + + ]: 482 : for (active = ActiveSnapshot; active != NULL; active = active->as_next)
970 : : {
971 [ + - ]: 400 : if (active->as_level < level)
972 : 400 : break;
973 : 0 : active->as_level = level - 1;
974 : 0 : }
975 : 482 : }
976 : :
977 : : /*
978 : : * AtSubAbort_Snapshot
979 : : * Clean up snapshots after a subtransaction abort
980 : : */
981 : : void
982 : 1183 : AtSubAbort_Snapshot(int level)
983 : : {
984 : : /* Forget the active snapshots set by this subtransaction */
985 [ + + + + ]: 2119 : while (ActiveSnapshot && ActiveSnapshot->as_level >= level)
986 : : {
987 : 936 : ActiveSnapshotElt *next;
988 : :
989 : 936 : next = ActiveSnapshot->as_next;
990 : :
991 : : /*
992 : : * Decrement the snapshot's active count. If it's still registered or
993 : : * marked as active by an outer subtransaction, we can't free it yet.
994 : : */
995 [ + - ]: 936 : Assert(ActiveSnapshot->as_snap->active_count >= 1);
996 : 936 : ActiveSnapshot->as_snap->active_count -= 1;
997 : :
998 [ + - - + ]: 936 : if (ActiveSnapshot->as_snap->active_count == 0 &&
999 : 936 : ActiveSnapshot->as_snap->regd_count == 0)
1000 : 936 : FreeSnapshot(ActiveSnapshot->as_snap);
1001 : :
1002 : : /* and free the stack element */
1003 : 936 : pfree(ActiveSnapshot);
1004 : :
1005 : 936 : ActiveSnapshot = next;
1006 : 936 : }
1007 : :
1008 : 1183 : SnapshotResetXmin();
1009 : 1183 : }
1010 : :
1011 : : /*
1012 : : * AtEOXact_Snapshot
1013 : : * Snapshot manager's cleanup function for end of transaction
1014 : : */
1015 : : void
1016 : 57914 : AtEOXact_Snapshot(bool isCommit, bool resetXmin)
1017 : : {
1018 : : /*
1019 : : * In transaction-snapshot mode we must release our privately-managed
1020 : : * reference to the transaction snapshot. We must remove it from
1021 : : * RegisteredSnapshots to keep the check below happy. But we don't bother
1022 : : * to do FreeSnapshot, for two reasons: the memory will go away with
1023 : : * TopTransactionContext anyway, and if someone has left the snapshot
1024 : : * stacked as active, we don't want the code below to be chasing through a
1025 : : * dangling pointer.
1026 : : */
1027 [ + + ]: 57914 : if (FirstXactSnapshot != NULL)
1028 : : {
1029 [ + - ]: 32 : Assert(FirstXactSnapshot->regd_count > 0);
1030 [ + - ]: 32 : Assert(!pairingheap_is_empty(&RegisteredSnapshots));
1031 : 32 : pairingheap_remove(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
1032 : 32 : }
1033 : 57914 : FirstXactSnapshot = NULL;
1034 : :
1035 : : /*
1036 : : * If we exported any snapshots, clean them up.
1037 : : */
1038 [ - + ]: 57914 : if (exportedSnapshots != NIL)
1039 : : {
1040 : 0 : ListCell *lc;
1041 : :
1042 : : /*
1043 : : * Get rid of the files. Unlink failure is only a WARNING because (1)
1044 : : * it's too late to abort the transaction, and (2) leaving a leaked
1045 : : * file around has little real consequence anyway.
1046 : : *
1047 : : * We also need to remove the snapshots from RegisteredSnapshots to
1048 : : * prevent a warning below.
1049 : : *
1050 : : * As with the FirstXactSnapshot, we don't need to free resources of
1051 : : * the snapshot itself as it will go away with the memory context.
1052 : : */
1053 [ # # # # : 0 : foreach(lc, exportedSnapshots)
# # ]
1054 : : {
1055 : 0 : ExportedSnapshot *esnap = (ExportedSnapshot *) lfirst(lc);
1056 : :
1057 [ # # ]: 0 : if (unlink(esnap->snapfile))
1058 [ # # # # ]: 0 : elog(WARNING, "could not unlink file \"%s\": %m",
1059 : : esnap->snapfile);
1060 : :
1061 : 0 : pairingheap_remove(&RegisteredSnapshots,
1062 : 0 : &esnap->snapshot->ph_node);
1063 : 0 : }
1064 : :
1065 : 0 : exportedSnapshots = NIL;
1066 : 0 : }
1067 : :
1068 : : /* Drop catalog snapshot if any */
1069 : 57914 : InvalidateCatalogSnapshot();
1070 : :
1071 : : /* On commit, complain about leftover snapshots */
1072 [ + + ]: 57914 : if (isCommit)
1073 : : {
1074 : 50898 : ActiveSnapshotElt *active;
1075 : :
1076 [ + - ]: 50898 : if (!pairingheap_is_empty(&RegisteredSnapshots))
1077 [ # # # # ]: 0 : elog(WARNING, "registered snapshots seem to remain after cleanup");
1078 : :
1079 : : /* complain about unpopped active snapshots */
1080 [ - + ]: 50898 : for (active = ActiveSnapshot; active != NULL; active = active->as_next)
1081 [ # # # # ]: 0 : elog(WARNING, "snapshot %p still active", active);
1082 : 50898 : }
1083 : :
1084 : : /*
1085 : : * And reset our state. We don't need to free the memory explicitly --
1086 : : * it'll go away with TopTransactionContext.
1087 : : */
1088 : 57914 : ActiveSnapshot = NULL;
1089 : 57914 : pairingheap_reset(&RegisteredSnapshots);
1090 : :
1091 : 57914 : CurrentSnapshot = NULL;
1092 : 57914 : SecondarySnapshot = NULL;
1093 : :
1094 : 57914 : FirstSnapshotSet = false;
1095 : :
1096 : : /*
1097 : : * During normal commit processing, we call ProcArrayEndTransaction() to
1098 : : * reset the MyProc->xmin. That call happens prior to the call to
1099 : : * AtEOXact_Snapshot(), so we need not touch xmin here at all.
1100 : : */
1101 [ + + ]: 57914 : if (resetXmin)
1102 : 7016 : SnapshotResetXmin();
1103 : :
1104 [ + + + - ]: 57914 : Assert(resetXmin || MyProc->xmin == 0);
1105 : 57914 : }
1106 : :
1107 : :
1108 : : /*
1109 : : * ExportSnapshot
1110 : : * Export the snapshot to a file so that other backends can import it.
1111 : : * Returns the token (the file name) that can be used to import this
1112 : : * snapshot.
1113 : : */
1114 : : char *
1115 : 0 : ExportSnapshot(Snapshot snapshot)
1116 : : {
1117 : 0 : TransactionId topXid;
1118 : 0 : TransactionId *children;
1119 : 0 : ExportedSnapshot *esnap;
1120 : 0 : int nchildren;
1121 : 0 : int addTopXid;
1122 : 0 : StringInfoData buf;
1123 : 0 : FILE *f;
1124 : 0 : int i;
1125 : 0 : MemoryContext oldcxt;
1126 : 0 : char path[MAXPGPATH];
1127 : 0 : char pathtmp[MAXPGPATH];
1128 : :
1129 : : /*
1130 : : * It's tempting to call RequireTransactionBlock here, since it's not very
1131 : : * useful to export a snapshot that will disappear immediately afterwards.
1132 : : * However, we haven't got enough information to do that, since we don't
1133 : : * know if we're at top level or not. For example, we could be inside a
1134 : : * plpgsql function that is going to fire off other transactions via
1135 : : * dblink. Rather than disallow perfectly legitimate usages, don't make a
1136 : : * check.
1137 : : *
1138 : : * Also note that we don't make any restriction on the transaction's
1139 : : * isolation level; however, importers must check the level if they are
1140 : : * serializable.
1141 : : */
1142 : :
1143 : : /*
1144 : : * Get our transaction ID if there is one, to include in the snapshot.
1145 : : */
1146 : 0 : topXid = GetTopTransactionIdIfAny();
1147 : :
1148 : : /*
1149 : : * We cannot export a snapshot from a subtransaction because there's no
1150 : : * easy way for importers to verify that the same subtransaction is still
1151 : : * running.
1152 : : */
1153 [ # # ]: 0 : if (IsSubTransaction())
1154 [ # # # # ]: 0 : ereport(ERROR,
1155 : : (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
1156 : : errmsg("cannot export a snapshot from a subtransaction")));
1157 : :
1158 : : /*
1159 : : * We do however allow previous committed subtransactions to exist.
1160 : : * Importers of the snapshot must see them as still running, so get their
1161 : : * XIDs to add them to the snapshot.
1162 : : */
1163 : 0 : nchildren = xactGetCommittedChildren(&children);
1164 : :
1165 : : /*
1166 : : * Generate file path for the snapshot. We start numbering of snapshots
1167 : : * inside the transaction from 1.
1168 : : */
1169 : 0 : snprintf(path, sizeof(path), SNAPSHOT_EXPORT_DIR "/%08X-%08X-%d",
1170 : 0 : MyProc->vxid.procNumber, MyProc->vxid.lxid,
1171 : 0 : list_length(exportedSnapshots) + 1);
1172 : :
1173 : : /*
1174 : : * Copy the snapshot into TopTransactionContext, add it to the
1175 : : * exportedSnapshots list, and mark it pseudo-registered. We do this to
1176 : : * ensure that the snapshot's xmin is honored for the rest of the
1177 : : * transaction.
1178 : : */
1179 : 0 : snapshot = CopySnapshot(snapshot);
1180 : :
1181 : 0 : oldcxt = MemoryContextSwitchTo(TopTransactionContext);
1182 : 0 : esnap = palloc_object(ExportedSnapshot);
1183 : 0 : esnap->snapfile = pstrdup(path);
1184 : 0 : esnap->snapshot = snapshot;
1185 : 0 : exportedSnapshots = lappend(exportedSnapshots, esnap);
1186 : 0 : MemoryContextSwitchTo(oldcxt);
1187 : :
1188 : 0 : snapshot->regd_count++;
1189 : 0 : pairingheap_add(&RegisteredSnapshots, &snapshot->ph_node);
1190 : :
1191 : : /*
1192 : : * Fill buf with a text serialization of the snapshot, plus identification
1193 : : * data about this transaction. The format expected by ImportSnapshot is
1194 : : * pretty rigid: each line must be fieldname:value.
1195 : : */
1196 : 0 : initStringInfo(&buf);
1197 : :
1198 : 0 : appendStringInfo(&buf, "vxid:%d/%u\n", MyProc->vxid.procNumber, MyProc->vxid.lxid);
1199 : 0 : appendStringInfo(&buf, "pid:%d\n", MyProcPid);
1200 : 0 : appendStringInfo(&buf, "dbid:%u\n", MyDatabaseId);
1201 : 0 : appendStringInfo(&buf, "iso:%d\n", XactIsoLevel);
1202 : 0 : appendStringInfo(&buf, "ro:%d\n", XactReadOnly);
1203 : :
1204 : 0 : appendStringInfo(&buf, "xmin:%u\n", snapshot->xmin);
1205 : 0 : appendStringInfo(&buf, "xmax:%u\n", snapshot->xmax);
1206 : :
1207 : : /*
1208 : : * We must include our own top transaction ID in the top-xid data, since
1209 : : * by definition we will still be running when the importing transaction
1210 : : * adopts the snapshot, but GetSnapshotData never includes our own XID in
1211 : : * the snapshot. (There must, therefore, be enough room to add it.)
1212 : : *
1213 : : * However, it could be that our topXid is after the xmax, in which case
1214 : : * we shouldn't include it because xip[] members are expected to be before
1215 : : * xmax. (We need not make the same check for subxip[] members, see
1216 : : * snapshot.h.)
1217 : : */
1218 [ # # ]: 0 : addTopXid = (TransactionIdIsValid(topXid) &&
1219 : 0 : TransactionIdPrecedes(topXid, snapshot->xmax)) ? 1 : 0;
1220 : 0 : appendStringInfo(&buf, "xcnt:%d\n", snapshot->xcnt + addTopXid);
1221 [ # # ]: 0 : for (i = 0; i < snapshot->xcnt; i++)
1222 : 0 : appendStringInfo(&buf, "xip:%u\n", snapshot->xip[i]);
1223 [ # # ]: 0 : if (addTopXid)
1224 : 0 : appendStringInfo(&buf, "xip:%u\n", topXid);
1225 : :
1226 : : /*
1227 : : * Similarly, we add our subcommitted child XIDs to the subxid data. Here,
1228 : : * we have to cope with possible overflow.
1229 : : */
1230 [ # # # # ]: 0 : if (snapshot->suboverflowed ||
1231 : 0 : snapshot->subxcnt + nchildren > GetMaxSnapshotSubxidCount())
1232 : 0 : appendStringInfoString(&buf, "sof:1\n");
1233 : : else
1234 : : {
1235 : 0 : appendStringInfoString(&buf, "sof:0\n");
1236 : 0 : appendStringInfo(&buf, "sxcnt:%d\n", snapshot->subxcnt + nchildren);
1237 [ # # ]: 0 : for (i = 0; i < snapshot->subxcnt; i++)
1238 : 0 : appendStringInfo(&buf, "sxp:%u\n", snapshot->subxip[i]);
1239 [ # # ]: 0 : for (i = 0; i < nchildren; i++)
1240 : 0 : appendStringInfo(&buf, "sxp:%u\n", children[i]);
1241 : : }
1242 : 0 : appendStringInfo(&buf, "rec:%u\n", snapshot->takenDuringRecovery);
1243 : :
1244 : : /*
1245 : : * Now write the text representation into a file. We first write to a
1246 : : * ".tmp" filename, and rename to final filename if no error. This
1247 : : * ensures that no other backend can read an incomplete file
1248 : : * (ImportSnapshot won't allow it because of its valid-characters check).
1249 : : */
1250 : 0 : snprintf(pathtmp, sizeof(pathtmp), "%s.tmp", path);
1251 [ # # ]: 0 : if (!(f = AllocateFile(pathtmp, PG_BINARY_W)))
1252 [ # # # # ]: 0 : ereport(ERROR,
1253 : : (errcode_for_file_access(),
1254 : : errmsg("could not create file \"%s\": %m", pathtmp)));
1255 : :
1256 [ # # ]: 0 : if (fwrite(buf.data, buf.len, 1, f) != 1)
1257 [ # # # # ]: 0 : ereport(ERROR,
1258 : : (errcode_for_file_access(),
1259 : : errmsg("could not write to file \"%s\": %m", pathtmp)));
1260 : :
1261 : : /* no fsync() since file need not survive a system crash */
1262 : :
1263 [ # # ]: 0 : if (FreeFile(f))
1264 [ # # # # ]: 0 : ereport(ERROR,
1265 : : (errcode_for_file_access(),
1266 : : errmsg("could not write to file \"%s\": %m", pathtmp)));
1267 : :
1268 : : /*
1269 : : * Now that we have written everything into a .tmp file, rename the file
1270 : : * to remove the .tmp suffix.
1271 : : */
1272 [ # # ]: 0 : if (rename(pathtmp, path) < 0)
1273 [ # # # # ]: 0 : ereport(ERROR,
1274 : : (errcode_for_file_access(),
1275 : : errmsg("could not rename file \"%s\" to \"%s\": %m",
1276 : : pathtmp, path)));
1277 : :
1278 : : /*
1279 : : * The basename of the file is what we return from pg_export_snapshot().
1280 : : * It's already in path in a textual format and we know that the path
1281 : : * starts with SNAPSHOT_EXPORT_DIR. Skip over the prefix and the slash
1282 : : * and pstrdup it so as not to return the address of a local variable.
1283 : : */
1284 : 0 : return pstrdup(path + strlen(SNAPSHOT_EXPORT_DIR) + 1);
1285 : 0 : }
1286 : :
1287 : : /*
1288 : : * pg_export_snapshot
1289 : : * SQL-callable wrapper for ExportSnapshot.
1290 : : */
1291 : : Datum
1292 : 0 : pg_export_snapshot(PG_FUNCTION_ARGS)
1293 : : {
1294 : 0 : char *snapshotName;
1295 : :
1296 : 0 : snapshotName = ExportSnapshot(GetActiveSnapshot());
1297 : 0 : PG_RETURN_TEXT_P(cstring_to_text(snapshotName));
1298 : 0 : }
1299 : :
1300 : :
1301 : : /*
1302 : : * Parsing subroutines for ImportSnapshot: parse a line with the given
1303 : : * prefix followed by a value, and advance *s to the next line. The
1304 : : * filename is provided for use in error messages.
1305 : : */
1306 : : static int
1307 : 0 : parseIntFromText(const char *prefix, char **s, const char *filename)
1308 : : {
1309 : 0 : char *ptr = *s;
1310 : 0 : int prefixlen = strlen(prefix);
1311 : 0 : int val;
1312 : :
1313 [ # # ]: 0 : if (strncmp(ptr, prefix, prefixlen) != 0)
1314 [ # # # # ]: 0 : ereport(ERROR,
1315 : : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1316 : : errmsg("invalid snapshot data in file \"%s\"", filename)));
1317 : 0 : ptr += prefixlen;
1318 [ # # ]: 0 : if (sscanf(ptr, "%d", &val) != 1)
1319 [ # # # # ]: 0 : ereport(ERROR,
1320 : : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1321 : : errmsg("invalid snapshot data in file \"%s\"", filename)));
1322 : 0 : ptr = strchr(ptr, '\n');
1323 [ # # ]: 0 : if (!ptr)
1324 [ # # # # ]: 0 : ereport(ERROR,
1325 : : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1326 : : errmsg("invalid snapshot data in file \"%s\"", filename)));
1327 : 0 : *s = ptr + 1;
1328 : 0 : return val;
1329 : 0 : }
1330 : :
1331 : : static TransactionId
1332 : 0 : parseXidFromText(const char *prefix, char **s, const char *filename)
1333 : : {
1334 : 0 : char *ptr = *s;
1335 : 0 : int prefixlen = strlen(prefix);
1336 : 0 : TransactionId val;
1337 : :
1338 [ # # ]: 0 : if (strncmp(ptr, prefix, prefixlen) != 0)
1339 [ # # # # ]: 0 : ereport(ERROR,
1340 : : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1341 : : errmsg("invalid snapshot data in file \"%s\"", filename)));
1342 : 0 : ptr += prefixlen;
1343 [ # # ]: 0 : if (sscanf(ptr, "%u", &val) != 1)
1344 [ # # # # ]: 0 : ereport(ERROR,
1345 : : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1346 : : errmsg("invalid snapshot data in file \"%s\"", filename)));
1347 : 0 : ptr = strchr(ptr, '\n');
1348 [ # # ]: 0 : if (!ptr)
1349 [ # # # # ]: 0 : ereport(ERROR,
1350 : : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1351 : : errmsg("invalid snapshot data in file \"%s\"", filename)));
1352 : 0 : *s = ptr + 1;
1353 : 0 : return val;
1354 : 0 : }
1355 : :
1356 : : static void
1357 : 0 : parseVxidFromText(const char *prefix, char **s, const char *filename,
1358 : : VirtualTransactionId *vxid)
1359 : : {
1360 : 0 : char *ptr = *s;
1361 : 0 : int prefixlen = strlen(prefix);
1362 : :
1363 [ # # ]: 0 : if (strncmp(ptr, prefix, prefixlen) != 0)
1364 [ # # # # ]: 0 : ereport(ERROR,
1365 : : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1366 : : errmsg("invalid snapshot data in file \"%s\"", filename)));
1367 : 0 : ptr += prefixlen;
1368 [ # # ]: 0 : if (sscanf(ptr, "%d/%u", &vxid->procNumber, &vxid->localTransactionId) != 2)
1369 [ # # # # ]: 0 : ereport(ERROR,
1370 : : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1371 : : errmsg("invalid snapshot data in file \"%s\"", filename)));
1372 : 0 : ptr = strchr(ptr, '\n');
1373 [ # # ]: 0 : if (!ptr)
1374 [ # # # # ]: 0 : ereport(ERROR,
1375 : : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1376 : : errmsg("invalid snapshot data in file \"%s\"", filename)));
1377 : 0 : *s = ptr + 1;
1378 : 0 : }
1379 : :
1380 : : /*
1381 : : * ImportSnapshot
1382 : : * Import a previously exported snapshot. The argument should be a
1383 : : * filename in SNAPSHOT_EXPORT_DIR. Load the snapshot from that file.
1384 : : * This is called by "SET TRANSACTION SNAPSHOT 'foo'".
1385 : : */
1386 : : void
1387 : 2 : ImportSnapshot(const char *idstr)
1388 : : {
1389 : 2 : char path[MAXPGPATH];
1390 : 2 : FILE *f;
1391 : 2 : struct stat stat_buf;
1392 : 2 : char *filebuf;
1393 : 2 : int xcnt;
1394 : 2 : int i;
1395 : 2 : VirtualTransactionId src_vxid;
1396 : 2 : int src_pid;
1397 : 2 : Oid src_dbid;
1398 : 2 : int src_isolevel;
1399 : 2 : bool src_readonly;
1400 : 2 : SnapshotData snapshot;
1401 : :
1402 : : /*
1403 : : * Must be at top level of a fresh transaction. Note in particular that
1404 : : * we check we haven't acquired an XID --- if we have, it's conceivable
1405 : : * that the snapshot would show it as not running, making for very screwy
1406 : : * behavior.
1407 : : */
1408 [ + - ]: 2 : if (FirstSnapshotSet ||
1409 : 2 : GetTopTransactionIdIfAny() != InvalidTransactionId ||
1410 : 2 : IsSubTransaction())
1411 [ # # # # ]: 0 : ereport(ERROR,
1412 : : (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
1413 : : errmsg("SET TRANSACTION SNAPSHOT must be called before any query")));
1414 : :
1415 : : /*
1416 : : * If we are in read committed mode then the next query would execute with
1417 : : * a new snapshot thus making this function call quite useless.
1418 : : */
1419 [ + - ]: 2 : if (!IsolationUsesXactSnapshot())
1420 [ # # # # ]: 0 : ereport(ERROR,
1421 : : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1422 : : errmsg("a snapshot-importing transaction must have isolation level SERIALIZABLE or REPEATABLE READ")));
1423 : :
1424 : : /*
1425 : : * Verify the identifier: only 0-9, A-F and hyphens are allowed. We do
1426 : : * this mainly to prevent reading arbitrary files.
1427 : : */
1428 [ + + ]: 2 : if (strspn(idstr, "0123456789ABCDEF-") != strlen(idstr))
1429 [ + - + - ]: 1 : ereport(ERROR,
1430 : : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1431 : : errmsg("invalid snapshot identifier: \"%s\"", idstr)));
1432 : :
1433 : : /* OK, read the file */
1434 : 1 : snprintf(path, MAXPGPATH, SNAPSHOT_EXPORT_DIR "/%s", idstr);
1435 : :
1436 : 1 : f = AllocateFile(path, PG_BINARY_R);
1437 [ - + ]: 1 : if (!f)
1438 : : {
1439 : : /*
1440 : : * If file is missing while identifier has a correct format, avoid
1441 : : * system errors.
1442 : : */
1443 [ + - ]: 1 : if (errno == ENOENT)
1444 [ + - + - ]: 1 : ereport(ERROR,
1445 : : (errcode(ERRCODE_UNDEFINED_OBJECT),
1446 : : errmsg("snapshot \"%s\" does not exist", idstr)));
1447 : : else
1448 [ # # # # ]: 0 : ereport(ERROR,
1449 : : (errcode_for_file_access(),
1450 : : errmsg("could not open file \"%s\" for reading: %m",
1451 : : path)));
1452 : 0 : }
1453 : :
1454 : : /* get the size of the file so that we know how much memory we need */
1455 [ # # ]: 0 : if (fstat(fileno(f), &stat_buf))
1456 [ # # # # ]: 0 : elog(ERROR, "could not stat file \"%s\": %m", path);
1457 : :
1458 : : /* and read the file into a palloc'd string */
1459 : 0 : filebuf = (char *) palloc(stat_buf.st_size + 1);
1460 [ # # ]: 0 : if (fread(filebuf, stat_buf.st_size, 1, f) != 1)
1461 [ # # # # ]: 0 : elog(ERROR, "could not read file \"%s\": %m", path);
1462 : :
1463 : 0 : filebuf[stat_buf.st_size] = '\0';
1464 : :
1465 : 0 : FreeFile(f);
1466 : :
1467 : : /*
1468 : : * Construct a snapshot struct by parsing the file content.
1469 : : */
1470 : 0 : memset(&snapshot, 0, sizeof(snapshot));
1471 : :
1472 : 0 : parseVxidFromText("vxid:", &filebuf, path, &src_vxid);
1473 : 0 : src_pid = parseIntFromText("pid:", &filebuf, path);
1474 : : /* we abuse parseXidFromText a bit here ... */
1475 : 0 : src_dbid = parseXidFromText("dbid:", &filebuf, path);
1476 : 0 : src_isolevel = parseIntFromText("iso:", &filebuf, path);
1477 : 0 : src_readonly = parseIntFromText("ro:", &filebuf, path);
1478 : :
1479 : 0 : snapshot.snapshot_type = SNAPSHOT_MVCC;
1480 : :
1481 : 0 : snapshot.xmin = parseXidFromText("xmin:", &filebuf, path);
1482 : 0 : snapshot.xmax = parseXidFromText("xmax:", &filebuf, path);
1483 : :
1484 : 0 : snapshot.xcnt = xcnt = parseIntFromText("xcnt:", &filebuf, path);
1485 : :
1486 : : /* sanity-check the xid count before palloc */
1487 [ # # ]: 0 : if (xcnt < 0 || xcnt > GetMaxSnapshotXidCount())
1488 [ # # # # ]: 0 : ereport(ERROR,
1489 : : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1490 : : errmsg("invalid snapshot data in file \"%s\"", path)));
1491 : :
1492 : 0 : snapshot.xip = (TransactionId *) palloc(xcnt * sizeof(TransactionId));
1493 [ # # ]: 0 : for (i = 0; i < xcnt; i++)
1494 : 0 : snapshot.xip[i] = parseXidFromText("xip:", &filebuf, path);
1495 : :
1496 : 0 : snapshot.suboverflowed = parseIntFromText("sof:", &filebuf, path);
1497 : :
1498 [ # # ]: 0 : if (!snapshot.suboverflowed)
1499 : : {
1500 : 0 : snapshot.subxcnt = xcnt = parseIntFromText("sxcnt:", &filebuf, path);
1501 : :
1502 : : /* sanity-check the xid count before palloc */
1503 [ # # ]: 0 : if (xcnt < 0 || xcnt > GetMaxSnapshotSubxidCount())
1504 [ # # # # ]: 0 : ereport(ERROR,
1505 : : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1506 : : errmsg("invalid snapshot data in file \"%s\"", path)));
1507 : :
1508 : 0 : snapshot.subxip = (TransactionId *) palloc(xcnt * sizeof(TransactionId));
1509 [ # # ]: 0 : for (i = 0; i < xcnt; i++)
1510 : 0 : snapshot.subxip[i] = parseXidFromText("sxp:", &filebuf, path);
1511 : 0 : }
1512 : : else
1513 : : {
1514 : 0 : snapshot.subxcnt = 0;
1515 : 0 : snapshot.subxip = NULL;
1516 : : }
1517 : :
1518 : 0 : snapshot.takenDuringRecovery = parseIntFromText("rec:", &filebuf, path);
1519 : :
1520 : : /*
1521 : : * Do some additional sanity checking, just to protect ourselves. We
1522 : : * don't trouble to check the array elements, just the most critical
1523 : : * fields.
1524 : : */
1525 [ # # ]: 0 : if (!VirtualTransactionIdIsValid(src_vxid) ||
1526 : 0 : !OidIsValid(src_dbid) ||
1527 : 0 : !TransactionIdIsNormal(snapshot.xmin) ||
1528 : 0 : !TransactionIdIsNormal(snapshot.xmax))
1529 [ # # # # ]: 0 : ereport(ERROR,
1530 : : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1531 : : errmsg("invalid snapshot data in file \"%s\"", path)));
1532 : :
1533 : : /*
1534 : : * If we're serializable, the source transaction must be too, otherwise
1535 : : * predicate.c has problems (SxactGlobalXmin could go backwards). Also, a
1536 : : * non-read-only transaction can't adopt a snapshot from a read-only
1537 : : * transaction, as predicate.c handles the cases very differently.
1538 : : */
1539 [ # # ]: 0 : if (IsolationIsSerializable())
1540 : : {
1541 [ # # ]: 0 : if (src_isolevel != XACT_SERIALIZABLE)
1542 [ # # # # ]: 0 : ereport(ERROR,
1543 : : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1544 : : errmsg("a serializable transaction cannot import a snapshot from a non-serializable transaction")));
1545 [ # # # # ]: 0 : if (src_readonly && !XactReadOnly)
1546 [ # # # # ]: 0 : ereport(ERROR,
1547 : : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1548 : : errmsg("a non-read-only serializable transaction cannot import a snapshot from a read-only transaction")));
1549 : 0 : }
1550 : :
1551 : : /*
1552 : : * We cannot import a snapshot that was taken in a different database,
1553 : : * because vacuum calculates OldestXmin on a per-database basis; so the
1554 : : * source transaction's xmin doesn't protect us from data loss. This
1555 : : * restriction could be removed if the source transaction were to mark its
1556 : : * xmin as being globally applicable. But that would require some
1557 : : * additional syntax, since that has to be known when the snapshot is
1558 : : * initially taken. (See pgsql-hackers discussion of 2011-10-21.)
1559 : : */
1560 [ # # ]: 0 : if (src_dbid != MyDatabaseId)
1561 [ # # # # ]: 0 : ereport(ERROR,
1562 : : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1563 : : errmsg("cannot import a snapshot from a different database")));
1564 : :
1565 : : /* OK, install the snapshot */
1566 : 0 : SetTransactionSnapshot(&snapshot, &src_vxid, src_pid, NULL);
1567 : 0 : }
1568 : :
1569 : : /*
1570 : : * XactHasExportedSnapshots
1571 : : * Test whether current transaction has exported any snapshots.
1572 : : */
1573 : : bool
1574 : 16 : XactHasExportedSnapshots(void)
1575 : : {
1576 : 16 : return (exportedSnapshots != NIL);
1577 : : }
1578 : :
1579 : : /*
1580 : : * DeleteAllExportedSnapshotFiles
1581 : : * Clean up any files that have been left behind by a crashed backend
1582 : : * that had exported snapshots before it died.
1583 : : *
1584 : : * This should be called during database startup or crash recovery.
1585 : : */
1586 : : void
1587 : 0 : DeleteAllExportedSnapshotFiles(void)
1588 : : {
1589 : 0 : char buf[MAXPGPATH + sizeof(SNAPSHOT_EXPORT_DIR)];
1590 : 0 : DIR *s_dir;
1591 : 0 : struct dirent *s_de;
1592 : :
1593 : : /*
1594 : : * Problems in reading the directory, or unlinking files, are reported at
1595 : : * LOG level. Since we're running in the startup process, ERROR level
1596 : : * would prevent database start, and it's not important enough for that.
1597 : : */
1598 : 0 : s_dir = AllocateDir(SNAPSHOT_EXPORT_DIR);
1599 : :
1600 [ # # ]: 0 : while ((s_de = ReadDirExtended(s_dir, SNAPSHOT_EXPORT_DIR, LOG)) != NULL)
1601 : : {
1602 [ # # # # ]: 0 : if (strcmp(s_de->d_name, ".") == 0 ||
1603 : 0 : strcmp(s_de->d_name, "..") == 0)
1604 : 0 : continue;
1605 : :
1606 : 0 : snprintf(buf, sizeof(buf), SNAPSHOT_EXPORT_DIR "/%s", s_de->d_name);
1607 : :
1608 [ # # ]: 0 : if (unlink(buf) != 0)
1609 [ # # # # ]: 0 : ereport(LOG,
1610 : : (errcode_for_file_access(),
1611 : : errmsg("could not remove file \"%s\": %m", buf)));
1612 : : }
1613 : :
1614 : 0 : FreeDir(s_dir);
1615 : 0 : }
1616 : :
1617 : : /*
1618 : : * ThereAreNoPriorRegisteredSnapshots
1619 : : * Is the registered snapshot count less than or equal to one?
1620 : : *
1621 : : * Don't use this to settle important decisions. While zero registrations and
1622 : : * no ActiveSnapshot would confirm a certain idleness, the system makes no
1623 : : * guarantees about the significance of one registered snapshot.
1624 : : */
1625 : : bool
1626 : 7 : ThereAreNoPriorRegisteredSnapshots(void)
1627 : : {
1628 [ - + # # ]: 7 : if (pairingheap_is_empty(&RegisteredSnapshots) ||
1629 [ # # ]: 0 : pairingheap_is_singular(&RegisteredSnapshots))
1630 : 7 : return true;
1631 : :
1632 : 0 : return false;
1633 : 7 : }
1634 : :
1635 : : /*
1636 : : * HaveRegisteredOrActiveSnapshot
1637 : : * Is there any registered or active snapshot?
1638 : : *
1639 : : * NB: Unless pushed or active, the cached catalog snapshot will not cause
1640 : : * this function to return true. That allows this function to be used in
1641 : : * checks enforcing a longer-lived snapshot.
1642 : : */
1643 : : bool
1644 : 772608 : HaveRegisteredOrActiveSnapshot(void)
1645 : : {
1646 [ + + ]: 772608 : if (ActiveSnapshot != NULL)
1647 : 772606 : return true;
1648 : :
1649 : : /*
1650 : : * The catalog snapshot is in RegisteredSnapshots when valid, but can be
1651 : : * removed at any time due to invalidation processing. If explicitly
1652 : : * registered more than one snapshot has to be in RegisteredSnapshots.
1653 : : */
1654 [ + + + - ]: 3 : if (CatalogSnapshot != NULL &&
1655 [ + - ]: 1 : pairingheap_is_singular(&RegisteredSnapshots))
1656 : 0 : return false;
1657 : :
1658 : 2 : return !pairingheap_is_empty(&RegisteredSnapshots);
1659 : 772608 : }
1660 : :
1661 : :
1662 : : /*
1663 : : * Setup a snapshot that replaces normal catalog snapshots that allows catalog
1664 : : * access to behave just like it did at a certain point in the past.
1665 : : *
1666 : : * Needed for logical decoding.
1667 : : */
1668 : : void
1669 : 0 : SetupHistoricSnapshot(Snapshot historic_snapshot, HTAB *tuplecids)
1670 : : {
1671 [ # # ]: 0 : Assert(historic_snapshot != NULL);
1672 : :
1673 : : /* setup the timetravel snapshot */
1674 : 0 : HistoricSnapshot = historic_snapshot;
1675 : :
1676 : : /* setup (cmin, cmax) lookup hash */
1677 : 0 : tuplecid_data = tuplecids;
1678 : 0 : }
1679 : :
1680 : :
1681 : : /*
1682 : : * Make catalog snapshots behave normally again.
1683 : : */
1684 : : void
1685 : 0 : TeardownHistoricSnapshot(bool is_error)
1686 : : {
1687 : 0 : HistoricSnapshot = NULL;
1688 : 0 : tuplecid_data = NULL;
1689 : 0 : }
1690 : :
1691 : : bool
1692 : 1564956 : HistoricSnapshotActive(void)
1693 : : {
1694 : 1564956 : return HistoricSnapshot != NULL;
1695 : : }
1696 : :
1697 : : HTAB *
1698 : 0 : HistoricSnapshotGetTupleCids(void)
1699 : : {
1700 [ # # ]: 0 : Assert(HistoricSnapshotActive());
1701 : 0 : return tuplecid_data;
1702 : : }
1703 : :
1704 : : /*
1705 : : * EstimateSnapshotSpace
1706 : : * Returns the size needed to store the given snapshot.
1707 : : *
1708 : : * We are exporting only required fields from the Snapshot, stored in
1709 : : * SerializedSnapshotData.
1710 : : */
1711 : : Size
1712 : 420 : EstimateSnapshotSpace(Snapshot snapshot)
1713 : : {
1714 : 420 : Size size;
1715 : :
1716 [ + - ]: 420 : Assert(snapshot != InvalidSnapshot);
1717 [ + - ]: 420 : Assert(snapshot->snapshot_type == SNAPSHOT_MVCC);
1718 : :
1719 : : /* We allocate any XID arrays needed in the same palloc block. */
1720 : 420 : size = add_size(sizeof(SerializedSnapshotData),
1721 : 420 : mul_size(snapshot->xcnt, sizeof(TransactionId)));
1722 [ - + # # ]: 420 : if (snapshot->subxcnt > 0 &&
1723 [ # # ]: 0 : (!snapshot->suboverflowed || snapshot->takenDuringRecovery))
1724 : 0 : size = add_size(size,
1725 : 0 : mul_size(snapshot->subxcnt, sizeof(TransactionId)));
1726 : :
1727 : 840 : return size;
1728 : 420 : }
1729 : :
1730 : : /*
1731 : : * SerializeSnapshot
1732 : : * Dumps the serialized snapshot (extracted from given snapshot) onto the
1733 : : * memory location at start_address.
1734 : : */
1735 : : void
1736 : 365 : SerializeSnapshot(Snapshot snapshot, char *start_address)
1737 : : {
1738 : 365 : SerializedSnapshotData serialized_snapshot;
1739 : :
1740 [ + - ]: 365 : Assert(snapshot->subxcnt >= 0);
1741 : :
1742 : : /* Copy all required fields */
1743 : 365 : serialized_snapshot.xmin = snapshot->xmin;
1744 : 365 : serialized_snapshot.xmax = snapshot->xmax;
1745 : 365 : serialized_snapshot.xcnt = snapshot->xcnt;
1746 : 365 : serialized_snapshot.subxcnt = snapshot->subxcnt;
1747 : 365 : serialized_snapshot.suboverflowed = snapshot->suboverflowed;
1748 : 365 : serialized_snapshot.takenDuringRecovery = snapshot->takenDuringRecovery;
1749 : 365 : serialized_snapshot.curcid = snapshot->curcid;
1750 : :
1751 : : /*
1752 : : * Ignore the SubXID array if it has overflowed, unless the snapshot was
1753 : : * taken during recovery - in that case, top-level XIDs are in subxip as
1754 : : * well, and we mustn't lose them.
1755 : : */
1756 [ - + # # ]: 365 : if (serialized_snapshot.suboverflowed && !snapshot->takenDuringRecovery)
1757 : 0 : serialized_snapshot.subxcnt = 0;
1758 : :
1759 : : /* Copy struct to possibly-unaligned buffer */
1760 : 365 : memcpy(start_address,
1761 : : &serialized_snapshot, sizeof(SerializedSnapshotData));
1762 : :
1763 : : /* Copy XID array */
1764 [ + + ]: 365 : if (snapshot->xcnt > 0)
1765 : 113 : memcpy((TransactionId *) (start_address +
1766 : : sizeof(SerializedSnapshotData)),
1767 : : snapshot->xip, snapshot->xcnt * sizeof(TransactionId));
1768 : :
1769 : : /*
1770 : : * Copy SubXID array. Don't bother to copy it if it had overflowed,
1771 : : * though, because it's not used anywhere in that case. Except if it's a
1772 : : * snapshot taken during recovery; all the top-level XIDs are in subxip as
1773 : : * well in that case, so we mustn't lose them.
1774 : : */
1775 [ + - ]: 365 : if (serialized_snapshot.subxcnt > 0)
1776 : : {
1777 : 0 : Size subxipoff = sizeof(SerializedSnapshotData) +
1778 : 0 : snapshot->xcnt * sizeof(TransactionId);
1779 : :
1780 : 0 : memcpy((TransactionId *) (start_address + subxipoff),
1781 : : snapshot->subxip, snapshot->subxcnt * sizeof(TransactionId));
1782 : 0 : }
1783 : 365 : }
1784 : :
1785 : : /*
1786 : : * RestoreSnapshot
1787 : : * Restore a serialized snapshot from the specified address.
1788 : : *
1789 : : * The copy is palloc'd in TopTransactionContext and has initial refcounts set
1790 : : * to 0. The returned snapshot has the copied flag set.
1791 : : */
1792 : : Snapshot
1793 : 1163 : RestoreSnapshot(char *start_address)
1794 : : {
1795 : 1163 : SerializedSnapshotData serialized_snapshot;
1796 : 1163 : Size size;
1797 : 1163 : Snapshot snapshot;
1798 : 1163 : TransactionId *serialized_xids;
1799 : :
1800 : 1163 : memcpy(&serialized_snapshot, start_address,
1801 : : sizeof(SerializedSnapshotData));
1802 : 1163 : serialized_xids = (TransactionId *)
1803 : 1163 : (start_address + sizeof(SerializedSnapshotData));
1804 : :
1805 : : /* We allocate any XID arrays needed in the same palloc block. */
1806 : 1163 : size = sizeof(SnapshotData)
1807 : 1163 : + serialized_snapshot.xcnt * sizeof(TransactionId)
1808 : 1163 : + serialized_snapshot.subxcnt * sizeof(TransactionId);
1809 : :
1810 : : /* Copy all required fields */
1811 : 1163 : snapshot = (Snapshot) MemoryContextAlloc(TopTransactionContext, size);
1812 : 1163 : snapshot->snapshot_type = SNAPSHOT_MVCC;
1813 : 1163 : snapshot->xmin = serialized_snapshot.xmin;
1814 : 1163 : snapshot->xmax = serialized_snapshot.xmax;
1815 : 1163 : snapshot->xip = NULL;
1816 : 1163 : snapshot->xcnt = serialized_snapshot.xcnt;
1817 : 1163 : snapshot->subxip = NULL;
1818 : 1163 : snapshot->subxcnt = serialized_snapshot.subxcnt;
1819 : 1163 : snapshot->suboverflowed = serialized_snapshot.suboverflowed;
1820 : 1163 : snapshot->takenDuringRecovery = serialized_snapshot.takenDuringRecovery;
1821 : 1163 : snapshot->curcid = serialized_snapshot.curcid;
1822 : 1163 : snapshot->snapXactCompletionCount = 0;
1823 : :
1824 : : /* Copy XIDs, if present. */
1825 [ + + ]: 1163 : if (serialized_snapshot.xcnt > 0)
1826 : : {
1827 : 288 : snapshot->xip = (TransactionId *) (snapshot + 1);
1828 : 288 : memcpy(snapshot->xip, serialized_xids,
1829 : : serialized_snapshot.xcnt * sizeof(TransactionId));
1830 : 288 : }
1831 : :
1832 : : /* Copy SubXIDs, if present. */
1833 [ + - ]: 1163 : if (serialized_snapshot.subxcnt > 0)
1834 : : {
1835 : 0 : snapshot->subxip = ((TransactionId *) (snapshot + 1)) +
1836 : 0 : serialized_snapshot.xcnt;
1837 : 0 : memcpy(snapshot->subxip, serialized_xids + serialized_snapshot.xcnt,
1838 : : serialized_snapshot.subxcnt * sizeof(TransactionId));
1839 : 0 : }
1840 : :
1841 : : /* Set the copied flag so that the caller will set refcounts correctly. */
1842 : 1163 : snapshot->regd_count = 0;
1843 : 1163 : snapshot->active_count = 0;
1844 : 1163 : snapshot->copied = true;
1845 : :
1846 : 2326 : return snapshot;
1847 : 1163 : }
1848 : :
1849 : : /*
1850 : : * Install a restored snapshot as the transaction snapshot.
1851 : : */
1852 : : void
1853 : 477 : RestoreTransactionSnapshot(Snapshot snapshot, PGPROC *source_pgproc)
1854 : : {
1855 : 477 : SetTransactionSnapshot(snapshot, NULL, InvalidPid, source_pgproc);
1856 : 477 : }
1857 : :
1858 : : /*
1859 : : * XidInMVCCSnapshot
1860 : : * Is the given XID still-in-progress according to the snapshot?
1861 : : *
1862 : : * Note: GetSnapshotData never stores either top xid or subxids of our own
1863 : : * backend into a snapshot, so these xids will not be reported as "running"
1864 : : * by this function. This is OK for current uses, because we always check
1865 : : * TransactionIdIsCurrentTransactionId first, except when it's known the
1866 : : * XID could not be ours anyway.
1867 : : */
1868 : : bool
1869 : 12198763 : XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
1870 : : {
1871 : : /*
1872 : : * Make a quick range check to eliminate most XIDs without looking at the
1873 : : * xip arrays. Note that this is OK even if we convert a subxact XID to
1874 : : * its parent below, because a subxact with XID < xmin has surely also got
1875 : : * a parent with XID < xmin, while one with XID >= xmax must belong to a
1876 : : * parent that was not yet committed at the time of this snapshot.
1877 : : */
1878 : :
1879 : : /* Any xid < xmin is not in-progress */
1880 [ + + ]: 12198763 : if (TransactionIdPrecedes(xid, snapshot->xmin))
1881 : 10560427 : return false;
1882 : : /* Any xid >= xmax is in-progress */
1883 [ + + ]: 1638336 : if (TransactionIdFollowsOrEquals(xid, snapshot->xmax))
1884 : 3782 : return true;
1885 : :
1886 : : /*
1887 : : * Snapshot information is stored slightly differently in snapshots taken
1888 : : * during recovery.
1889 : : */
1890 [ - + ]: 1634554 : if (!snapshot->takenDuringRecovery)
1891 : : {
1892 : : /*
1893 : : * If the snapshot contains full subxact data, the fastest way to
1894 : : * check things is just to compare the given XID against both subxact
1895 : : * XIDs and top-level XIDs. If the snapshot overflowed, we have to
1896 : : * use pg_subtrans to convert a subxact XID to its parent XID, but
1897 : : * then we need only look at top-level XIDs not subxacts.
1898 : : */
1899 [ - + ]: 1634554 : if (!snapshot->suboverflowed)
1900 : : {
1901 : : /* we have full data, so search subxip */
1902 [ + + ]: 1634554 : if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt))
1903 : 13 : return true;
1904 : :
1905 : : /* not there, fall through to search xip[] */
1906 : 1634541 : }
1907 : : else
1908 : : {
1909 : : /*
1910 : : * Snapshot overflowed, so convert xid to top-level. This is safe
1911 : : * because we eliminated too-old XIDs above.
1912 : : */
1913 : 0 : xid = SubTransGetTopmostTransaction(xid);
1914 : :
1915 : : /*
1916 : : * If xid was indeed a subxact, we might now have an xid < xmin,
1917 : : * so recheck to avoid an array scan. No point in rechecking
1918 : : * xmax.
1919 : : */
1920 [ # # ]: 0 : if (TransactionIdPrecedes(xid, snapshot->xmin))
1921 : 0 : return false;
1922 : : }
1923 : :
1924 [ + + ]: 1634541 : if (pg_lfind32(xid, snapshot->xip, snapshot->xcnt))
1925 : 6057 : return true;
1926 : 1628484 : }
1927 : : else
1928 : : {
1929 : : /*
1930 : : * In recovery we store all xids in the subxip array because it is by
1931 : : * far the bigger array, and we mostly don't know which xids are
1932 : : * top-level and which are subxacts. The xip array is empty.
1933 : : *
1934 : : * We start by searching subtrans, if we overflowed.
1935 : : */
1936 [ # # ]: 0 : if (snapshot->suboverflowed)
1937 : : {
1938 : : /*
1939 : : * Snapshot overflowed, so convert xid to top-level. This is safe
1940 : : * because we eliminated too-old XIDs above.
1941 : : */
1942 : 0 : xid = SubTransGetTopmostTransaction(xid);
1943 : :
1944 : : /*
1945 : : * If xid was indeed a subxact, we might now have an xid < xmin,
1946 : : * so recheck to avoid an array scan. No point in rechecking
1947 : : * xmax.
1948 : : */
1949 [ # # ]: 0 : if (TransactionIdPrecedes(xid, snapshot->xmin))
1950 : 0 : return false;
1951 : 0 : }
1952 : :
1953 : : /*
1954 : : * We now have either a top-level xid higher than xmin or an
1955 : : * indeterminate xid. We don't know whether it's top level or subxact
1956 : : * but it doesn't matter. If it's present, the xid is visible.
1957 : : */
1958 [ # # ]: 0 : if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt))
1959 : 0 : return true;
1960 : : }
1961 : :
1962 : 1628484 : return false;
1963 : 12198763 : }
1964 : :
1965 : : /* ResourceOwner callbacks */
1966 : :
1967 : : static void
1968 : 13884 : ResOwnerReleaseSnapshot(Datum res)
1969 : : {
1970 : 13884 : UnregisterSnapshotNoOwner((Snapshot) DatumGetPointer(res));
1971 : 13884 : }
|