Branch data Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * cluster.c
4 : : * CLUSTER a table on an index. This is now also used for VACUUM FULL.
5 : : *
6 : : * There is hardly anything left of Paul Brown's original implementation...
7 : : *
8 : : *
9 : : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
10 : : * Portions Copyright (c) 1994-5, Regents of the University of California
11 : : *
12 : : *
13 : : * IDENTIFICATION
14 : : * src/backend/commands/cluster.c
15 : : *
16 : : *-------------------------------------------------------------------------
17 : : */
18 : : #include "postgres.h"
19 : :
20 : : #include "access/amapi.h"
21 : : #include "access/heapam.h"
22 : : #include "access/multixact.h"
23 : : #include "access/relscan.h"
24 : : #include "access/tableam.h"
25 : : #include "access/toast_internals.h"
26 : : #include "access/transam.h"
27 : : #include "access/xact.h"
28 : : #include "catalog/catalog.h"
29 : : #include "catalog/dependency.h"
30 : : #include "catalog/heap.h"
31 : : #include "catalog/index.h"
32 : : #include "catalog/namespace.h"
33 : : #include "catalog/objectaccess.h"
34 : : #include "catalog/pg_am.h"
35 : : #include "catalog/pg_inherits.h"
36 : : #include "catalog/toasting.h"
37 : : #include "commands/cluster.h"
38 : : #include "commands/defrem.h"
39 : : #include "commands/progress.h"
40 : : #include "commands/tablecmds.h"
41 : : #include "commands/vacuum.h"
42 : : #include "miscadmin.h"
43 : : #include "optimizer/optimizer.h"
44 : : #include "pgstat.h"
45 : : #include "storage/bufmgr.h"
46 : : #include "storage/lmgr.h"
47 : : #include "storage/predicate.h"
48 : : #include "utils/acl.h"
49 : : #include "utils/fmgroids.h"
50 : : #include "utils/guc.h"
51 : : #include "utils/inval.h"
52 : : #include "utils/lsyscache.h"
53 : : #include "utils/memutils.h"
54 : : #include "utils/pg_rusage.h"
55 : : #include "utils/relmapper.h"
56 : : #include "utils/snapmgr.h"
57 : : #include "utils/syscache.h"
58 : :
59 : : /*
60 : : * This struct is used to pass around the information on tables to be
61 : : * clustered. We need this so we can make a list of them when invoked without
62 : : * a specific table/index pair.
63 : : */
64 : : typedef struct
65 : : {
66 : : Oid tableOid;
67 : : Oid indexOid;
68 : : } RelToCluster;
69 : :
70 : :
71 : : static void cluster_multiple_rels(List *rtcs, ClusterParams *params);
72 : : static void rebuild_relation(Relation OldHeap, Relation index, bool verbose);
73 : : static void copy_table_data(Relation NewHeap, Relation OldHeap, Relation OldIndex,
74 : : bool verbose, bool *pSwapToastByContent,
75 : : TransactionId *pFreezeXid, MultiXactId *pCutoffMulti);
76 : : static List *get_tables_to_cluster(MemoryContext cluster_context);
77 : : static List *get_tables_to_cluster_partitioned(MemoryContext cluster_context,
78 : : Oid indexOid);
79 : : static bool cluster_is_permitted_for_relation(Oid relid, Oid userid);
80 : :
81 : :
82 : : /*---------------------------------------------------------------------------
83 : : * This cluster code allows for clustering multiple tables at once. Because
84 : : * of this, we cannot just run everything on a single transaction, or we
85 : : * would be forced to acquire exclusive locks on all the tables being
86 : : * clustered, simultaneously --- very likely leading to deadlock.
87 : : *
88 : : * To solve this we follow a similar strategy to VACUUM code,
89 : : * clustering each relation in a separate transaction. For this to work,
90 : : * we need to:
91 : : * - provide a separate memory context so that we can pass information in
92 : : * a way that survives across transactions
93 : : * - start a new transaction every time a new relation is clustered
94 : : * - check for validity of the information on to-be-clustered relations,
95 : : * as someone might have deleted a relation behind our back, or
96 : : * clustered one on a different index
97 : : * - end the transaction
98 : : *
99 : : * The single-relation case does not have any such overhead.
100 : : *
101 : : * We also allow a relation to be specified without index. In that case,
102 : : * the indisclustered bit will be looked up, and an ERROR will be thrown
103 : : * if there is no index with the bit set.
104 : : *---------------------------------------------------------------------------
105 : : */
106 : : void
107 : 28 : cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel)
108 : : {
109 : 28 : ListCell *lc;
110 : 28 : ClusterParams params = {0};
111 : 28 : bool verbose = false;
112 : 28 : Relation rel = NULL;
113 : 28 : Oid indexOid = InvalidOid;
114 : 28 : MemoryContext cluster_context;
115 : 28 : List *rtcs;
116 : :
117 : : /* Parse option list */
118 [ - + # # : 30 : foreach(lc, stmt->params)
+ + ]
119 : : {
120 : 2 : DefElem *opt = (DefElem *) lfirst(lc);
121 : :
122 [ + - ]: 2 : if (strcmp(opt->defname, "verbose") == 0)
123 : 2 : verbose = defGetBoolean(opt);
124 : : else
125 [ # # # # ]: 0 : ereport(ERROR,
126 : : (errcode(ERRCODE_SYNTAX_ERROR),
127 : : errmsg("unrecognized %s option \"%s\"",
128 : : "CLUSTER", opt->defname),
129 : : parser_errposition(pstate, opt->location)));
130 : 2 : }
131 : :
132 : 28 : params.options = (verbose ? CLUOPT_VERBOSE : 0);
133 : :
134 [ + + ]: 28 : if (stmt->relation != NULL)
135 : : {
136 : : /* This is the single-relation case. */
137 : 27 : Oid tableOid;
138 : :
139 : : /*
140 : : * Find, lock, and check permissions on the table. We obtain
141 : : * AccessExclusiveLock right away to avoid lock-upgrade hazard in the
142 : : * single-transaction case.
143 : : */
144 : 27 : tableOid = RangeVarGetRelidExtended(stmt->relation,
145 : : AccessExclusiveLock,
146 : : 0,
147 : : RangeVarCallbackMaintainsTable,
148 : : NULL);
149 : 27 : rel = table_open(tableOid, NoLock);
150 : :
151 : : /*
152 : : * Reject clustering a remote temp table ... their local buffer
153 : : * manager is not going to cope.
154 : : */
155 [ + + + - ]: 27 : if (RELATION_IS_OTHER_TEMP(rel))
156 [ # # # # ]: 0 : ereport(ERROR,
157 : : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
158 : : errmsg("cannot cluster temporary tables of other sessions")));
159 : :
160 [ + + ]: 27 : if (stmt->indexname == NULL)
161 : : {
162 : 5 : ListCell *index;
163 : :
164 : : /* We need to find the index that has indisclustered set. */
165 [ + - + + : 10 : foreach(index, RelationGetIndexList(rel))
+ + ]
166 : : {
167 : 5 : indexOid = lfirst_oid(index);
168 [ + + ]: 5 : if (get_index_isclustered(indexOid))
169 : 3 : break;
170 : 2 : indexOid = InvalidOid;
171 : 2 : }
172 : :
173 [ + + ]: 5 : if (!OidIsValid(indexOid))
174 [ + - + - ]: 2 : ereport(ERROR,
175 : : (errcode(ERRCODE_UNDEFINED_OBJECT),
176 : : errmsg("there is no previously clustered index for table \"%s\"",
177 : : stmt->relation->relname)));
178 : 3 : }
179 : : else
180 : : {
181 : : /*
182 : : * The index is expected to be in the same namespace as the
183 : : * relation.
184 : : */
185 : 44 : indexOid = get_relname_relid(stmt->indexname,
186 : 22 : rel->rd_rel->relnamespace);
187 [ + - ]: 22 : if (!OidIsValid(indexOid))
188 [ # # # # ]: 0 : ereport(ERROR,
189 : : (errcode(ERRCODE_UNDEFINED_OBJECT),
190 : : errmsg("index \"%s\" for table \"%s\" does not exist",
191 : : stmt->indexname, stmt->relation->relname)));
192 : : }
193 : :
194 : : /* For non-partitioned tables, do what we came here to do. */
195 [ + + ]: 25 : if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
196 : : {
197 : 22 : cluster_rel(rel, indexOid, &params);
198 : : /* cluster_rel closes the relation, but keeps lock */
199 : :
200 : 22 : return;
201 : : }
202 [ + + ]: 25 : }
203 : :
204 : : /*
205 : : * By here, we know we are in a multi-table situation. In order to avoid
206 : : * holding locks for too long, we want to process each table in its own
207 : : * transaction. This forces us to disallow running inside a user
208 : : * transaction block.
209 : : */
210 : 4 : PreventInTransactionBlock(isTopLevel, "CLUSTER");
211 : :
212 : : /* Also, we need a memory context to hold our list of relations */
213 : 4 : cluster_context = AllocSetContextCreate(PortalContext,
214 : : "Cluster",
215 : : ALLOCSET_DEFAULT_SIZES);
216 : :
217 : : /*
218 : : * Either we're processing a partitioned table, or we were not given any
219 : : * table name at all. In either case, obtain a list of relations to
220 : : * process.
221 : : *
222 : : * In the former case, an index name must have been given, so we don't
223 : : * need to recheck its "indisclustered" bit, but we have to check that it
224 : : * is an index that we can cluster on. In the latter case, we set the
225 : : * option bit to have indisclustered verified.
226 : : *
227 : : * Rechecking the relation itself is necessary here in all cases.
228 : : */
229 : 4 : params.options |= CLUOPT_RECHECK;
230 [ + + ]: 4 : if (rel != NULL)
231 : : {
232 [ + - ]: 3 : Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
233 : 3 : check_index_is_clusterable(rel, indexOid, AccessShareLock);
234 : 3 : rtcs = get_tables_to_cluster_partitioned(cluster_context, indexOid);
235 : :
236 : : /* close relation, releasing lock on parent table */
237 : 3 : table_close(rel, AccessExclusiveLock);
238 : 3 : }
239 : : else
240 : : {
241 : 1 : rtcs = get_tables_to_cluster(cluster_context);
242 : 1 : params.options |= CLUOPT_RECHECK_ISCLUSTERED;
243 : : }
244 : :
245 : : /* Do the job. */
246 : 4 : cluster_multiple_rels(rtcs, &params);
247 : :
248 : : /* Start a new transaction for the cleanup work. */
249 : 4 : StartTransactionCommand();
250 : :
251 : : /* Clean up working storage */
252 : 4 : MemoryContextDelete(cluster_context);
253 [ - + ]: 26 : }
254 : :
255 : : /*
256 : : * Given a list of relations to cluster, process each of them in a separate
257 : : * transaction.
258 : : *
259 : : * We expect to be in a transaction at start, but there isn't one when we
260 : : * return.
261 : : */
262 : : static void
263 : 3 : cluster_multiple_rels(List *rtcs, ClusterParams *params)
264 : : {
265 : 3 : ListCell *lc;
266 : :
267 : : /* Commit to get out of starting transaction */
268 : 3 : PopActiveSnapshot();
269 : 3 : CommitTransactionCommand();
270 : :
271 : : /* Cluster the tables, each in a separate transaction */
272 [ + - + + : 8 : foreach(lc, rtcs)
+ + ]
273 : : {
274 : 5 : RelToCluster *rtc = (RelToCluster *) lfirst(lc);
275 : 5 : Relation rel;
276 : :
277 : : /* Start a new transaction for each relation. */
278 : 5 : StartTransactionCommand();
279 : :
280 : : /* functions in indexes may want a snapshot set */
281 : 5 : PushActiveSnapshot(GetTransactionSnapshot());
282 : :
283 : 5 : rel = table_open(rtc->tableOid, AccessExclusiveLock);
284 : :
285 : : /* Process this table */
286 : 5 : cluster_rel(rel, rtc->indexOid, params);
287 : : /* cluster_rel closes the relation, but keeps lock */
288 : :
289 : 5 : PopActiveSnapshot();
290 : 5 : CommitTransactionCommand();
291 : 5 : }
292 : 3 : }
293 : :
294 : : /*
295 : : * cluster_rel
296 : : *
297 : : * This clusters the table by creating a new, clustered table and
298 : : * swapping the relfilenumbers of the new table and the old table, so
299 : : * the OID of the original table is preserved. Thus we do not lose
300 : : * GRANT, inheritance nor references to this table.
301 : : *
302 : : * Indexes are rebuilt too, via REINDEX. Since we are effectively bulk-loading
303 : : * the new table, it's better to create the indexes afterwards than to fill
304 : : * them incrementally while we load the table.
305 : : *
306 : : * If indexOid is InvalidOid, the table will be rewritten in physical order
307 : : * instead of index order. This is the new implementation of VACUUM FULL,
308 : : * and error messages should refer to the operation as VACUUM not CLUSTER.
309 : : */
310 : : void
311 : 54 : cluster_rel(Relation OldHeap, Oid indexOid, ClusterParams *params)
312 : : {
313 : 54 : Oid tableOid = RelationGetRelid(OldHeap);
314 : 54 : Oid save_userid;
315 : 54 : int save_sec_context;
316 : 54 : int save_nestlevel;
317 : 54 : bool verbose = ((params->options & CLUOPT_VERBOSE) != 0);
318 : 54 : bool recheck = ((params->options & CLUOPT_RECHECK) != 0);
319 : 54 : Relation index;
320 : :
321 [ + - ]: 54 : Assert(CheckRelationLockedByMe(OldHeap, AccessExclusiveLock, false));
322 : :
323 : : /* Check for user-requested abort. */
324 [ + - ]: 54 : CHECK_FOR_INTERRUPTS();
325 : :
326 : 54 : pgstat_progress_start_command(PROGRESS_COMMAND_CLUSTER, tableOid);
327 [ + + ]: 54 : if (OidIsValid(indexOid))
328 : 27 : pgstat_progress_update_param(PROGRESS_CLUSTER_COMMAND,
329 : : PROGRESS_CLUSTER_COMMAND_CLUSTER);
330 : : else
331 : 27 : pgstat_progress_update_param(PROGRESS_CLUSTER_COMMAND,
332 : : PROGRESS_CLUSTER_COMMAND_VACUUM_FULL);
333 : :
334 : : /*
335 : : * Switch to the table owner's userid, so that any index functions are run
336 : : * as that user. Also lock down security-restricted operations and
337 : : * arrange to make GUC variable changes local to this command.
338 : : */
339 : 54 : GetUserIdAndSecContext(&save_userid, &save_sec_context);
340 : 108 : SetUserIdAndSecContext(OldHeap->rd_rel->relowner,
341 : 54 : save_sec_context | SECURITY_RESTRICTED_OPERATION);
342 : 54 : save_nestlevel = NewGUCNestLevel();
343 : 54 : RestrictSearchPath();
344 : :
345 : : /*
346 : : * Since we may open a new transaction for each relation, we have to check
347 : : * that the relation still is what we think it is.
348 : : *
349 : : * If this is a single-transaction CLUSTER, we can skip these tests. We
350 : : * *must* skip the one on indisclustered since it would reject an attempt
351 : : * to cluster a not-previously-clustered index.
352 : : */
353 [ + + ]: 54 : if (recheck)
354 : : {
355 : : /* Check that the user still has privileges for the relation */
356 [ + - ]: 5 : if (!cluster_is_permitted_for_relation(tableOid, save_userid))
357 : : {
358 : 0 : relation_close(OldHeap, AccessExclusiveLock);
359 : 0 : goto out;
360 : : }
361 : :
362 : : /*
363 : : * Silently skip a temp table for a remote session. Only doing this
364 : : * check in the "recheck" case is appropriate (which currently means
365 : : * somebody is executing a database-wide CLUSTER or on a partitioned
366 : : * table), because there is another check in cluster() which will stop
367 : : * any attempt to cluster remote temp tables by name. There is
368 : : * another check in cluster_rel which is redundant, but we leave it
369 : : * for extra safety.
370 : : */
371 [ - + # # ]: 5 : if (RELATION_IS_OTHER_TEMP(OldHeap))
372 : : {
373 : 0 : relation_close(OldHeap, AccessExclusiveLock);
374 : 0 : goto out;
375 : : }
376 : :
377 [ - + ]: 5 : if (OidIsValid(indexOid))
378 : : {
379 : : /*
380 : : * Check that the index still exists
381 : : */
382 [ + - ]: 5 : if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(indexOid)))
383 : : {
384 : 0 : relation_close(OldHeap, AccessExclusiveLock);
385 : 0 : goto out;
386 : : }
387 : :
388 : : /*
389 : : * Check that the index is still the one with indisclustered set,
390 : : * if needed.
391 : : */
392 [ + + + - ]: 5 : if ((params->options & CLUOPT_RECHECK_ISCLUSTERED) != 0 &&
393 : 1 : !get_index_isclustered(indexOid))
394 : : {
395 : 0 : relation_close(OldHeap, AccessExclusiveLock);
396 : 0 : goto out;
397 : : }
398 : 5 : }
399 : 5 : }
400 : :
401 : : /*
402 : : * We allow VACUUM FULL, but not CLUSTER, on shared catalogs. CLUSTER
403 : : * would work in most respects, but the index would only get marked as
404 : : * indisclustered in the current database, leading to unexpected behavior
405 : : * if CLUSTER were later invoked in another database.
406 : : */
407 [ + + + - ]: 54 : if (OidIsValid(indexOid) && OldHeap->rd_rel->relisshared)
408 [ # # # # ]: 0 : ereport(ERROR,
409 : : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
410 : : errmsg("cannot cluster a shared catalog")));
411 : :
412 : : /*
413 : : * Don't process temp tables of other backends ... their local buffer
414 : : * manager is not going to cope.
415 : : */
416 [ + + + - ]: 54 : if (RELATION_IS_OTHER_TEMP(OldHeap))
417 : : {
418 [ # # ]: 0 : if (OidIsValid(indexOid))
419 [ # # # # ]: 0 : ereport(ERROR,
420 : : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
421 : : errmsg("cannot cluster temporary tables of other sessions")));
422 : : else
423 [ # # # # ]: 0 : ereport(ERROR,
424 : : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
425 : : errmsg("cannot vacuum temporary tables of other sessions")));
426 : 0 : }
427 : :
428 : : /*
429 : : * Also check for active uses of the relation in the current transaction,
430 : : * including open scans and pending AFTER trigger events.
431 : : */
432 : 54 : CheckTableNotInUse(OldHeap, OidIsValid(indexOid) ? "CLUSTER" : "VACUUM");
433 : :
434 : : /* Check heap and index are valid to cluster on */
435 [ + + ]: 54 : if (OidIsValid(indexOid))
436 : : {
437 : : /* verify the index is good and lock it */
438 : 27 : check_index_is_clusterable(OldHeap, indexOid, AccessExclusiveLock);
439 : : /* also open it */
440 : 27 : index = index_open(indexOid, NoLock);
441 : 27 : }
442 : : else
443 : 27 : index = NULL;
444 : :
445 : : /*
446 : : * Quietly ignore the request if this is a materialized view which has not
447 : : * been populated from its query. No harm is done because there is no data
448 : : * to deal with, and we don't want to throw an error if this is part of a
449 : : * multi-relation request -- for example, CLUSTER was run on the entire
450 : : * database.
451 : : */
452 [ - + # # ]: 54 : if (OldHeap->rd_rel->relkind == RELKIND_MATVIEW &&
453 : 0 : !RelationIsPopulated(OldHeap))
454 : : {
455 : 0 : relation_close(OldHeap, AccessExclusiveLock);
456 : 0 : goto out;
457 : : }
458 : :
459 [ + + + - : 54 : Assert(OldHeap->rd_rel->relkind == RELKIND_RELATION ||
+ - ]
460 : : OldHeap->rd_rel->relkind == RELKIND_MATVIEW ||
461 : : OldHeap->rd_rel->relkind == RELKIND_TOASTVALUE);
462 : :
463 : : /*
464 : : * All predicate locks on the tuples or pages are about to be made
465 : : * invalid, because we move tuples around. Promote them to relation
466 : : * locks. Predicate locks on indexes will be promoted when they are
467 : : * reindexed.
468 : : */
469 : 54 : TransferPredicateLocksToHeapRelation(OldHeap);
470 : :
471 : : /* rebuild_relation does all the dirty work */
472 : 54 : rebuild_relation(OldHeap, index, verbose);
473 : : /* rebuild_relation closes OldHeap, and index if valid */
474 : :
475 : : out:
476 : : /* Roll back any GUC changes executed by index functions */
477 : 54 : AtEOXact_GUC(false, save_nestlevel);
478 : :
479 : : /* Restore userid and security context */
480 : 54 : SetUserIdAndSecContext(save_userid, save_sec_context);
481 : :
482 : 54 : pgstat_progress_end_command();
483 : 54 : }
484 : :
485 : : /*
486 : : * Verify that the specified heap and index are valid to cluster on
487 : : *
488 : : * Side effect: obtains lock on the index. The caller may
489 : : * in some cases already have AccessExclusiveLock on the table, but
490 : : * not in all cases so we can't rely on the table-level lock for
491 : : * protection here.
492 : : */
493 : : void
494 : 40 : check_index_is_clusterable(Relation OldHeap, Oid indexOid, LOCKMODE lockmode)
495 : : {
496 : 40 : Relation OldIndex;
497 : :
498 : 40 : OldIndex = index_open(indexOid, lockmode);
499 : :
500 : : /*
501 : : * Check that index is in fact an index on the given relation
502 : : */
503 [ + - ]: 40 : if (OldIndex->rd_index == NULL ||
504 : 40 : OldIndex->rd_index->indrelid != RelationGetRelid(OldHeap))
505 [ # # # # ]: 0 : ereport(ERROR,
506 : : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
507 : : errmsg("\"%s\" is not an index for table \"%s\"",
508 : : RelationGetRelationName(OldIndex),
509 : : RelationGetRelationName(OldHeap))));
510 : :
511 : : /* Index AM must allow clustering */
512 [ + - ]: 40 : if (!OldIndex->rd_indam->amclusterable)
513 [ # # # # ]: 0 : ereport(ERROR,
514 : : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
515 : : errmsg("cannot cluster on index \"%s\" because access method does not support clustering",
516 : : RelationGetRelationName(OldIndex))));
517 : :
518 : : /*
519 : : * Disallow clustering on incomplete indexes (those that might not index
520 : : * every row of the relation). We could relax this by making a separate
521 : : * seqscan pass over the table to copy the missing rows, but that seems
522 : : * expensive and tedious.
523 : : */
524 [ + - ]: 40 : if (!heap_attisnull(OldIndex->rd_indextuple, Anum_pg_index_indpred, NULL))
525 [ # # # # ]: 0 : ereport(ERROR,
526 : : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
527 : : errmsg("cannot cluster on partial index \"%s\"",
528 : : RelationGetRelationName(OldIndex))));
529 : :
530 : : /*
531 : : * Disallow if index is left over from a failed CREATE INDEX CONCURRENTLY;
532 : : * it might well not contain entries for every heap row, or might not even
533 : : * be internally consistent. (But note that we don't check indcheckxmin;
534 : : * the worst consequence of following broken HOT chains would be that we
535 : : * might put recently-dead tuples out-of-order in the new table, and there
536 : : * is little harm in that.)
537 : : */
538 [ + + ]: 40 : if (!OldIndex->rd_index->indisvalid)
539 [ + - + - ]: 1 : ereport(ERROR,
540 : : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
541 : : errmsg("cannot cluster on invalid index \"%s\"",
542 : : RelationGetRelationName(OldIndex))));
543 : :
544 : : /* Drop relcache refcnt on OldIndex, but keep lock */
545 : 39 : index_close(OldIndex, NoLock);
546 : 39 : }
547 : :
548 : : /*
549 : : * mark_index_clustered: mark the specified index as the one clustered on
550 : : *
551 : : * With indexOid == InvalidOid, will mark all indexes of rel not-clustered.
552 : : */
553 : : void
554 : 40 : mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
555 : : {
556 : 40 : HeapTuple indexTuple;
557 : 40 : Form_pg_index indexForm;
558 : 40 : Relation pg_index;
559 : 40 : ListCell *index;
560 : :
561 : : /* Disallow applying to a partitioned table */
562 [ + + ]: 40 : if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
563 [ + - + - ]: 2 : ereport(ERROR,
564 : : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
565 : : errmsg("cannot mark index clustered in partitioned table")));
566 : :
567 : : /*
568 : : * If the index is already marked clustered, no need to do anything.
569 : : */
570 [ + + ]: 38 : if (OidIsValid(indexOid))
571 : : {
572 [ + + ]: 36 : if (get_index_isclustered(indexOid))
573 : 5 : return;
574 : 31 : }
575 : :
576 : : /*
577 : : * Check each index of the relation and set/clear the bit as needed.
578 : : */
579 : 33 : pg_index = table_open(IndexRelationId, RowExclusiveLock);
580 : :
581 [ + - + + : 99 : foreach(index, RelationGetIndexList(rel))
+ + ]
582 : : {
583 : 66 : Oid thisIndexOid = lfirst_oid(index);
584 : :
585 : 66 : indexTuple = SearchSysCacheCopy1(INDEXRELID,
586 : : ObjectIdGetDatum(thisIndexOid));
587 [ + - ]: 66 : if (!HeapTupleIsValid(indexTuple))
588 [ # # # # ]: 0 : elog(ERROR, "cache lookup failed for index %u", thisIndexOid);
589 : 66 : indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
590 : :
591 : : /*
592 : : * Unset the bit if set. We know it's wrong because we checked this
593 : : * earlier.
594 : : */
595 [ + + ]: 66 : if (indexForm->indisclustered)
596 : : {
597 : 5 : indexForm->indisclustered = false;
598 : 5 : CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
599 : 5 : }
600 [ + + ]: 61 : else if (thisIndexOid == indexOid)
601 : : {
602 : : /* this was checked earlier, but let's be real sure */
603 [ + - ]: 31 : if (!indexForm->indisvalid)
604 [ # # # # ]: 0 : elog(ERROR, "cannot cluster on invalid index %u", indexOid);
605 : 31 : indexForm->indisclustered = true;
606 : 31 : CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
607 : 31 : }
608 : :
609 [ + - ]: 66 : InvokeObjectPostAlterHookArg(IndexRelationId, thisIndexOid, 0,
610 : : InvalidOid, is_internal);
611 : :
612 : 66 : heap_freetuple(indexTuple);
613 : 66 : }
614 : :
615 : 33 : table_close(pg_index, RowExclusiveLock);
616 [ - + ]: 38 : }
617 : :
618 : : /*
619 : : * rebuild_relation: rebuild an existing relation in index or physical order
620 : : *
621 : : * OldHeap: table to rebuild.
622 : : * index: index to cluster by, or NULL to rewrite in physical order.
623 : : *
624 : : * On entry, heap and index (if one is given) must be open, and
625 : : * AccessExclusiveLock held on them.
626 : : * On exit, they are closed, but locks on them are not released.
627 : : */
628 : : static void
629 : 54 : rebuild_relation(Relation OldHeap, Relation index, bool verbose)
630 : : {
631 : 54 : Oid tableOid = RelationGetRelid(OldHeap);
632 : 54 : Oid accessMethod = OldHeap->rd_rel->relam;
633 : 54 : Oid tableSpace = OldHeap->rd_rel->reltablespace;
634 : 54 : Oid OIDNewHeap;
635 : 54 : Relation NewHeap;
636 : 54 : char relpersistence;
637 : 54 : bool is_system_catalog;
638 : 54 : bool swap_toast_by_content;
639 : 54 : TransactionId frozenXid;
640 : 54 : MultiXactId cutoffMulti;
641 : :
642 [ + - + + ]: 54 : Assert(CheckRelationLockedByMe(OldHeap, AccessExclusiveLock, false) &&
643 : : (index == NULL || CheckRelationLockedByMe(index, AccessExclusiveLock, false)));
644 : :
645 [ + + ]: 54 : if (index)
646 : : /* Mark the correct index as clustered */
647 : 27 : mark_index_clustered(OldHeap, RelationGetRelid(index), true);
648 : :
649 : : /* Remember info about rel before closing OldHeap */
650 : 54 : relpersistence = OldHeap->rd_rel->relpersistence;
651 : 54 : is_system_catalog = IsSystemRelation(OldHeap);
652 : :
653 : : /*
654 : : * Create the transient table that will receive the re-ordered data.
655 : : *
656 : : * OldHeap is already locked, so no need to lock it again. make_new_heap
657 : : * obtains AccessExclusiveLock on the new heap and its toast table.
658 : : */
659 : 108 : OIDNewHeap = make_new_heap(tableOid, tableSpace,
660 : 54 : accessMethod,
661 : 54 : relpersistence,
662 : : NoLock);
663 [ + - ]: 54 : Assert(CheckRelationOidLockedByMe(OIDNewHeap, AccessExclusiveLock, false));
664 : 54 : NewHeap = table_open(OIDNewHeap, NoLock);
665 : :
666 : : /* Copy the heap data into the new table in the desired order */
667 : 54 : copy_table_data(NewHeap, OldHeap, index, verbose,
668 : : &swap_toast_by_content, &frozenXid, &cutoffMulti);
669 : :
670 : :
671 : : /* Close relcache entries, but keep lock until transaction commit */
672 : 54 : table_close(OldHeap, NoLock);
673 [ + + ]: 54 : if (index)
674 : 27 : index_close(index, NoLock);
675 : :
676 : : /*
677 : : * Close the new relation so it can be dropped as soon as the storage is
678 : : * swapped. The relation is not visible to others, so no need to unlock it
679 : : * explicitly.
680 : : */
681 : 54 : table_close(NewHeap, NoLock);
682 : :
683 : : /*
684 : : * Swap the physical files of the target and transient tables, then
685 : : * rebuild the target's indexes and throw away the transient table.
686 : : */
687 : 108 : finish_heap_swap(tableOid, OIDNewHeap, is_system_catalog,
688 : 54 : swap_toast_by_content, false, true,
689 : 54 : frozenXid, cutoffMulti,
690 : 54 : relpersistence);
691 : 54 : }
692 : :
693 : :
694 : : /*
695 : : * Create the transient table that will be filled with new data during
696 : : * CLUSTER, ALTER TABLE, and similar operations. The transient table
697 : : * duplicates the logical structure of the OldHeap; but will have the
698 : : * specified physical storage properties NewTableSpace, NewAccessMethod, and
699 : : * relpersistence.
700 : : *
701 : : * After this, the caller should load the new heap with transferred/modified
702 : : * data, then call finish_heap_swap to complete the operation.
703 : : */
704 : : Oid
705 : 300 : make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, Oid NewAccessMethod,
706 : : char relpersistence, LOCKMODE lockmode)
707 : : {
708 : 300 : TupleDesc OldHeapDesc;
709 : 300 : char NewHeapName[NAMEDATALEN];
710 : 300 : Oid OIDNewHeap;
711 : 300 : Oid toastid;
712 : 300 : Relation OldHeap;
713 : 300 : HeapTuple tuple;
714 : 300 : Datum reloptions;
715 : 300 : bool isNull;
716 : 300 : Oid namespaceid;
717 : :
718 : 300 : OldHeap = table_open(OIDOldHeap, lockmode);
719 : 300 : OldHeapDesc = RelationGetDescr(OldHeap);
720 : :
721 : : /*
722 : : * Note that the NewHeap will not receive any of the defaults or
723 : : * constraints associated with the OldHeap; we don't need 'em, and there's
724 : : * no reason to spend cycles inserting them into the catalogs only to
725 : : * delete them.
726 : : */
727 : :
728 : : /*
729 : : * But we do want to use reloptions of the old heap for new heap.
730 : : */
731 : 300 : tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(OIDOldHeap));
732 [ + - ]: 300 : if (!HeapTupleIsValid(tuple))
733 [ # # # # ]: 0 : elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
734 : 300 : reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
735 : : &isNull);
736 [ + + ]: 300 : if (isNull)
737 : 278 : reloptions = (Datum) 0;
738 : :
739 [ + + ]: 300 : if (relpersistence == RELPERSISTENCE_TEMP)
740 : 22 : namespaceid = LookupCreationNamespace("pg_temp");
741 : : else
742 : 278 : namespaceid = RelationGetNamespace(OldHeap);
743 : :
744 : : /*
745 : : * Create the new heap, using a temporary name in the same namespace as
746 : : * the existing table. NOTE: there is some risk of collision with user
747 : : * relnames. Working around this seems more trouble than it's worth; in
748 : : * particular, we can't create the new heap in a different namespace from
749 : : * the old, or we will have problems with the TEMP status of temp tables.
750 : : *
751 : : * Note: the new heap is not a shared relation, even if we are rebuilding
752 : : * a shared rel. However, we do make the new heap mapped if the source is
753 : : * mapped. This simplifies swap_relation_files, and is absolutely
754 : : * necessary for rebuilding pg_class, for reasons explained there.
755 : : */
756 : 300 : snprintf(NewHeapName, sizeof(NewHeapName), "pg_temp_%u", OIDOldHeap);
757 : :
758 : 600 : OIDNewHeap = heap_create_with_catalog(NewHeapName,
759 : 300 : namespaceid,
760 : 300 : NewTableSpace,
761 : : InvalidOid,
762 : : InvalidOid,
763 : : InvalidOid,
764 : 300 : OldHeap->rd_rel->relowner,
765 : 300 : NewAccessMethod,
766 : 300 : OldHeapDesc,
767 : : NIL,
768 : : RELKIND_RELATION,
769 : 300 : relpersistence,
770 : : false,
771 [ + + + - : 300 : RelationIsMapped(OldHeap),
+ - + + +
- ]
772 : : ONCOMMIT_NOOP,
773 : 300 : reloptions,
774 : : false,
775 : : true,
776 : : true,
777 : 300 : OIDOldHeap,
778 : : NULL);
779 [ + - ]: 300 : Assert(OIDNewHeap != InvalidOid);
780 : :
781 : 300 : ReleaseSysCache(tuple);
782 : :
783 : : /*
784 : : * Advance command counter so that the newly-created relation's catalog
785 : : * tuples will be visible to table_open.
786 : : */
787 : 300 : CommandCounterIncrement();
788 : :
789 : : /*
790 : : * If necessary, create a TOAST table for the new relation.
791 : : *
792 : : * If the relation doesn't have a TOAST table already, we can't need one
793 : : * for the new relation. The other way around is possible though: if some
794 : : * wide columns have been dropped, NewHeapCreateToastTable can decide that
795 : : * no TOAST table is needed for the new table.
796 : : *
797 : : * Note that NewHeapCreateToastTable ends with CommandCounterIncrement, so
798 : : * that the TOAST table will be visible for insertion.
799 : : */
800 : 300 : toastid = OldHeap->rd_rel->reltoastrelid;
801 [ + + ]: 300 : if (OidIsValid(toastid))
802 : : {
803 : : /* keep the existing toast table's reloptions, if any */
804 : 101 : tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(toastid));
805 [ + - ]: 101 : if (!HeapTupleIsValid(tuple))
806 [ # # # # ]: 0 : elog(ERROR, "cache lookup failed for relation %u", toastid);
807 : 101 : reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
808 : : &isNull);
809 [ - + ]: 101 : if (isNull)
810 : 101 : reloptions = (Datum) 0;
811 : :
812 : 101 : NewHeapCreateToastTable(OIDNewHeap, reloptions, lockmode, toastid);
813 : :
814 : 101 : ReleaseSysCache(tuple);
815 : 101 : }
816 : :
817 : 300 : table_close(OldHeap, NoLock);
818 : :
819 : 600 : return OIDNewHeap;
820 : 300 : }
821 : :
822 : : /*
823 : : * Do the physical copying of table data.
824 : : *
825 : : * There are three output parameters:
826 : : * *pSwapToastByContent is set true if toast tables must be swapped by content.
827 : : * *pFreezeXid receives the TransactionId used as freeze cutoff point.
828 : : * *pCutoffMulti receives the MultiXactId used as a cutoff point.
829 : : */
830 : : static void
831 : 54 : copy_table_data(Relation NewHeap, Relation OldHeap, Relation OldIndex, bool verbose,
832 : : bool *pSwapToastByContent, TransactionId *pFreezeXid,
833 : : MultiXactId *pCutoffMulti)
834 : : {
835 : 54 : Relation relRelation;
836 : 54 : HeapTuple reltup;
837 : 54 : Form_pg_class relform;
838 : 54 : TupleDesc oldTupDesc PG_USED_FOR_ASSERTS_ONLY;
839 : 54 : TupleDesc newTupDesc PG_USED_FOR_ASSERTS_ONLY;
840 : 54 : VacuumParams params;
841 : 54 : struct VacuumCutoffs cutoffs;
842 : 54 : bool use_sort;
843 : 108 : double num_tuples = 0,
844 : 54 : tups_vacuumed = 0,
845 : 54 : tups_recently_dead = 0;
846 : 54 : BlockNumber num_pages;
847 : 54 : int elevel = verbose ? INFO : DEBUG2;
848 : 54 : PGRUsage ru0;
849 : 54 : char *nspname;
850 : :
851 : 54 : pg_rusage_init(&ru0);
852 : :
853 : : /* Store a copy of the namespace name for logging purposes */
854 : 54 : nspname = get_namespace_name(RelationGetNamespace(OldHeap));
855 : :
856 : : /*
857 : : * Their tuple descriptors should be exactly alike, but here we only need
858 : : * assume that they have the same number of columns.
859 : : */
860 : 54 : oldTupDesc = RelationGetDescr(OldHeap);
861 : 54 : newTupDesc = RelationGetDescr(NewHeap);
862 [ + - ]: 54 : Assert(newTupDesc->natts == oldTupDesc->natts);
863 : :
864 : : /*
865 : : * If the OldHeap has a toast table, get lock on the toast table to keep
866 : : * it from being vacuumed. This is needed because autovacuum processes
867 : : * toast tables independently of their main tables, with no lock on the
868 : : * latter. If an autovacuum were to start on the toast table after we
869 : : * compute our OldestXmin below, it would use a later OldestXmin, and then
870 : : * possibly remove as DEAD toast tuples belonging to main tuples we think
871 : : * are only RECENTLY_DEAD. Then we'd fail while trying to copy those
872 : : * tuples.
873 : : *
874 : : * We don't need to open the toast relation here, just lock it. The lock
875 : : * will be held till end of transaction.
876 : : */
877 [ + + ]: 54 : if (OldHeap->rd_rel->reltoastrelid)
878 : 13 : LockRelationOid(OldHeap->rd_rel->reltoastrelid, AccessExclusiveLock);
879 : :
880 : : /*
881 : : * If both tables have TOAST tables, perform toast swap by content. It is
882 : : * possible that the old table has a toast table but the new one doesn't,
883 : : * if toastable columns have been dropped. In that case we have to do
884 : : * swap by links. This is okay because swap by content is only essential
885 : : * for system catalogs, and we don't support schema changes for them.
886 : : */
887 [ + + - + ]: 54 : if (OldHeap->rd_rel->reltoastrelid && NewHeap->rd_rel->reltoastrelid)
888 : : {
889 : 13 : *pSwapToastByContent = true;
890 : :
891 : : /*
892 : : * When doing swap by content, any toast pointers written into NewHeap
893 : : * must use the old toast table's OID, because that's where the toast
894 : : * data will eventually be found. Set this up by setting rd_toastoid.
895 : : * This also tells toast_save_datum() to preserve the toast value
896 : : * OIDs, which we want so as not to invalidate toast pointers in
897 : : * system catalog caches, and to avoid making multiple copies of a
898 : : * single toast value.
899 : : *
900 : : * Note that we must hold NewHeap open until we are done writing data,
901 : : * since the relcache will not guarantee to remember this setting once
902 : : * the relation is closed. Also, this technique depends on the fact
903 : : * that no one will try to read from the NewHeap until after we've
904 : : * finished writing it and swapping the rels --- otherwise they could
905 : : * follow the toast pointers to the wrong place. (It would actually
906 : : * work for values copied over from the old toast table, but not for
907 : : * any values that we toast which were previously not toasted.)
908 : : */
909 : 13 : NewHeap->rd_toastoid = OldHeap->rd_rel->reltoastrelid;
910 : 13 : }
911 : : else
912 : 41 : *pSwapToastByContent = false;
913 : :
914 : : /*
915 : : * Compute xids used to freeze and weed out dead tuples and multixacts.
916 : : * Since we're going to rewrite the whole table anyway, there's no reason
917 : : * not to be aggressive about this.
918 : : */
919 : 54 : memset(¶ms, 0, sizeof(VacuumParams));
920 : 54 : vacuum_get_cutoffs(OldHeap, params, &cutoffs);
921 : :
922 : : /*
923 : : * FreezeXid will become the table's new relfrozenxid, and that mustn't go
924 : : * backwards, so take the max.
925 : : */
926 : : {
927 : 54 : TransactionId relfrozenxid = OldHeap->rd_rel->relfrozenxid;
928 : :
929 [ + - + + ]: 54 : if (TransactionIdIsValid(relfrozenxid) &&
930 : 54 : TransactionIdPrecedes(cutoffs.FreezeLimit, relfrozenxid))
931 : 15 : cutoffs.FreezeLimit = relfrozenxid;
932 : 54 : }
933 : :
934 : : /*
935 : : * MultiXactCutoff, similarly, shouldn't go backwards either.
936 : : */
937 : : {
938 : 54 : MultiXactId relminmxid = OldHeap->rd_rel->relminmxid;
939 : :
940 [ + - + - ]: 54 : if (MultiXactIdIsValid(relminmxid) &&
941 : 54 : MultiXactIdPrecedes(cutoffs.MultiXactCutoff, relminmxid))
942 : 0 : cutoffs.MultiXactCutoff = relminmxid;
943 : 54 : }
944 : :
945 : : /*
946 : : * Decide whether to use an indexscan or seqscan-and-optional-sort to scan
947 : : * the OldHeap. We know how to use a sort to duplicate the ordering of a
948 : : * btree index, and will use seqscan-and-sort for that case if the planner
949 : : * tells us it's cheaper. Otherwise, always indexscan if an index is
950 : : * provided, else plain seqscan.
951 : : */
952 [ + + - + ]: 54 : if (OldIndex != NULL && OldIndex->rd_rel->relam == BTREE_AM_OID)
953 : 54 : use_sort = plan_cluster_use_sort(RelationGetRelid(OldHeap),
954 : 27 : RelationGetRelid(OldIndex));
955 : : else
956 : 27 : use_sort = false;
957 : :
958 : : /* Log what we're doing */
959 [ + + + + ]: 54 : if (OldIndex != NULL && !use_sort)
960 [ - + # # : 11 : ereport(elevel,
- + - + #
# ]
961 : : (errmsg("clustering \"%s.%s\" using index scan on \"%s\"",
962 : : nspname,
963 : : RelationGetRelationName(OldHeap),
964 : : RelationGetRelationName(OldIndex))));
965 [ + + ]: 43 : else if (use_sort)
966 [ - + # # : 16 : ereport(elevel,
- + - + #
# ]
967 : : (errmsg("clustering \"%s.%s\" using sequential scan and sort",
968 : : nspname,
969 : : RelationGetRelationName(OldHeap))));
970 : : else
971 [ - + # # : 27 : ereport(elevel,
- + - + #
# ]
972 : : (errmsg("vacuuming \"%s.%s\"",
973 : : nspname,
974 : : RelationGetRelationName(OldHeap))));
975 : :
976 : : /*
977 : : * Hand off the actual copying to AM specific function, the generic code
978 : : * cannot know how to deal with visibility across AMs. Note that this
979 : : * routine is allowed to set FreezeXid / MultiXactCutoff to different
980 : : * values (e.g. because the AM doesn't use freezing).
981 : : */
982 : 108 : table_relation_copy_for_cluster(OldHeap, NewHeap, OldIndex, use_sort,
983 : 54 : cutoffs.OldestXmin, &cutoffs.FreezeLimit,
984 : 54 : &cutoffs.MultiXactCutoff,
985 : : &num_tuples, &tups_vacuumed,
986 : : &tups_recently_dead);
987 : :
988 : : /* return selected values to caller, get set as relfrozenxid/minmxid */
989 : 54 : *pFreezeXid = cutoffs.FreezeLimit;
990 : 54 : *pCutoffMulti = cutoffs.MultiXactCutoff;
991 : :
992 : : /* Reset rd_toastoid just to be tidy --- it shouldn't be looked at again */
993 : 54 : NewHeap->rd_toastoid = InvalidOid;
994 : :
995 : 54 : num_pages = RelationGetNumberOfBlocks(NewHeap);
996 : :
997 : : /* Log what we did */
998 [ - + # # : 54 : ereport(elevel,
- + - + #
# ]
999 : : (errmsg("\"%s.%s\": found %.0f removable, %.0f nonremovable row versions in %u pages",
1000 : : nspname,
1001 : : RelationGetRelationName(OldHeap),
1002 : : tups_vacuumed, num_tuples,
1003 : : RelationGetNumberOfBlocks(OldHeap)),
1004 : : errdetail("%.0f dead row versions cannot be removed yet.\n"
1005 : : "%s.",
1006 : : tups_recently_dead,
1007 : : pg_rusage_show(&ru0))));
1008 : :
1009 : : /* Update pg_class to reflect the correct values of pages and tuples. */
1010 : 54 : relRelation = table_open(RelationRelationId, RowExclusiveLock);
1011 : :
1012 : 54 : reltup = SearchSysCacheCopy1(RELOID,
1013 : : ObjectIdGetDatum(RelationGetRelid(NewHeap)));
1014 [ + - ]: 54 : if (!HeapTupleIsValid(reltup))
1015 [ # # # # ]: 0 : elog(ERROR, "cache lookup failed for relation %u",
1016 : : RelationGetRelid(NewHeap));
1017 : 54 : relform = (Form_pg_class) GETSTRUCT(reltup);
1018 : :
1019 : 54 : relform->relpages = num_pages;
1020 : 54 : relform->reltuples = num_tuples;
1021 : :
1022 : : /* Don't update the stats for pg_class. See swap_relation_files. */
1023 [ + + ]: 54 : if (RelationGetRelid(OldHeap) != RelationRelationId)
1024 : 53 : CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);
1025 : : else
1026 : 1 : CacheInvalidateRelcacheByTuple(reltup);
1027 : :
1028 : : /* Clean up. */
1029 : 54 : heap_freetuple(reltup);
1030 : 54 : table_close(relRelation, RowExclusiveLock);
1031 : :
1032 : : /* Make the update visible */
1033 : 54 : CommandCounterIncrement();
1034 : 54 : }
1035 : :
1036 : : /*
1037 : : * Swap the physical files of two given relations.
1038 : : *
1039 : : * We swap the physical identity (reltablespace, relfilenumber) while keeping
1040 : : * the same logical identities of the two relations. relpersistence is also
1041 : : * swapped, which is critical since it determines where buffers live for each
1042 : : * relation.
1043 : : *
1044 : : * We can swap associated TOAST data in either of two ways: recursively swap
1045 : : * the physical content of the toast tables (and their indexes), or swap the
1046 : : * TOAST links in the given relations' pg_class entries. The former is needed
1047 : : * to manage rewrites of shared catalogs (where we cannot change the pg_class
1048 : : * links) while the latter is the only way to handle cases in which a toast
1049 : : * table is added or removed altogether.
1050 : : *
1051 : : * Additionally, the first relation is marked with relfrozenxid set to
1052 : : * frozenXid. It seems a bit ugly to have this here, but the caller would
1053 : : * have to do it anyway, so having it here saves a heap_update. Note: in
1054 : : * the swap-toast-links case, we assume we don't need to change the toast
1055 : : * table's relfrozenxid: the new version of the toast table should already
1056 : : * have relfrozenxid set to RecentXmin, which is good enough.
1057 : : *
1058 : : * Lastly, if r2 and its toast table and toast index (if any) are mapped,
1059 : : * their OIDs are emitted into mapped_tables[]. This is hacky but beats
1060 : : * having to look the information up again later in finish_heap_swap.
1061 : : */
1062 : : static void
1063 : 303 : swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
1064 : : bool swap_toast_by_content,
1065 : : bool is_internal,
1066 : : TransactionId frozenXid,
1067 : : MultiXactId cutoffMulti,
1068 : : Oid *mapped_tables)
1069 : : {
1070 : 303 : Relation relRelation;
1071 : 303 : HeapTuple reltup1,
1072 : : reltup2;
1073 : 303 : Form_pg_class relform1,
1074 : : relform2;
1075 : 303 : RelFileNumber relfilenumber1,
1076 : : relfilenumber2;
1077 : 303 : RelFileNumber swaptemp;
1078 : 303 : char swptmpchr;
1079 : 303 : Oid relam1,
1080 : : relam2;
1081 : :
1082 : : /* We need writable copies of both pg_class tuples. */
1083 : 303 : relRelation = table_open(RelationRelationId, RowExclusiveLock);
1084 : :
1085 : 303 : reltup1 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r1));
1086 [ + - ]: 303 : if (!HeapTupleIsValid(reltup1))
1087 [ # # # # ]: 0 : elog(ERROR, "cache lookup failed for relation %u", r1);
1088 : 303 : relform1 = (Form_pg_class) GETSTRUCT(reltup1);
1089 : :
1090 : 303 : reltup2 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r2));
1091 [ + - ]: 303 : if (!HeapTupleIsValid(reltup2))
1092 [ # # # # ]: 0 : elog(ERROR, "cache lookup failed for relation %u", r2);
1093 : 303 : relform2 = (Form_pg_class) GETSTRUCT(reltup2);
1094 : :
1095 : 303 : relfilenumber1 = relform1->relfilenode;
1096 : 303 : relfilenumber2 = relform2->relfilenode;
1097 : 303 : relam1 = relform1->relam;
1098 : 303 : relam2 = relform2->relam;
1099 : :
1100 [ + + - + ]: 303 : if (RelFileNumberIsValid(relfilenumber1) &&
1101 : 299 : RelFileNumberIsValid(relfilenumber2))
1102 : : {
1103 : : /*
1104 : : * Normal non-mapped relations: swap relfilenumbers, reltablespaces,
1105 : : * relpersistence
1106 : : */
1107 [ + - ]: 299 : Assert(!target_is_pg_class);
1108 : :
1109 : 299 : swaptemp = relform1->relfilenode;
1110 : 299 : relform1->relfilenode = relform2->relfilenode;
1111 : 299 : relform2->relfilenode = swaptemp;
1112 : :
1113 : 299 : swaptemp = relform1->reltablespace;
1114 : 299 : relform1->reltablespace = relform2->reltablespace;
1115 : 299 : relform2->reltablespace = swaptemp;
1116 : :
1117 : 299 : swaptemp = relform1->relam;
1118 : 299 : relform1->relam = relform2->relam;
1119 : 299 : relform2->relam = swaptemp;
1120 : :
1121 : 299 : swptmpchr = relform1->relpersistence;
1122 : 299 : relform1->relpersistence = relform2->relpersistence;
1123 : 299 : relform2->relpersistence = swptmpchr;
1124 : :
1125 : : /* Also swap toast links, if we're swapping by links */
1126 [ + + ]: 299 : if (!swap_toast_by_content)
1127 : : {
1128 : 263 : swaptemp = relform1->reltoastrelid;
1129 : 263 : relform1->reltoastrelid = relform2->reltoastrelid;
1130 : 263 : relform2->reltoastrelid = swaptemp;
1131 : 263 : }
1132 : 299 : }
1133 : : else
1134 : : {
1135 : : /*
1136 : : * Mapped-relation case. Here we have to swap the relation mappings
1137 : : * instead of modifying the pg_class columns. Both must be mapped.
1138 : : */
1139 [ + - ]: 4 : if (RelFileNumberIsValid(relfilenumber1) ||
1140 : 4 : RelFileNumberIsValid(relfilenumber2))
1141 [ # # # # ]: 0 : elog(ERROR, "cannot swap mapped relation \"%s\" with non-mapped relation",
1142 : : NameStr(relform1->relname));
1143 : :
1144 : : /*
1145 : : * We can't change the tablespace nor persistence of a mapped rel, and
1146 : : * we can't handle toast link swapping for one either, because we must
1147 : : * not apply any critical changes to its pg_class row. These cases
1148 : : * should be prevented by upstream permissions tests, so these checks
1149 : : * are non-user-facing emergency backstop.
1150 : : */
1151 [ + - ]: 4 : if (relform1->reltablespace != relform2->reltablespace)
1152 [ # # # # ]: 0 : elog(ERROR, "cannot change tablespace of mapped relation \"%s\"",
1153 : : NameStr(relform1->relname));
1154 [ + - ]: 4 : if (relform1->relpersistence != relform2->relpersistence)
1155 [ # # # # ]: 0 : elog(ERROR, "cannot change persistence of mapped relation \"%s\"",
1156 : : NameStr(relform1->relname));
1157 [ + - ]: 4 : if (relform1->relam != relform2->relam)
1158 [ # # # # ]: 0 : elog(ERROR, "cannot change access method of mapped relation \"%s\"",
1159 : : NameStr(relform1->relname));
1160 [ + + ]: 5 : if (!swap_toast_by_content &&
1161 [ + - ]: 1 : (relform1->reltoastrelid || relform2->reltoastrelid))
1162 [ # # # # ]: 0 : elog(ERROR, "cannot swap toast by links for mapped relation \"%s\"",
1163 : : NameStr(relform1->relname));
1164 : :
1165 : : /*
1166 : : * Fetch the mappings --- shouldn't fail, but be paranoid
1167 : : */
1168 : 4 : relfilenumber1 = RelationMapOidToFilenumber(r1, relform1->relisshared);
1169 [ + - ]: 4 : if (!RelFileNumberIsValid(relfilenumber1))
1170 [ # # # # ]: 0 : elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
1171 : : NameStr(relform1->relname), r1);
1172 : 4 : relfilenumber2 = RelationMapOidToFilenumber(r2, relform2->relisshared);
1173 [ + - ]: 4 : if (!RelFileNumberIsValid(relfilenumber2))
1174 [ # # # # ]: 0 : elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
1175 : : NameStr(relform2->relname), r2);
1176 : :
1177 : : /*
1178 : : * Send replacement mappings to relmapper. Note these won't actually
1179 : : * take effect until CommandCounterIncrement.
1180 : : */
1181 : 4 : RelationMapUpdateMap(r1, relfilenumber2, relform1->relisshared, false);
1182 : 4 : RelationMapUpdateMap(r2, relfilenumber1, relform2->relisshared, false);
1183 : :
1184 : : /* Pass OIDs of mapped r2 tables back to caller */
1185 : 4 : *mapped_tables++ = r2;
1186 : : }
1187 : :
1188 : : /*
1189 : : * Recognize that rel1's relfilenumber (swapped from rel2) is new in this
1190 : : * subtransaction. The rel2 storage (swapped from rel1) may or may not be
1191 : : * new.
1192 : : */
1193 : : {
1194 : 303 : Relation rel1,
1195 : : rel2;
1196 : :
1197 : 303 : rel1 = relation_open(r1, NoLock);
1198 : 303 : rel2 = relation_open(r2, NoLock);
1199 : 303 : rel2->rd_createSubid = rel1->rd_createSubid;
1200 : 303 : rel2->rd_newRelfilelocatorSubid = rel1->rd_newRelfilelocatorSubid;
1201 : 303 : rel2->rd_firstRelfilelocatorSubid = rel1->rd_firstRelfilelocatorSubid;
1202 : 303 : RelationAssumeNewRelfilelocator(rel1);
1203 : 303 : relation_close(rel1, NoLock);
1204 : 303 : relation_close(rel2, NoLock);
1205 : 303 : }
1206 : :
1207 : : /*
1208 : : * In the case of a shared catalog, these next few steps will only affect
1209 : : * our own database's pg_class row; but that's okay, because they are all
1210 : : * noncritical updates. That's also an important fact for the case of a
1211 : : * mapped catalog, because it's possible that we'll commit the map change
1212 : : * and then fail to commit the pg_class update.
1213 : : */
1214 : :
1215 : : /* set rel1's frozen Xid and minimum MultiXid */
1216 [ + + ]: 303 : if (relform1->relkind != RELKIND_INDEX)
1217 : : {
1218 [ + - + - ]: 290 : Assert(!TransactionIdIsValid(frozenXid) ||
1219 : : TransactionIdIsNormal(frozenXid));
1220 : 290 : relform1->relfrozenxid = frozenXid;
1221 : 290 : relform1->relminmxid = cutoffMulti;
1222 : 290 : }
1223 : :
1224 : : /* swap size statistics too, since new rel has freshly-updated stats */
1225 : : {
1226 : 303 : int32 swap_pages;
1227 : 303 : float4 swap_tuples;
1228 : 303 : int32 swap_allvisible;
1229 : 303 : int32 swap_allfrozen;
1230 : :
1231 : 303 : swap_pages = relform1->relpages;
1232 : 303 : relform1->relpages = relform2->relpages;
1233 : 303 : relform2->relpages = swap_pages;
1234 : :
1235 : 303 : swap_tuples = relform1->reltuples;
1236 : 303 : relform1->reltuples = relform2->reltuples;
1237 : 303 : relform2->reltuples = swap_tuples;
1238 : :
1239 : 303 : swap_allvisible = relform1->relallvisible;
1240 : 303 : relform1->relallvisible = relform2->relallvisible;
1241 : 303 : relform2->relallvisible = swap_allvisible;
1242 : :
1243 : 303 : swap_allfrozen = relform1->relallfrozen;
1244 : 303 : relform1->relallfrozen = relform2->relallfrozen;
1245 : 303 : relform2->relallfrozen = swap_allfrozen;
1246 : 303 : }
1247 : :
1248 : : /*
1249 : : * Update the tuples in pg_class --- unless the target relation of the
1250 : : * swap is pg_class itself. In that case, there is zero point in making
1251 : : * changes because we'd be updating the old data that we're about to throw
1252 : : * away. Because the real work being done here for a mapped relation is
1253 : : * just to change the relation map settings, it's all right to not update
1254 : : * the pg_class rows in this case. The most important changes will instead
1255 : : * performed later, in finish_heap_swap() itself.
1256 : : */
1257 [ + + ]: 303 : if (!target_is_pg_class)
1258 : : {
1259 : 302 : CatalogIndexState indstate;
1260 : :
1261 : 302 : indstate = CatalogOpenIndexes(relRelation);
1262 : 604 : CatalogTupleUpdateWithInfo(relRelation, &reltup1->t_self, reltup1,
1263 : 302 : indstate);
1264 : 604 : CatalogTupleUpdateWithInfo(relRelation, &reltup2->t_self, reltup2,
1265 : 302 : indstate);
1266 : 302 : CatalogCloseIndexes(indstate);
1267 : 302 : }
1268 : : else
1269 : : {
1270 : : /* no update ... but we do still need relcache inval */
1271 : 1 : CacheInvalidateRelcacheByTuple(reltup1);
1272 : 1 : CacheInvalidateRelcacheByTuple(reltup2);
1273 : : }
1274 : :
1275 : : /*
1276 : : * Now that pg_class has been updated with its relevant information for
1277 : : * the swap, update the dependency of the relations to point to their new
1278 : : * table AM, if it has changed.
1279 : : */
1280 [ + + ]: 303 : if (relam1 != relam2)
1281 : : {
1282 : 6 : if (changeDependencyFor(RelationRelationId,
1283 : 6 : r1,
1284 : : AccessMethodRelationId,
1285 : 6 : relam1,
1286 [ + - + - ]: 12 : relam2) != 1)
1287 [ # # # # ]: 0 : elog(ERROR, "could not change access method dependency for relation \"%s.%s\"",
1288 : : get_namespace_name(get_rel_namespace(r1)),
1289 : : get_rel_name(r1));
1290 : 6 : if (changeDependencyFor(RelationRelationId,
1291 : 6 : r2,
1292 : : AccessMethodRelationId,
1293 : 6 : relam2,
1294 [ + - + - ]: 12 : relam1) != 1)
1295 [ # # # # ]: 0 : elog(ERROR, "could not change access method dependency for relation \"%s.%s\"",
1296 : : get_namespace_name(get_rel_namespace(r2)),
1297 : : get_rel_name(r2));
1298 : 6 : }
1299 : :
1300 : : /*
1301 : : * Post alter hook for modified relations. The change to r2 is always
1302 : : * internal, but r1 depends on the invocation context.
1303 : : */
1304 [ + - ]: 303 : InvokeObjectPostAlterHookArg(RelationRelationId, r1, 0,
1305 : : InvalidOid, is_internal);
1306 [ + - ]: 303 : InvokeObjectPostAlterHookArg(RelationRelationId, r2, 0,
1307 : : InvalidOid, true);
1308 : :
1309 : : /*
1310 : : * If we have toast tables associated with the relations being swapped,
1311 : : * deal with them too.
1312 : : */
1313 [ + + + + ]: 303 : if (relform1->reltoastrelid || relform2->reltoastrelid)
1314 : : {
1315 [ + + ]: 97 : if (swap_toast_by_content)
1316 : : {
1317 [ + - ]: 13 : if (relform1->reltoastrelid && relform2->reltoastrelid)
1318 : : {
1319 : : /* Recursively swap the contents of the toast tables */
1320 : 26 : swap_relation_files(relform1->reltoastrelid,
1321 : 13 : relform2->reltoastrelid,
1322 : 13 : target_is_pg_class,
1323 : 13 : swap_toast_by_content,
1324 : 13 : is_internal,
1325 : 13 : frozenXid,
1326 : 13 : cutoffMulti,
1327 : 13 : mapped_tables);
1328 : 13 : }
1329 : : else
1330 : : {
1331 : : /* caller messed up */
1332 [ # # # # ]: 0 : elog(ERROR, "cannot swap toast files by content when there's only one");
1333 : : }
1334 : 13 : }
1335 : : else
1336 : : {
1337 : : /*
1338 : : * We swapped the ownership links, so we need to change dependency
1339 : : * data to match.
1340 : : *
1341 : : * NOTE: it is possible that only one table has a toast table.
1342 : : *
1343 : : * NOTE: at present, a TOAST table's only dependency is the one on
1344 : : * its owning table. If more are ever created, we'd need to use
1345 : : * something more selective than deleteDependencyRecordsFor() to
1346 : : * get rid of just the link we want.
1347 : : */
1348 : 84 : ObjectAddress baseobject,
1349 : : toastobject;
1350 : 84 : long count;
1351 : :
1352 : : /*
1353 : : * We disallow this case for system catalogs, to avoid the
1354 : : * possibility that the catalog we're rebuilding is one of the
1355 : : * ones the dependency changes would change. It's too late to be
1356 : : * making any data changes to the target catalog.
1357 : : */
1358 [ + - ]: 84 : if (IsSystemClass(r1, relform1))
1359 [ # # # # ]: 0 : elog(ERROR, "cannot swap toast files by links for system catalogs");
1360 : :
1361 : : /* Delete old dependencies */
1362 [ + + ]: 84 : if (relform1->reltoastrelid)
1363 : : {
1364 : 79 : count = deleteDependencyRecordsFor(RelationRelationId,
1365 : 79 : relform1->reltoastrelid,
1366 : : false);
1367 [ + - ]: 79 : if (count != 1)
1368 [ # # # # ]: 0 : elog(ERROR, "expected one dependency record for TOAST table, found %ld",
1369 : : count);
1370 : 79 : }
1371 [ - + ]: 84 : if (relform2->reltoastrelid)
1372 : : {
1373 : 84 : count = deleteDependencyRecordsFor(RelationRelationId,
1374 : 84 : relform2->reltoastrelid,
1375 : : false);
1376 [ + - ]: 84 : if (count != 1)
1377 [ # # # # ]: 0 : elog(ERROR, "expected one dependency record for TOAST table, found %ld",
1378 : : count);
1379 : 84 : }
1380 : :
1381 : : /* Register new dependencies */
1382 : 84 : baseobject.classId = RelationRelationId;
1383 : 84 : baseobject.objectSubId = 0;
1384 : 84 : toastobject.classId = RelationRelationId;
1385 : 84 : toastobject.objectSubId = 0;
1386 : :
1387 [ + + ]: 84 : if (relform1->reltoastrelid)
1388 : : {
1389 : 79 : baseobject.objectId = r1;
1390 : 79 : toastobject.objectId = relform1->reltoastrelid;
1391 : 79 : recordDependencyOn(&toastobject, &baseobject,
1392 : : DEPENDENCY_INTERNAL);
1393 : 79 : }
1394 : :
1395 [ - + ]: 84 : if (relform2->reltoastrelid)
1396 : : {
1397 : 84 : baseobject.objectId = r2;
1398 : 84 : toastobject.objectId = relform2->reltoastrelid;
1399 : 84 : recordDependencyOn(&toastobject, &baseobject,
1400 : : DEPENDENCY_INTERNAL);
1401 : 84 : }
1402 : 84 : }
1403 : 97 : }
1404 : :
1405 : : /*
1406 : : * If we're swapping two toast tables by content, do the same for their
1407 : : * valid index. The swap can actually be safely done only if the relations
1408 : : * have indexes.
1409 : : */
1410 [ + + ]: 303 : if (swap_toast_by_content &&
1411 [ + + - + ]: 39 : relform1->relkind == RELKIND_TOASTVALUE &&
1412 : 13 : relform2->relkind == RELKIND_TOASTVALUE)
1413 : : {
1414 : 13 : Oid toastIndex1,
1415 : : toastIndex2;
1416 : :
1417 : : /* Get valid index for each relation */
1418 : 13 : toastIndex1 = toast_get_valid_index(r1,
1419 : : AccessExclusiveLock);
1420 : 13 : toastIndex2 = toast_get_valid_index(r2,
1421 : : AccessExclusiveLock);
1422 : :
1423 : 26 : swap_relation_files(toastIndex1,
1424 : 13 : toastIndex2,
1425 : 13 : target_is_pg_class,
1426 : 13 : swap_toast_by_content,
1427 : 13 : is_internal,
1428 : : InvalidTransactionId,
1429 : : InvalidMultiXactId,
1430 : 13 : mapped_tables);
1431 : 13 : }
1432 : :
1433 : : /* Clean up. */
1434 : 303 : heap_freetuple(reltup1);
1435 : 303 : heap_freetuple(reltup2);
1436 : :
1437 : 303 : table_close(relRelation, RowExclusiveLock);
1438 : 303 : }
1439 : :
1440 : : /*
1441 : : * Remove the transient table that was built by make_new_heap, and finish
1442 : : * cleaning up (including rebuilding all indexes on the old heap).
1443 : : */
1444 : : void
1445 : 277 : finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap,
1446 : : bool is_system_catalog,
1447 : : bool swap_toast_by_content,
1448 : : bool check_constraints,
1449 : : bool is_internal,
1450 : : TransactionId frozenXid,
1451 : : MultiXactId cutoffMulti,
1452 : : char newrelpersistence)
1453 : : {
1454 : 277 : ObjectAddress object;
1455 : 277 : Oid mapped_tables[4];
1456 : 277 : int reindex_flags;
1457 : 277 : ReindexParams reindex_params = {0};
1458 : 277 : int i;
1459 : :
1460 : : /* Report that we are now swapping relation files */
1461 : 277 : pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
1462 : : PROGRESS_CLUSTER_PHASE_SWAP_REL_FILES);
1463 : :
1464 : : /* Zero out possible results from swap_relation_files */
1465 : 277 : memset(mapped_tables, 0, sizeof(mapped_tables));
1466 : :
1467 : : /*
1468 : : * Swap the contents of the heap relations (including any toast tables).
1469 : : * Also set old heap's relfrozenxid to frozenXid.
1470 : : */
1471 : 554 : swap_relation_files(OIDOldHeap, OIDNewHeap,
1472 : 277 : (OIDOldHeap == RelationRelationId),
1473 : 277 : swap_toast_by_content, is_internal,
1474 : 277 : frozenXid, cutoffMulti, mapped_tables);
1475 : :
1476 : : /*
1477 : : * If it's a system catalog, queue a sinval message to flush all catcaches
1478 : : * on the catalog when we reach CommandCounterIncrement.
1479 : : */
1480 [ + + ]: 277 : if (is_system_catalog)
1481 : 5 : CacheInvalidateCatalog(OIDOldHeap);
1482 : :
1483 : : /*
1484 : : * Rebuild each index on the relation (but not the toast table, which is
1485 : : * all-new at this point). It is important to do this before the DROP
1486 : : * step because if we are processing a system catalog that will be used
1487 : : * during DROP, we want to have its indexes available. There is no
1488 : : * advantage to the other order anyway because this is all transactional,
1489 : : * so no chance to reclaim disk space before commit. We do not need a
1490 : : * final CommandCounterIncrement() because reindex_relation does it.
1491 : : *
1492 : : * Note: because index_build is called via reindex_relation, it will never
1493 : : * set indcheckxmin true for the indexes. This is OK even though in some
1494 : : * sense we are building new indexes rather than rebuilding existing ones,
1495 : : * because the new heap won't contain any HOT chains at all, let alone
1496 : : * broken ones, so it can't be necessary to set indcheckxmin.
1497 : : */
1498 : 277 : reindex_flags = REINDEX_REL_SUPPRESS_INDEX_USE;
1499 [ + + ]: 277 : if (check_constraints)
1500 : 223 : reindex_flags |= REINDEX_REL_CHECK_CONSTRAINTS;
1501 : :
1502 : : /*
1503 : : * Ensure that the indexes have the same persistence as the parent
1504 : : * relation.
1505 : : */
1506 [ + + ]: 277 : if (newrelpersistence == RELPERSISTENCE_UNLOGGED)
1507 : 6 : reindex_flags |= REINDEX_REL_FORCE_INDEXES_UNLOGGED;
1508 [ + + ]: 271 : else if (newrelpersistence == RELPERSISTENCE_PERMANENT)
1509 : 258 : reindex_flags |= REINDEX_REL_FORCE_INDEXES_PERMANENT;
1510 : :
1511 : : /* Report that we are now reindexing relations */
1512 : 277 : pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
1513 : : PROGRESS_CLUSTER_PHASE_REBUILD_INDEX);
1514 : :
1515 : 277 : reindex_relation(NULL, OIDOldHeap, reindex_flags, &reindex_params);
1516 : :
1517 : : /* Report that we are now doing clean up */
1518 : 277 : pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
1519 : : PROGRESS_CLUSTER_PHASE_FINAL_CLEANUP);
1520 : :
1521 : : /*
1522 : : * If the relation being rebuilt is pg_class, swap_relation_files()
1523 : : * couldn't update pg_class's own pg_class entry (check comments in
1524 : : * swap_relation_files()), thus relfrozenxid was not updated. That's
1525 : : * annoying because a potential reason for doing a VACUUM FULL is an
1526 : : * imminent or actual anti-wraparound shutdown. So, now that we can
1527 : : * access the new relation using its indices, update relfrozenxid.
1528 : : * pg_class doesn't have a toast relation, so we don't need to update the
1529 : : * corresponding toast relation. Note that there's little point moving all
1530 : : * relfrozenxid updates here since swap_relation_files() needs to write to
1531 : : * pg_class for non-mapped relations anyway.
1532 : : */
1533 [ + + ]: 277 : if (OIDOldHeap == RelationRelationId)
1534 : : {
1535 : 1 : Relation relRelation;
1536 : 1 : HeapTuple reltup;
1537 : 1 : Form_pg_class relform;
1538 : :
1539 : 1 : relRelation = table_open(RelationRelationId, RowExclusiveLock);
1540 : :
1541 : 1 : reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(OIDOldHeap));
1542 [ + - ]: 1 : if (!HeapTupleIsValid(reltup))
1543 [ # # # # ]: 0 : elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
1544 : 1 : relform = (Form_pg_class) GETSTRUCT(reltup);
1545 : :
1546 : 1 : relform->relfrozenxid = frozenXid;
1547 : 1 : relform->relminmxid = cutoffMulti;
1548 : :
1549 : 1 : CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);
1550 : :
1551 : 1 : table_close(relRelation, RowExclusiveLock);
1552 : 1 : }
1553 : :
1554 : : /* Destroy new heap with old filenumber */
1555 : 277 : object.classId = RelationRelationId;
1556 : 277 : object.objectId = OIDNewHeap;
1557 : 277 : object.objectSubId = 0;
1558 : :
1559 : : /*
1560 : : * The new relation is local to our transaction and we know nothing
1561 : : * depends on it, so DROP_RESTRICT should be OK.
1562 : : */
1563 : 277 : performDeletion(&object, DROP_RESTRICT, PERFORM_DELETION_INTERNAL);
1564 : :
1565 : : /* performDeletion does CommandCounterIncrement at end */
1566 : :
1567 : : /*
1568 : : * Now we must remove any relation mapping entries that we set up for the
1569 : : * transient table, as well as its toast table and toast index if any. If
1570 : : * we fail to do this before commit, the relmapper will complain about new
1571 : : * permanent map entries being added post-bootstrap.
1572 : : */
1573 [ + + ]: 281 : for (i = 0; OidIsValid(mapped_tables[i]); i++)
1574 : 4 : RelationMapRemoveMapping(mapped_tables[i]);
1575 : :
1576 : : /*
1577 : : * At this point, everything is kosher except that, if we did toast swap
1578 : : * by links, the toast table's name corresponds to the transient table.
1579 : : * The name is irrelevant to the backend because it's referenced by OID,
1580 : : * but users looking at the catalogs could be confused. Rename it to
1581 : : * prevent this problem.
1582 : : *
1583 : : * Note no lock required on the relation, because we already hold an
1584 : : * exclusive lock on it.
1585 : : */
1586 [ + + ]: 277 : if (!swap_toast_by_content)
1587 : : {
1588 : 261 : Relation newrel;
1589 : :
1590 : 261 : newrel = table_open(OIDOldHeap, NoLock);
1591 [ + + ]: 261 : if (OidIsValid(newrel->rd_rel->reltoastrelid))
1592 : : {
1593 : 79 : Oid toastidx;
1594 : 79 : char NewToastName[NAMEDATALEN];
1595 : :
1596 : : /* Get the associated valid index to be renamed */
1597 : 79 : toastidx = toast_get_valid_index(newrel->rd_rel->reltoastrelid,
1598 : : NoLock);
1599 : :
1600 : : /* rename the toast table ... */
1601 : 158 : snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u",
1602 : 79 : OIDOldHeap);
1603 : 158 : RenameRelationInternal(newrel->rd_rel->reltoastrelid,
1604 : 79 : NewToastName, true, false);
1605 : :
1606 : : /* ... and its valid index too. */
1607 : 158 : snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u_index",
1608 : 79 : OIDOldHeap);
1609 : :
1610 : 158 : RenameRelationInternal(toastidx,
1611 : 79 : NewToastName, true, true);
1612 : :
1613 : : /*
1614 : : * Reset the relrewrite for the toast. The command-counter
1615 : : * increment is required here as we are about to update the tuple
1616 : : * that is updated as part of RenameRelationInternal.
1617 : : */
1618 : 79 : CommandCounterIncrement();
1619 : 79 : ResetRelRewrite(newrel->rd_rel->reltoastrelid);
1620 : 79 : }
1621 : 261 : relation_close(newrel, NoLock);
1622 : 261 : }
1623 : :
1624 : : /* if it's not a catalog table, clear any missing attribute settings */
1625 [ + + ]: 277 : if (!is_system_catalog)
1626 : : {
1627 : 269 : Relation newrel;
1628 : :
1629 : 269 : newrel = table_open(OIDOldHeap, NoLock);
1630 : 269 : RelationClearMissing(newrel);
1631 : 269 : relation_close(newrel, NoLock);
1632 : 269 : }
1633 : 277 : }
1634 : :
1635 : :
1636 : : /*
1637 : : * Get a list of tables that the current user has privileges on and
1638 : : * have indisclustered set. Return the list in a List * of RelToCluster
1639 : : * (stored in the specified memory context), each one giving the tableOid
1640 : : * and the indexOid on which the table is already clustered.
1641 : : */
1642 : : static List *
1643 : 1 : get_tables_to_cluster(MemoryContext cluster_context)
1644 : : {
1645 : 1 : Relation indRelation;
1646 : 1 : TableScanDesc scan;
1647 : 1 : ScanKeyData entry;
1648 : 1 : HeapTuple indexTuple;
1649 : 1 : Form_pg_index index;
1650 : 1 : MemoryContext old_context;
1651 : 1 : List *rtcs = NIL;
1652 : :
1653 : : /*
1654 : : * Get all indexes that have indisclustered set and that the current user
1655 : : * has the appropriate privileges for.
1656 : : */
1657 : 1 : indRelation = table_open(IndexRelationId, AccessShareLock);
1658 : 1 : ScanKeyInit(&entry,
1659 : : Anum_pg_index_indisclustered,
1660 : : BTEqualStrategyNumber, F_BOOLEQ,
1661 : 1 : BoolGetDatum(true));
1662 : 1 : scan = table_beginscan_catalog(indRelation, 1, &entry);
1663 [ + + ]: 4 : while ((indexTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1664 : : {
1665 : 3 : RelToCluster *rtc;
1666 : :
1667 : 3 : index = (Form_pg_index) GETSTRUCT(indexTuple);
1668 : :
1669 [ + + ]: 3 : if (!cluster_is_permitted_for_relation(index->indrelid, GetUserId()))
1670 : 2 : continue;
1671 : :
1672 : : /* Use a permanent memory context for the result list */
1673 : 1 : old_context = MemoryContextSwitchTo(cluster_context);
1674 : :
1675 : 1 : rtc = palloc_object(RelToCluster);
1676 : 1 : rtc->tableOid = index->indrelid;
1677 : 1 : rtc->indexOid = index->indexrelid;
1678 : 1 : rtcs = lappend(rtcs, rtc);
1679 : :
1680 : 1 : MemoryContextSwitchTo(old_context);
1681 [ - + + ]: 3 : }
1682 : 1 : table_endscan(scan);
1683 : :
1684 : 1 : relation_close(indRelation, AccessShareLock);
1685 : :
1686 : 2 : return rtcs;
1687 : 1 : }
1688 : :
1689 : : /*
1690 : : * Given an index on a partitioned table, return a list of RelToCluster for
1691 : : * all the children leaves tables/indexes.
1692 : : *
1693 : : * Like expand_vacuum_rel, but here caller must hold AccessExclusiveLock
1694 : : * on the table containing the index.
1695 : : */
1696 : : static List *
1697 : 2 : get_tables_to_cluster_partitioned(MemoryContext cluster_context, Oid indexOid)
1698 : : {
1699 : 2 : List *inhoids;
1700 : 2 : ListCell *lc;
1701 : 2 : List *rtcs = NIL;
1702 : 2 : MemoryContext old_context;
1703 : :
1704 : : /* Do not lock the children until they're processed */
1705 : 2 : inhoids = find_all_inheritors(indexOid, NoLock, NULL);
1706 : :
1707 [ + - + + : 12 : foreach(lc, inhoids)
+ + ]
1708 : : {
1709 : 10 : Oid indexrelid = lfirst_oid(lc);
1710 : 10 : Oid relid = IndexGetRelation(indexrelid, false);
1711 : 10 : RelToCluster *rtc;
1712 : :
1713 : : /* consider only leaf indexes */
1714 [ + + ]: 10 : if (get_rel_relkind(indexrelid) != RELKIND_INDEX)
1715 : 5 : continue;
1716 : :
1717 : : /*
1718 : : * It's possible that the user does not have privileges to CLUSTER the
1719 : : * leaf partition despite having such privileges on the partitioned
1720 : : * table. We skip any partitions which the user is not permitted to
1721 : : * CLUSTER.
1722 : : */
1723 [ + + ]: 5 : if (!cluster_is_permitted_for_relation(relid, GetUserId()))
1724 : 1 : continue;
1725 : :
1726 : : /* Use a permanent memory context for the result list */
1727 : 4 : old_context = MemoryContextSwitchTo(cluster_context);
1728 : :
1729 : 4 : rtc = palloc_object(RelToCluster);
1730 : 4 : rtc->tableOid = relid;
1731 : 4 : rtc->indexOid = indexrelid;
1732 : 4 : rtcs = lappend(rtcs, rtc);
1733 : :
1734 : 4 : MemoryContextSwitchTo(old_context);
1735 [ - + + ]: 10 : }
1736 : :
1737 : 4 : return rtcs;
1738 : 2 : }
1739 : :
1740 : : /*
1741 : : * Return whether userid has privileges to CLUSTER relid. If not, this
1742 : : * function emits a WARNING.
1743 : : */
1744 : : static bool
1745 : 13 : cluster_is_permitted_for_relation(Oid relid, Oid userid)
1746 : : {
1747 [ + + ]: 13 : if (pg_class_aclcheck(relid, userid, ACL_MAINTAIN) == ACLCHECK_OK)
1748 : 10 : return true;
1749 : :
1750 [ - + + - ]: 3 : ereport(WARNING,
1751 : : (errmsg("permission denied to cluster \"%s\", skipping it",
1752 : : get_rel_name(relid))));
1753 : 3 : return false;
1754 : 13 : }
|