Branch data Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * pgpa_identifier.c
4 : : * create appropriate identifiers for range table entries
5 : : *
6 : : * The goal of this module is to be able to produce identifiers for range
7 : : * table entries that are unique, understandable to human beings, and
8 : : * able to be reconstructed during future planning cycles. As an
9 : : * exception, we do not care about, or want to produce, identifiers for
10 : : * RTE_JOIN entries. This is because (1) we would end up with a ton of
11 : : * RTEs with unhelpful names like unnamed_join_17; (2) not all joins have
12 : : * RTEs; and (3) we intend to refer to joins by their constituent members
13 : : * rather than by reference to the join RTE.
14 : : *
15 : : * In general, we construct identifiers of the following form:
16 : : *
17 : : * alias_name#occurrence_number/child_table_name@subquery_name
18 : : *
19 : : * However, occurrence_number is omitted when it is the first occurrence
20 : : * within the same subquery, child_table_name is omitted for relations that
21 : : * are not child tables, and subquery_name is omitted for the topmost
22 : : * query level. Whenever an item is omitted, the preceding punctuation mark
23 : : * is also omitted. Identifier-style escaping is applied to alias_name and
24 : : * subquery_name. Whenever we include child_table_name, we always
25 : : * schema-qualified name, but writing their own plan advice are not required
26 : : * to do so. Identifier-style escaping is applied to the schema and to the
27 : : * relation names separately.
28 : : *
29 : : * The upshot of all of these rules is that in simple cases, the relation
30 : : * identifier is textually identical to the alias name, making life easier
31 : : * for users. However, even in complex cases, every relation identifier
32 : : * for a given query will be unique (or at least we hope so: if not, this
33 : : * code is buggy and the identifier format might need to be rethought).
34 : : *
35 : : * A key goal of this system is that we want to be able to reconstruct the
36 : : * same identifiers during a future planning cycle for the same query, so
37 : : * that if a certain behavior is specified for a certain identifier, we can
38 : : * properly identify the RTI for which that behavior is mandated. In order
39 : : * for this to work, subquery names must be unique and known before the
40 : : * subquery is planned, and the remainder of the identifier must not depend
41 : : * on any part of the query outside of the current subquery level. In
42 : : * particular, occurrence_number must be calculated relative to the range
43 : : * table for the relevant subquery, not the final flattened range table.
44 : : *
45 : : * Copyright (c) 2016-2024, PostgreSQL Global Development Group
46 : : *
47 : : * contrib/pg_plan_advice/pgpa_identifier.c
48 : : *
49 : : *-------------------------------------------------------------------------
50 : : */
51 : :
52 : : #include "postgres.h"
53 : :
54 : : #include "pgpa_identifier.h"
55 : :
56 : : #include "parser/parsetree.h"
57 : : #include "utils/builtins.h"
58 : : #include "utils/lsyscache.h"
59 : :
60 : : static Index *pgpa_create_top_rti_map(Index rtable_length, List *rtable,
61 : : List *appinfos);
62 : : static int pgpa_occurrence_number(List *rtable, Index *top_rti_map,
63 : : SubPlanRTInfo *rtinfo, Index rti);
64 : :
65 : : /*
66 : : * Create a range table identifier from scratch.
67 : : *
68 : : * This function leaves the caller to do all the heavy lifting, so it's
69 : : * generally better to use one of the functions below instead.
70 : : *
71 : : * See the file header comments for more details on the format of an
72 : : * identifier.
73 : : */
74 : : const char *
75 : 190705 : pgpa_identifier_string(const pgpa_identifier *rid)
76 : : {
77 : 190705 : const char *result;
78 : :
79 [ + - ]: 190705 : Assert(rid->alias_name != NULL);
80 : 190705 : result = quote_identifier(rid->alias_name);
81 : :
82 [ + - ]: 190705 : Assert(rid->occurrence >= 0);
83 [ + + ]: 190705 : if (rid->occurrence > 1)
84 : 4009 : result = psprintf("%s#%d", result, rid->occurrence);
85 : :
86 [ + + ]: 190705 : if (rid->partrel != NULL)
87 : : {
88 [ + + ]: 19097 : if (rid->partnsp == NULL)
89 : 14 : result = psprintf("%s/%s", result,
90 : 7 : quote_identifier(rid->partrel));
91 : : else
92 : 38180 : result = psprintf("%s/%s.%s", result,
93 : 19090 : quote_identifier(rid->partnsp),
94 : 19090 : quote_identifier(rid->partrel));
95 : 19097 : }
96 : :
97 [ + + ]: 190705 : if (rid->plan_name != NULL)
98 : 33220 : result = psprintf("%s@%s", result, quote_identifier(rid->plan_name));
99 : :
100 : 381410 : return result;
101 : 190705 : }
102 : :
103 : : /*
104 : : * Compute a relation identifier for a particular RTI.
105 : : *
106 : : * The caller provides root and rti, and gets the necessary details back via
107 : : * the remaining parameters.
108 : : */
109 : : void
110 : 144276 : pgpa_compute_identifier_by_rti(PlannerInfo *root, Index rti,
111 : : pgpa_identifier *rid)
112 : : {
113 : 144276 : Index top_rti = rti;
114 : 144276 : int occurrence = 1;
115 : 144276 : RangeTblEntry *rte;
116 : 144276 : RangeTblEntry *top_rte;
117 : 144276 : char *partnsp = NULL;
118 : 144276 : char *partrel = NULL;
119 : :
120 : : /*
121 : : * If this is a child RTE, find the topmost parent that is still of type
122 : : * RTE_RELATION. We do this because we identify children of partitioned
123 : : * tables by the name of the child table, but subqueries can also have
124 : : * child rels and we don't care about those here.
125 : : */
126 : 167752 : for (;;)
127 : : {
128 : 167752 : AppendRelInfo *appinfo;
129 : 167752 : RangeTblEntry *parent_rte;
130 : :
131 : : /* append_rel_array can be NULL if there are no children */
132 [ + + + + ]: 167752 : if (root->append_rel_array == NULL ||
133 : 47881 : (appinfo = root->append_rel_array[top_rti]) == NULL)
134 : 143734 : break;
135 : :
136 [ + - ]: 24018 : parent_rte = planner_rt_fetch(appinfo->parent_relid, root);
137 [ + + ]: 24018 : if (parent_rte->rtekind != RTE_RELATION)
138 : 542 : break;
139 : :
140 : 23476 : top_rti = appinfo->parent_relid;
141 [ + + ]: 167752 : }
142 : :
143 : : /* Get the range table entries for the RTI and top RTI. */
144 [ + - ]: 144276 : rte = planner_rt_fetch(rti, root);
145 [ + - ]: 144276 : top_rte = planner_rt_fetch(top_rti, root);
146 [ + - ]: 144276 : Assert(rte->rtekind != RTE_JOIN);
147 [ + - ]: 144276 : Assert(top_rte->rtekind != RTE_JOIN);
148 : :
149 : : /* Work out the correct occurrence number. */
150 [ + + ]: 299707 : for (Index prior_rti = 1; prior_rti < top_rti; ++prior_rti)
151 : : {
152 : 155431 : RangeTblEntry *prior_rte;
153 : 155431 : AppendRelInfo *appinfo;
154 : :
155 : : /*
156 : : * If this is a child rel of a parent that is a relation, skip it.
157 : : *
158 : : * Such range table entries are disambiguated by mentioning the schema
159 : : * and name of the table, not by counting them as separate occurrences
160 : : * of the same table.
161 : : *
162 : : * NB: append_rel_array can be NULL if there are no children
163 : : */
164 [ + + + + ]: 155431 : if (root->append_rel_array != NULL &&
165 : 16951 : (appinfo = root->append_rel_array[prior_rti]) != NULL)
166 : : {
167 : 562 : RangeTblEntry *parent_rte;
168 : :
169 [ + - ]: 562 : parent_rte = planner_rt_fetch(appinfo->parent_relid, root);
170 [ + - ]: 562 : if (parent_rte->rtekind == RTE_RELATION)
171 : 0 : continue;
172 [ - + ]: 562 : }
173 : :
174 : : /* Skip NULL entries and joins. */
175 [ + - ]: 155431 : prior_rte = planner_rt_fetch(prior_rti, root);
176 [ + + + + ]: 155431 : if (prior_rte == NULL || prior_rte->rtekind == RTE_JOIN)
177 : 31263 : continue;
178 : :
179 : : /* Skip if the alias name differs. */
180 [ + + ]: 124168 : if (strcmp(prior_rte->eref->aliasname, rte->eref->aliasname) != 0)
181 : 122511 : continue;
182 : :
183 : : /* Looks like a true duplicate. */
184 : 1657 : ++occurrence;
185 [ + + ]: 155431 : }
186 : :
187 : : /* If this is a child table, get the schema and relation names. */
188 [ + + ]: 144276 : if (rti != top_rti)
189 : : {
190 : 19204 : partnsp = get_namespace_name_or_temp(get_rel_namespace(rte->relid));
191 : 19204 : partrel = get_rel_name(rte->relid);
192 : 19204 : }
193 : :
194 : : /* OK, we have all the answers we need. Return them to the caller. */
195 : 144276 : rid->alias_name = top_rte->eref->aliasname;
196 : 144276 : rid->occurrence = occurrence;
197 : 144276 : rid->partnsp = partnsp;
198 : 144276 : rid->partrel = partrel;
199 : 144276 : rid->plan_name = root->plan_name;
200 : 144276 : }
201 : :
202 : : /*
203 : : * Compute a relation identifier for a set of RTIs, except for any RTE_JOIN
204 : : * RTIs that may be present.
205 : : *
206 : : * RTE_JOIN entries are excluded because they cannot be mentioned by plan
207 : : * advice.
208 : : *
209 : : * The caller is responsible for making sure that the tkeys array is large
210 : : * enough to store the results.
211 : : *
212 : : * The return value is the number of identifiers computed.
213 : : */
214 : : int
215 : 44110 : pgpa_compute_identifiers_by_relids(PlannerInfo *root, Bitmapset *relids,
216 : : pgpa_identifier *rids)
217 : : {
218 : 44110 : int count = 0;
219 : 44110 : int rti = -1;
220 : :
221 [ + + ]: 97310 : while ((rti = bms_next_member(relids, rti)) >= 0)
222 : : {
223 [ + - ]: 53200 : RangeTblEntry *rte = planner_rt_fetch(rti, root);
224 : :
225 [ + + ]: 53200 : if (rte->rtekind == RTE_JOIN)
226 : 1477 : continue;
227 : 51723 : pgpa_compute_identifier_by_rti(root, rti, &rids[count++]);
228 [ - + + ]: 53200 : }
229 : :
230 [ + - ]: 44110 : Assert(count > 0);
231 : 88220 : return count;
232 : 44110 : }
233 : :
234 : : /*
235 : : * Create an array of range table identifiers for all the non-NULL,
236 : : * non-RTE_JOIN entries in the PlannedStmt's range table.
237 : : */
238 : : pgpa_identifier *
239 : 87097 : pgpa_create_identifiers_for_planned_stmt(PlannedStmt *pstmt)
240 : : {
241 : 87097 : Index rtable_length = list_length(pstmt->rtable);
242 : 87097 : pgpa_identifier *result = palloc0_array(pgpa_identifier, rtable_length);
243 : 87097 : Index *top_rti_map;
244 : 87097 : int rtinfoindex = 0;
245 : 87097 : SubPlanRTInfo *rtinfo = NULL;
246 : 87097 : SubPlanRTInfo *nextrtinfo = NULL;
247 : :
248 : : /*
249 : : * Account for relations addded by inheritance expansion of partitioned
250 : : * tables.
251 : : */
252 : 174194 : top_rti_map = pgpa_create_top_rti_map(rtable_length, pstmt->rtable,
253 : 87097 : pstmt->appendRelations);
254 : :
255 : : /*
256 : : * When we begin iterating, we're processing the portion of the range
257 : : * table that originated from the top-level PlannerInfo, so subrtinfo is
258 : : * NULL. Later, subrtinfo will be the SubPlanRTInfo for the subquery whose
259 : : * portion of the range table we are processing. nextrtinfo is always the
260 : : * SubPlanRTInfo that follows the current one, if any, so when we're
261 : : * processing the top-level query's portion of the range table, the next
262 : : * SubPlanRTInfo is the very first one.
263 : : */
264 [ + + ]: 87097 : if (pstmt->subrtinfos != NULL)
265 : 9054 : nextrtinfo = linitial(pstmt->subrtinfos);
266 : :
267 : : /* Main loop over the range table. */
268 [ + + ]: 275886 : for (Index rti = 1; rti <= rtable_length; rti++)
269 : : {
270 : 188789 : const char *plan_name;
271 : 188789 : Index top_rti;
272 : 188789 : RangeTblEntry *rte;
273 : 188789 : RangeTblEntry *top_rte;
274 : 188789 : char *partnsp = NULL;
275 : 188789 : char *partrel = NULL;
276 : 188789 : int occurrence;
277 : 188789 : pgpa_identifier *rid;
278 : :
279 : : /*
280 : : * Advance to the next SubPlanRTInfo, if it's time to do that.
281 : : *
282 : : * This loop probably shouldn't ever iterate more than once, because
283 : : * that would imply that a subquery was planned but added nothing to
284 : : * the range table; but let's be defensive and assume it can happen.
285 : : */
286 [ + + + + ]: 204745 : while (nextrtinfo != NULL && rti > nextrtinfo->rtoffset)
287 : : {
288 : 15956 : rtinfo = nextrtinfo;
289 [ + + ]: 15956 : if (++rtinfoindex >= list_length(pstmt->subrtinfos))
290 : 9054 : nextrtinfo = NULL;
291 : : else
292 : 6902 : nextrtinfo = list_nth(pstmt->subrtinfos, rtinfoindex);
293 : : }
294 : :
295 : : /* Fetch the range table entry, if any. */
296 : 188789 : rte = rt_fetch(rti, pstmt->rtable);
297 : :
298 : : /*
299 : : * We can't and don't need to identify null entries, and we don't want
300 : : * to identify join entries.
301 : : */
302 [ + - + + ]: 188789 : if (rte == NULL || rte->rtekind == RTE_JOIN)
303 : 16944 : continue;
304 : :
305 : : /*
306 : : * If this is not a relation added by partitioned table expansion,
307 : : * then the top RTI/RTE are just the same as this RTI/RTE. Otherwise,
308 : : * we need the information for the top RTI/RTE, and must also fetch
309 : : * the partition schema and name.
310 : : */
311 : 171845 : top_rti = top_rti_map[rti - 1];
312 [ + + ]: 171845 : if (rti == top_rti)
313 : 159141 : top_rte = rte;
314 : : else
315 : : {
316 : 12704 : top_rte = rt_fetch(top_rti, pstmt->rtable);
317 : 12704 : partnsp =
318 : 12704 : get_namespace_name_or_temp(get_rel_namespace(rte->relid));
319 : 12704 : partrel = get_rel_name(rte->relid);
320 : : }
321 : :
322 : : /* Compute the correct occurrence number. */
323 : 343690 : occurrence = pgpa_occurrence_number(pstmt->rtable, top_rti_map,
324 : 171845 : rtinfo, top_rti);
325 : :
326 : : /* Get the name of the current plan (NULL for toplevel query). */
327 [ + + ]: 171845 : plan_name = rtinfo == NULL ? NULL : rtinfo->plan_name;
328 : :
329 : : /* Save all the details we've derived. */
330 : 171845 : rid = &result[rti - 1];
331 : 171845 : rid->alias_name = top_rte->eref->aliasname;
332 : 171845 : rid->occurrence = occurrence;
333 : 171845 : rid->partnsp = partnsp;
334 : 171845 : rid->partrel = partrel;
335 : 171845 : rid->plan_name = plan_name;
336 [ - + + ]: 188789 : }
337 : :
338 : 174194 : return result;
339 : 87097 : }
340 : :
341 : : /*
342 : : * Search for a pgpa_identifier in the array of identifiers computed for the
343 : : * range table. If exactly one match is found, return the matching RTI; else
344 : : * return 0.
345 : : */
346 : : Index
347 : 140 : pgpa_compute_rti_from_identifier(int rtable_length,
348 : : pgpa_identifier *rt_identifiers,
349 : : pgpa_identifier *rid)
350 : : {
351 : 140 : Index result = 0;
352 : :
353 [ + + - + ]: 705 : for (Index rti = 1; rti <= rtable_length; ++rti)
354 : : {
355 : 565 : pgpa_identifier *rti_rid = &rt_identifiers[rti - 1];
356 : :
357 : : /* If there's no identifier for this RTI, skip it. */
358 [ + + ]: 565 : if (rti_rid->alias_name == NULL)
359 : 103 : continue;
360 : :
361 : : /*
362 : : * If it matches, return this RTI. As usual, an omitted partition
363 : : * schema matches anything, but partition and plan names must either
364 : : * match exactly or be omitted on both sides.
365 : : */
366 [ + + ]: 462 : if (strcmp(rid->alias_name, rti_rid->alias_name) == 0 &&
367 [ + + ]: 197 : rid->occurrence == rti_rid->occurrence &&
368 [ + + + + ]: 193 : (rid->partnsp == NULL || rti_rid->partnsp == NULL ||
369 : 9 : strcmp(rid->partnsp, rti_rid->partnsp) == 0) &&
370 [ + + - + ]: 184 : strings_equal_or_both_null(rid->partrel, rti_rid->partrel) &&
371 : 140 : strings_equal_or_both_null(rid->plan_name, rti_rid->plan_name))
372 : : {
373 [ - + ]: 140 : if (result != 0)
374 : : {
375 : : /* Multiple matches were found. */
376 : 0 : return 0;
377 : : }
378 : 140 : result = rti;
379 : 140 : }
380 [ + - + ]: 565 : }
381 : :
382 : 140 : return result;
383 : 140 : }
384 : :
385 : : /*
386 : : * Build a mapping from each RTI to the RTI whose alias_name will be used to
387 : : * construct the range table identifier.
388 : : *
389 : : * For child relations, this is the topmost parent that is still of type
390 : : * RTE_RELATION. For other relations, it's just the original RTI.
391 : : *
392 : : * Since we're eventually going to need this information for every RTI in
393 : : * the range table, it's best to compute all the answers in a single pass over
394 : : * the AppendRelInfo list. Otherwise, we might end up searching through that
395 : : * list repeatedly for entries of interest.
396 : : *
397 : : * Note that the returned array is uses zero-based indexing, while RTIs use
398 : : * 1-based indexing, so subtract 1 from the RTI before looking it up in the
399 : : * array.
400 : : */
401 : : static Index *
402 : 87097 : pgpa_create_top_rti_map(Index rtable_length, List *rtable, List *appinfos)
403 : : {
404 : 87097 : Index *top_rti_map = palloc0_array(Index, rtable_length);
405 : :
406 : : /* Initially, make every RTI point to itself. */
407 [ + + ]: 275886 : for (Index rti = 1; rti <= rtable_length; ++rti)
408 : 188789 : top_rti_map[rti - 1] = rti;
409 : :
410 : : /* Update the map for each AppendRelInfo object. */
411 [ + + + + : 189706 : foreach_node(AppendRelInfo, appinfo, appinfos)
+ + + + ]
412 : : {
413 : 15512 : Index parent_rti = appinfo->parent_relid;
414 : 15512 : RangeTblEntry *parent_rte = rt_fetch(parent_rti, rtable);
415 : :
416 : : /* If the parent is not RTE_RELATION, ignore this entry. */
417 [ + + ]: 15512 : if (parent_rte->rtekind != RTE_RELATION)
418 : 2808 : continue;
419 : :
420 : : /*
421 : : * Map the child to wherever we mapped the parent. Parents always
422 : : * precede their children in the AppendRelInfo list, so this should
423 : : * work out.
424 : : */
425 : 12704 : top_rti_map[appinfo->child_relid - 1] = top_rti_map[parent_rti - 1];
426 [ - + + ]: 102609 : }
427 : :
428 : 174194 : return top_rti_map;
429 : 87097 : }
430 : :
431 : : /*
432 : : * Find the occurence number of a certain relation within a certain subquery.
433 : : *
434 : : * The same alias name can occur multiple times within a subquery, but we want
435 : : * to disambiguate by giving different occurrences different integer indexes.
436 : : * However, child tables are disambiguated by including the table name rather
437 : : * than by incrementing the occurrence number; and joins are not named and so
438 : : * shouldn't increment the occurence number either.
439 : : */
440 : : static int
441 : 171845 : pgpa_occurrence_number(List *rtable, Index *top_rti_map,
442 : : SubPlanRTInfo *rtinfo, Index rti)
443 : : {
444 [ + + ]: 171845 : Index rtoffset = (rtinfo == NULL) ? 0 : rtinfo->rtoffset;
445 : 171845 : int occurrence = 1;
446 : 171845 : RangeTblEntry *rte = rt_fetch(rti, rtable);
447 : :
448 [ + + ]: 282627 : for (Index prior_rti = rtoffset + 1; prior_rti < rti; ++prior_rti)
449 : : {
450 : 110782 : RangeTblEntry *prior_rte;
451 : :
452 : : /*
453 : : * If this is a child rel of a parent that is a relation, skip it.
454 : : *
455 : : * Such range table entries are disambiguated by mentioning the schema
456 : : * and name of the table, not by counting them as separate occurrences
457 : : * of the same table.
458 : : */
459 [ - + ]: 110782 : if (top_rti_map[prior_rti - 1] != prior_rti)
460 : 0 : continue;
461 : :
462 : : /* Skip joins. */
463 : 110782 : prior_rte = rt_fetch(prior_rti, rtable);
464 [ + + ]: 110782 : if (prior_rte->rtekind == RTE_JOIN)
465 : 11822 : continue;
466 : :
467 : : /* Skip if the alias name differs. */
468 [ + + ]: 98960 : if (strcmp(prior_rte->eref->aliasname, rte->eref->aliasname) != 0)
469 : 89432 : continue;
470 : :
471 : : /* Looks like a true duplicate. */
472 : 9528 : ++occurrence;
473 [ - + + ]: 110782 : }
474 : :
475 : 343690 : return occurrence;
476 : 171845 : }
|