Branch data Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * joinpath.c
4 : : * Routines to find all possible paths for processing a set of joins
5 : : *
6 : : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : : * Portions Copyright (c) 1994, Regents of the University of California
8 : : *
9 : : *
10 : : * IDENTIFICATION
11 : : * src/backend/optimizer/path/joinpath.c
12 : : *
13 : : *-------------------------------------------------------------------------
14 : : */
15 : : #include "postgres.h"
16 : :
17 : : #include "executor/executor.h"
18 : : #include "foreign/fdwapi.h"
19 : : #include "nodes/nodeFuncs.h"
20 : : #include "optimizer/cost.h"
21 : : #include "optimizer/optimizer.h"
22 : : #include "optimizer/pathnode.h"
23 : : #include "optimizer/paths.h"
24 : : #include "optimizer/placeholder.h"
25 : : #include "optimizer/planmain.h"
26 : : #include "optimizer/restrictinfo.h"
27 : : #include "utils/lsyscache.h"
28 : : #include "utils/typcache.h"
29 : :
30 : : /* Hooks for plugins to get control in add_paths_to_joinrel() */
31 : : set_join_pathlist_hook_type set_join_pathlist_hook = NULL;
32 : : join_path_setup_hook_type join_path_setup_hook = NULL;
33 : :
34 : : /*
35 : : * Paths parameterized by a parent rel can be considered to be parameterized
36 : : * by any of its children, when we are performing partitionwise joins. These
37 : : * macros simplify checking for such cases. Beware multiple eval of args.
38 : : */
39 : : #define PATH_PARAM_BY_PARENT(path, rel) \
40 : : ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), \
41 : : (rel)->top_parent_relids))
42 : : #define PATH_PARAM_BY_REL_SELF(path, rel) \
43 : : ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))
44 : :
45 : : #define PATH_PARAM_BY_REL(path, rel) \
46 : : (PATH_PARAM_BY_REL_SELF(path, rel) || PATH_PARAM_BY_PARENT(path, rel))
47 : :
48 : : static void try_partial_mergejoin_path(PlannerInfo *root,
49 : : RelOptInfo *joinrel,
50 : : Path *outer_path,
51 : : Path *inner_path,
52 : : List *pathkeys,
53 : : List *mergeclauses,
54 : : List *outersortkeys,
55 : : List *innersortkeys,
56 : : JoinType jointype,
57 : : JoinPathExtraData *extra);
58 : : static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
59 : : RelOptInfo *outerrel, RelOptInfo *innerrel,
60 : : JoinType jointype, JoinPathExtraData *extra);
61 : : static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
62 : : RelOptInfo *outerrel, RelOptInfo *innerrel,
63 : : JoinType jointype, JoinPathExtraData *extra);
64 : : static void consider_parallel_nestloop(PlannerInfo *root,
65 : : RelOptInfo *joinrel,
66 : : RelOptInfo *outerrel,
67 : : RelOptInfo *innerrel,
68 : : JoinType jointype,
69 : : JoinPathExtraData *extra);
70 : : static void consider_parallel_mergejoin(PlannerInfo *root,
71 : : RelOptInfo *joinrel,
72 : : RelOptInfo *outerrel,
73 : : RelOptInfo *innerrel,
74 : : JoinType jointype,
75 : : JoinPathExtraData *extra,
76 : : Path *inner_cheapest_total);
77 : : static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
78 : : RelOptInfo *outerrel, RelOptInfo *innerrel,
79 : : JoinType jointype, JoinPathExtraData *extra);
80 : : static List *select_mergejoin_clauses(PlannerInfo *root,
81 : : RelOptInfo *joinrel,
82 : : RelOptInfo *outerrel,
83 : : RelOptInfo *innerrel,
84 : : List *restrictlist,
85 : : JoinType jointype,
86 : : bool *mergejoin_allowed);
87 : : static void generate_mergejoin_paths(PlannerInfo *root,
88 : : RelOptInfo *joinrel,
89 : : RelOptInfo *innerrel,
90 : : Path *outerpath,
91 : : JoinType jointype,
92 : : JoinPathExtraData *extra,
93 : : bool useallclauses,
94 : : Path *inner_cheapest_total,
95 : : List *merge_pathkeys,
96 : : bool is_partial);
97 : :
98 : :
99 : : /*
100 : : * add_paths_to_joinrel
101 : : * Given a join relation and two component rels from which it can be made,
102 : : * consider all possible paths that use the two component rels as outer
103 : : * and inner rel respectively. Add these paths to the join rel's pathlist
104 : : * if they survive comparison with other paths (and remove any existing
105 : : * paths that are dominated by these paths).
106 : : *
107 : : * Modifies the pathlist field of the joinrel node to contain the best
108 : : * paths found so far.
109 : : *
110 : : * jointype is not necessarily the same as sjinfo->jointype; it might be
111 : : * "flipped around" if we are considering joining the rels in the opposite
112 : : * direction from what's indicated in sjinfo.
113 : : *
114 : : * Also, this routine accepts the special JoinTypes JOIN_UNIQUE_OUTER and
115 : : * JOIN_UNIQUE_INNER to indicate that the outer or inner relation has been
116 : : * unique-ified and a regular inner join should then be applied. These values
117 : : * are not allowed to propagate outside this routine, however. Path cost
118 : : * estimation code, as well as match_unsorted_outer, may need to recognize that
119 : : * it's dealing with such a case --- the combination of nominal jointype INNER
120 : : * with sjinfo->jointype == JOIN_SEMI indicates that.
121 : : */
122 : : void
123 : 66556 : add_paths_to_joinrel(PlannerInfo *root,
124 : : RelOptInfo *joinrel,
125 : : RelOptInfo *outerrel,
126 : : RelOptInfo *innerrel,
127 : : JoinType jointype,
128 : : SpecialJoinInfo *sjinfo,
129 : : List *restrictlist)
130 : : {
131 : 66556 : JoinType save_jointype = jointype;
132 : 66556 : JoinPathExtraData extra;
133 : 66556 : bool mergejoin_allowed = true;
134 : 66556 : ListCell *lc;
135 : 66556 : Relids joinrelids;
136 : :
137 : : /*
138 : : * PlannerInfo doesn't contain the SpecialJoinInfos created for joins
139 : : * between child relations, even if there is a SpecialJoinInfo node for
140 : : * the join between the topmost parents. So, while calculating Relids set
141 : : * representing the restriction, consider relids of topmost parent of
142 : : * partitions.
143 : : */
144 [ + + ]: 66556 : if (joinrel->reloptkind == RELOPT_OTHER_JOINREL)
145 : 11010 : joinrelids = joinrel->top_parent_relids;
146 : : else
147 : 55546 : joinrelids = joinrel->relids;
148 : :
149 : 66556 : extra.restrictlist = restrictlist;
150 : 66556 : extra.mergeclause_list = NIL;
151 : 66556 : extra.sjinfo = sjinfo;
152 : 66556 : extra.param_source_rels = NULL;
153 : 66556 : extra.pgs_mask = joinrel->pgs_mask;
154 : :
155 : : /*
156 : : * Give extensions a chance to take control. In particular, an extension
157 : : * might want to modify extra.pgs_mask. It's possible to override pgs_mask
158 : : * on a query-wide basis using join_search_hook, or for a particular
159 : : * relation using joinrel_setup_hook, but extensions that want to provide
160 : : * different advice for the same joinrel based on the choice of innerrel
161 : : * and outerrel will need to use this hook.
162 : : *
163 : : * A very simple way for an extension to use this hook is to set
164 : : * extra.pgs_mask &= ~PGS_JOIN_ANY, if it simply doesn't want any of the
165 : : * paths generated by this call to add_paths_to_joinrel() to be selected.
166 : : * An extension could use this technique to constrain the join order,
167 : : * since it could thereby arrange to reject all paths from join orders
168 : : * that it does not like. An extension can also selectively clear bits
169 : : * from extra.pgs_mask to rule out specific techniques for specific joins,
170 : : * or could even set additional bits to re-allow methods disabled at some
171 : : * higher level.
172 : : *
173 : : * NB: Below this point, this function should be careful to reference
174 : : * extra.pgs_mask rather than rel->pgs_mask to avoid disregarding any
175 : : * changes made by the hook we're about to call.
176 : : */
177 [ + + ]: 66556 : if (join_path_setup_hook)
178 : 133060 : join_path_setup_hook(root, joinrel, outerrel, innerrel,
179 : 66530 : jointype, &extra);
180 : :
181 : : /*
182 : : * See if the inner relation is provably unique for this outer rel.
183 : : *
184 : : * We have some special cases: for JOIN_SEMI, it doesn't matter since the
185 : : * executor can make the equivalent optimization anyway. It also doesn't
186 : : * help enable use of Memoize, since a semijoin with a provably unique
187 : : * inner side should have been reduced to an inner join in that case.
188 : : * Therefore, we need not expend planner cycles on proofs. (For
189 : : * JOIN_ANTI, although it doesn't help the executor for the same reason,
190 : : * it can benefit Memoize paths.) For JOIN_UNIQUE_INNER, we must be
191 : : * considering a semijoin whose inner side is not provably unique (else
192 : : * reduce_unique_semijoins would've simplified it), so there's no point in
193 : : * calling innerrel_is_unique. However, if the LHS covers all of the
194 : : * semijoin's min_lefthand, then it's appropriate to set inner_unique
195 : : * because the unique relation produced by create_unique_paths will be
196 : : * unique relative to the LHS. (If we have an LHS that's only part of the
197 : : * min_lefthand, that is *not* true.) For JOIN_UNIQUE_OUTER, pass
198 : : * JOIN_INNER to avoid letting that value escape this module.
199 : : */
200 [ + + + + ]: 66556 : switch (jointype)
201 : : {
202 : : case JOIN_SEMI:
203 : 1042 : extra.inner_unique = false; /* well, unproven */
204 : 1042 : break;
205 : : case JOIN_UNIQUE_INNER:
206 : 1602 : extra.inner_unique = bms_is_subset(sjinfo->min_lefthand,
207 : 801 : outerrel->relids);
208 : 801 : break;
209 : : case JOIN_UNIQUE_OUTER:
210 : 1602 : extra.inner_unique = innerrel_is_unique(root,
211 : 801 : joinrel->relids,
212 : 801 : outerrel->relids,
213 : 801 : innerrel,
214 : : JOIN_INNER,
215 : 801 : restrictlist,
216 : : false);
217 : 801 : break;
218 : : default:
219 : 127824 : extra.inner_unique = innerrel_is_unique(root,
220 : 63912 : joinrel->relids,
221 : 63912 : outerrel->relids,
222 : 63912 : innerrel,
223 : 63912 : jointype,
224 : 63912 : restrictlist,
225 : : false);
226 : 63912 : break;
227 : : }
228 : :
229 : : /*
230 : : * If the outer or inner relation has been unique-ified, handle as a plain
231 : : * inner join.
232 : : */
233 [ + + + + ]: 66556 : if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
234 : 1602 : jointype = JOIN_INNER;
235 : :
236 : : /*
237 : : * Find potential mergejoin clauses. We can skip this if we are not
238 : : * interested in doing a mergejoin. However, mergejoin may be our only
239 : : * way of implementing a full outer join, so in that case we don't care
240 : : * whether mergejoins are disabled.
241 : : */
242 [ + + + + ]: 66556 : if ((extra.pgs_mask & PGS_MERGEJOIN_ANY) != 0 || jointype == JOIN_FULL)
243 : 131440 : extra.mergeclause_list = select_mergejoin_clauses(root,
244 : 65720 : joinrel,
245 : 65720 : outerrel,
246 : 65720 : innerrel,
247 : 65720 : restrictlist,
248 : 65720 : jointype,
249 : : &mergejoin_allowed);
250 : :
251 : : /*
252 : : * If it's SEMI, ANTI, or inner_unique join, compute correction factors
253 : : * for cost estimation. These will be the same for all paths.
254 : : */
255 [ + + + + : 66556 : if (jointype == JOIN_SEMI || jointype == JOIN_ANTI || extra.inner_unique)
+ + ]
256 : 37118 : compute_semi_anti_join_factors(root, joinrel, outerrel, innerrel,
257 : 18559 : jointype, sjinfo, restrictlist,
258 : 18559 : &extra.semifactors);
259 : :
260 : : /*
261 : : * Decide whether it's sensible to generate parameterized paths for this
262 : : * joinrel, and if so, which relations such paths should require. There
263 : : * is usually no need to create a parameterized result path unless there
264 : : * is a join order restriction that prevents joining one of our input rels
265 : : * directly to the parameter source rel instead of joining to the other
266 : : * input rel. (But see allow_star_schema_join().) This restriction
267 : : * reduces the number of parameterized paths we have to deal with at
268 : : * higher join levels, without compromising the quality of the resulting
269 : : * plan. We express the restriction as a Relids set that must overlap the
270 : : * parameterization of any proposed join path. Note: param_source_rels
271 : : * should contain only baserels, not OJ relids, so starting from
272 : : * all_baserels not all_query_rels is correct.
273 : : */
274 [ + + + + : 108048 : foreach(lc, root->join_info_list)
+ + ]
275 : : {
276 : 41492 : SpecialJoinInfo *sjinfo2 = (SpecialJoinInfo *) lfirst(lc);
277 : :
278 : : /*
279 : : * SJ is relevant to this join if we have some part of its RHS
280 : : * (possibly not all of it), and haven't yet joined to its LHS. (This
281 : : * test is pretty simplistic, but should be sufficient considering the
282 : : * join has already been proven legal.) If the SJ is relevant, it
283 : : * presents constraints for joining to anything not in its RHS.
284 : : */
285 [ + + + + ]: 41492 : if (bms_overlap(joinrelids, sjinfo2->min_righthand) &&
286 : 27554 : !bms_overlap(joinrelids, sjinfo2->min_lefthand))
287 : 4036 : extra.param_source_rels = bms_join(extra.param_source_rels,
288 : 4036 : bms_difference(root->all_baserels,
289 : 2018 : sjinfo2->min_righthand));
290 : :
291 : : /* full joins constrain both sides symmetrically */
292 [ + + ]: 41492 : if (sjinfo2->jointype == JOIN_FULL &&
293 [ + + + + ]: 734 : bms_overlap(joinrelids, sjinfo2->min_lefthand) &&
294 : 730 : !bms_overlap(joinrelids, sjinfo2->min_righthand))
295 : 200 : extra.param_source_rels = bms_join(extra.param_source_rels,
296 : 200 : bms_difference(root->all_baserels,
297 : 100 : sjinfo2->min_lefthand));
298 : 41492 : }
299 : :
300 : : /*
301 : : * However, when a LATERAL subquery is involved, there will simply not be
302 : : * any paths for the joinrel that aren't parameterized by whatever the
303 : : * subquery is parameterized by, unless its parameterization is resolved
304 : : * within the joinrel. So we might as well allow additional dependencies
305 : : * on whatever residual lateral dependencies the joinrel will have.
306 : : */
307 : 133112 : extra.param_source_rels = bms_add_members(extra.param_source_rels,
308 : 66556 : joinrel->lateral_relids);
309 : :
310 : : /*
311 : : * 1. Consider mergejoin paths where both relations must be explicitly
312 : : * sorted. Skip this if we can't mergejoin.
313 : : */
314 [ + + ]: 66556 : if (mergejoin_allowed)
315 : 130306 : sort_inner_and_outer(root, joinrel, outerrel, innerrel,
316 : 65153 : jointype, &extra);
317 : :
318 : : /*
319 : : * 2. Consider paths where the outer relation need not be explicitly
320 : : * sorted. This includes both nestloops and mergejoins where the outer
321 : : * path is already ordered. Again, skip this if we can't mergejoin.
322 : : * (That's okay because we know that nestloop can't handle
323 : : * right/right-anti/right-semi/full joins at all, so it wouldn't work in
324 : : * the prohibited cases either.)
325 : : */
326 [ + + ]: 66556 : if (mergejoin_allowed)
327 : 130306 : match_unsorted_outer(root, joinrel, outerrel, innerrel,
328 : 65153 : jointype, &extra);
329 : :
330 : : #ifdef NOT_USED
331 : :
332 : : /*
333 : : * 3. Consider paths where the inner relation need not be explicitly
334 : : * sorted. This includes mergejoins only (nestloops were already built in
335 : : * match_unsorted_outer).
336 : : *
337 : : * Diked out as redundant 2/13/2000 -- tgl. There isn't any really
338 : : * significant difference between the inner and outer side of a mergejoin,
339 : : * so match_unsorted_inner creates no paths that aren't equivalent to
340 : : * those made by match_unsorted_outer when add_paths_to_joinrel() is
341 : : * invoked with the two rels given in the other order.
342 : : */
343 : : if (mergejoin_allowed)
344 : : match_unsorted_inner(root, joinrel, outerrel, innerrel,
345 : : jointype, &extra);
346 : : #endif
347 : :
348 : : /*
349 : : * 4. Consider paths where both outer and inner relations must be hashed
350 : : * before being joined. As above, when it's a full join, we must try this
351 : : * even when the path type is disabled, because it may be our only option.
352 : : */
353 [ + + + + ]: 66556 : if ((extra.pgs_mask & PGS_HASHJOIN) != 0 || jointype == JOIN_FULL)
354 : 131190 : hash_inner_and_outer(root, joinrel, outerrel, innerrel,
355 : 65595 : jointype, &extra);
356 : :
357 : : /*
358 : : * 5. If inner and outer relations are foreign tables (or joins) belonging
359 : : * to the same server and assigned to the same user to check access
360 : : * permissions as, give the FDW a chance to push down joins.
361 : : */
362 [ + + - + : 66556 : if ((extra.pgs_mask & PGS_FOREIGNJOIN) != 0 && joinrel->fdwroutine &&
# # ]
363 : 0 : joinrel->fdwroutine->GetForeignJoinPaths)
364 : 0 : joinrel->fdwroutine->GetForeignJoinPaths(root, joinrel,
365 : 0 : outerrel, innerrel,
366 : 0 : save_jointype, &extra);
367 : :
368 : : /*
369 : : * 6. Finally, give extensions a chance to manipulate the path list. They
370 : : * could add new paths (such as CustomPaths) by calling add_path(), or
371 : : * add_partial_path() if parallel aware.
372 : : *
373 : : * In theory, extensions could also use this hook to delete or modify
374 : : * paths added by the core code, but in practice this is difficult to make
375 : : * work, since it's too late to get back any paths that have already been
376 : : * discarded by add_path() or add_partial_path(). If you're trying to
377 : : * suppress paths, consider using join_path_setup_hook instead.
378 : : */
379 [ + - ]: 66556 : if (set_join_pathlist_hook)
380 : 0 : set_join_pathlist_hook(root, joinrel, outerrel, innerrel,
381 : 0 : save_jointype, &extra);
382 : 66556 : }
383 : :
384 : : /*
385 : : * We override the param_source_rels heuristic to accept nestloop paths in
386 : : * which the outer rel satisfies some but not all of the inner path's
387 : : * parameterization. This is necessary to get good plans for star-schema
388 : : * scenarios, in which a parameterized path for a large table may require
389 : : * parameters from multiple small tables that will not get joined directly to
390 : : * each other. We can handle that by stacking nestloops that have the small
391 : : * tables on the outside; but this breaks the rule the param_source_rels
392 : : * heuristic is based on, namely that parameters should not be passed down
393 : : * across joins unless there's a join-order-constraint-based reason to do so.
394 : : * So we ignore the param_source_rels restriction when this case applies.
395 : : *
396 : : * allow_star_schema_join() returns true if the param_source_rels restriction
397 : : * should be overridden, ie, it's okay to perform this join.
398 : : */
399 : : static inline bool
400 : 22096 : allow_star_schema_join(PlannerInfo *root,
401 : : Relids outerrelids,
402 : : Relids inner_paramrels)
403 : : {
404 : : /*
405 : : * It's a star-schema case if the outer rel provides some but not all of
406 : : * the inner rel's parameterization.
407 : : */
408 [ + + ]: 25532 : return (bms_overlap(inner_paramrels, outerrelids) &&
409 : 3436 : bms_nonempty_difference(inner_paramrels, outerrelids));
410 : : }
411 : :
412 : : /*
413 : : * If the parameterization is only partly satisfied by the outer rel,
414 : : * the unsatisfied part can't include any outer-join relids that could
415 : : * null rels of the satisfied part. That would imply that we're trying
416 : : * to use a clause involving a Var with nonempty varnullingrels at
417 : : * a join level where that value isn't yet computable.
418 : : *
419 : : * In practice, this test never finds a problem because earlier join order
420 : : * restrictions prevent us from attempting a join that would cause a problem.
421 : : * (That's unsurprising, because the code worked before we ever added
422 : : * outer-join relids to expression relids.) It still seems worth checking
423 : : * as a backstop, but we only do so in assert-enabled builds.
424 : : */
425 : : #ifdef USE_ASSERT_CHECKING
426 : : static inline bool
427 : 251373 : have_unsafe_outer_join_ref(PlannerInfo *root,
428 : : Relids outerrelids,
429 : : Relids inner_paramrels)
430 : : {
431 : 251373 : bool result = false;
432 : 251373 : Relids unsatisfied = bms_difference(inner_paramrels, outerrelids);
433 : 251373 : Relids satisfied = bms_intersect(inner_paramrels, outerrelids);
434 : :
435 [ + + ]: 251373 : if (bms_overlap(unsatisfied, root->outer_join_rels))
436 : : {
437 : 10 : ListCell *lc;
438 : :
439 [ + - + + : 30 : foreach(lc, root->join_info_list)
+ + ]
440 : : {
441 : 20 : SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
442 : :
443 [ + + ]: 20 : if (!bms_is_member(sjinfo->ojrelid, unsatisfied))
444 : 4 : continue; /* not relevant */
445 [ + - # # ]: 16 : if (bms_overlap(satisfied, sjinfo->min_righthand) ||
446 [ - + ]: 16 : (sjinfo->jointype == JOIN_FULL &&
447 : 0 : bms_overlap(satisfied, sjinfo->min_lefthand)))
448 : : {
449 : 0 : result = true; /* doesn't work */
450 : 0 : break;
451 : : }
452 [ + - + ]: 20 : }
453 : 10 : }
454 : :
455 : : /* Waste no memory when we reject a path here */
456 : 251373 : bms_free(unsatisfied);
457 : 251373 : bms_free(satisfied);
458 : :
459 : 502746 : return result;
460 : 251373 : }
461 : : #endif /* USE_ASSERT_CHECKING */
462 : :
463 : : /*
464 : : * paraminfo_get_equal_hashops
465 : : * Determine if the clauses in param_info and innerrel's lateral vars
466 : : * can be hashed.
467 : : * Returns true if hashing is possible, otherwise false.
468 : : *
469 : : * Additionally, on success we collect the outer expressions and the
470 : : * appropriate equality operators for each hashable parameter to innerrel.
471 : : * These are returned in parallel lists in *param_exprs and *operators.
472 : : * We also set *binary_mode to indicate whether strict binary matching is
473 : : * required.
474 : : */
475 : : static bool
476 : 25046 : paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
477 : : RelOptInfo *outerrel, RelOptInfo *innerrel,
478 : : List *ph_lateral_vars, List **param_exprs,
479 : : List **operators, bool *binary_mode)
480 : :
481 : : {
482 : 25046 : List *lateral_vars;
483 : 25046 : ListCell *lc;
484 : :
485 : 25046 : *param_exprs = NIL;
486 : 25046 : *operators = NIL;
487 : 25046 : *binary_mode = false;
488 : :
489 : : /* Add join clauses from param_info to the hash key */
490 [ - + ]: 25046 : if (param_info != NULL)
491 : : {
492 : 25046 : List *clauses = param_info->ppi_clauses;
493 : :
494 [ + + + + : 51053 : foreach(lc, clauses)
+ + + + ]
495 : : {
496 : 26007 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
497 : 26007 : OpExpr *opexpr;
498 : 26007 : Node *expr;
499 : 26007 : Oid hasheqoperator;
500 : :
501 : 26007 : opexpr = (OpExpr *) rinfo->clause;
502 : :
503 : : /*
504 : : * Bail if the rinfo is not compatible. We need a join OpExpr
505 : : * with 2 args.
506 : : */
507 [ + + + - : 26007 : if (!IsA(opexpr, OpExpr) || list_length(opexpr->args) != 2 ||
+ + ]
508 : 50754 : !clause_sides_match_join(rinfo, outerrel->relids,
509 : 25377 : innerrel->relids))
510 : : {
511 : 5111 : list_free(*operators);
512 : 5111 : list_free(*param_exprs);
513 : 5111 : return false;
514 : : }
515 : :
516 [ + + ]: 20896 : if (rinfo->outer_is_left)
517 : : {
518 : 11228 : expr = (Node *) linitial(opexpr->args);
519 : 11228 : hasheqoperator = rinfo->left_hasheqoperator;
520 : 11228 : }
521 : : else
522 : : {
523 : 9668 : expr = (Node *) lsecond(opexpr->args);
524 : 9668 : hasheqoperator = rinfo->right_hasheqoperator;
525 : : }
526 : :
527 : : /* can't do memoize if we can't hash the outer type */
528 [ + + ]: 20896 : if (!OidIsValid(hasheqoperator))
529 : : {
530 : 15 : list_free(*operators);
531 : 15 : list_free(*param_exprs);
532 : 15 : return false;
533 : : }
534 : :
535 : : /*
536 : : * 'expr' may already exist as a parameter from a previous item in
537 : : * ppi_clauses. No need to include it again, however we'd better
538 : : * ensure we do switch into binary mode if required. See below.
539 : : */
540 [ + + ]: 20881 : if (!list_member(*param_exprs, expr))
541 : : {
542 : 20880 : *operators = lappend_oid(*operators, hasheqoperator);
543 : 20880 : *param_exprs = lappend(*param_exprs, expr);
544 : 20880 : }
545 : :
546 : : /*
547 : : * When the join operator is not hashable then it's possible that
548 : : * the operator will be able to distinguish something that the
549 : : * hash equality operator could not. For example with floating
550 : : * point types -0.0 and +0.0 are classed as equal by the hash
551 : : * function and equality function, but some other operator may be
552 : : * able to tell those values apart. This means that we must put
553 : : * memoize into binary comparison mode so that it does bit-by-bit
554 : : * comparisons rather than a "logical" comparison as it would
555 : : * using the hash equality operator.
556 : : */
557 [ + + ]: 20881 : if (!OidIsValid(rinfo->hashjoinoperator))
558 : 121 : *binary_mode = true;
559 [ + + ]: 26007 : }
560 [ + + ]: 25046 : }
561 : :
562 : : /* Now add any lateral vars to the cache key too */
563 : 19920 : lateral_vars = list_concat(ph_lateral_vars, innerrel->lateral_vars);
564 [ + + + + : 20601 : foreach(lc, lateral_vars)
+ + + + ]
565 : : {
566 : 681 : Node *expr = (Node *) lfirst(lc);
567 : 681 : TypeCacheEntry *typentry;
568 : :
569 : : /* Reject if there are any volatile functions in lateral vars */
570 [ - + ]: 681 : if (contain_volatile_functions(expr))
571 : : {
572 : 0 : list_free(*operators);
573 : 0 : list_free(*param_exprs);
574 : 0 : return false;
575 : : }
576 : :
577 : 681 : typentry = lookup_type_cache(exprType(expr),
578 : : TYPECACHE_HASH_PROC | TYPECACHE_EQ_OPR);
579 : :
580 : : /* can't use memoize without a valid hash proc and equals operator */
581 [ + + - + ]: 681 : if (!OidIsValid(typentry->hash_proc) || !OidIsValid(typentry->eq_opr))
582 : : {
583 : 33 : list_free(*operators);
584 : 33 : list_free(*param_exprs);
585 : 33 : return false;
586 : : }
587 : :
588 : : /*
589 : : * 'expr' may already exist as a parameter from the ppi_clauses. No
590 : : * need to include it again, however we'd better ensure we do switch
591 : : * into binary mode.
592 : : */
593 [ + + ]: 648 : if (!list_member(*param_exprs, expr))
594 : : {
595 : 573 : *operators = lappend_oid(*operators, typentry->eq_opr);
596 : 573 : *param_exprs = lappend(*param_exprs, expr);
597 : 573 : }
598 : :
599 : : /*
600 : : * We must go into binary mode as we don't have too much of an idea of
601 : : * how these lateral Vars are being used. See comment above when we
602 : : * set *binary_mode for the non-lateral Var case. This could be
603 : : * relaxed a bit if we had the RestrictInfos and knew the operators
604 : : * being used, however for cases like Vars that are arguments to
605 : : * functions we must operate in binary mode as we don't have
606 : : * visibility into what the function is doing with the Vars.
607 : : */
608 : 648 : *binary_mode = true;
609 [ + + ]: 681 : }
610 : :
611 : : /* We're okay to use memoize */
612 : 19887 : return true;
613 : 25046 : }
614 : :
615 : : /*
616 : : * extract_lateral_vars_from_PHVs
617 : : * Extract lateral references within PlaceHolderVars that are due to be
618 : : * evaluated at 'innerrelids'.
619 : : */
620 : : static List *
621 : 109761 : extract_lateral_vars_from_PHVs(PlannerInfo *root, Relids innerrelids)
622 : : {
623 : 109761 : List *ph_lateral_vars = NIL;
624 : 109761 : ListCell *lc;
625 : :
626 : : /* Nothing would be found if the query contains no LATERAL RTEs */
627 [ + + ]: 109761 : if (!root->hasLateralRTEs)
628 : 106914 : return NIL;
629 : :
630 : : /*
631 : : * No need to consider PHVs that are due to be evaluated at joinrels,
632 : : * since we do not add Memoize nodes on top of joinrel paths.
633 : : */
634 [ + + ]: 2847 : if (bms_membership(innerrelids) == BMS_MULTIPLE)
635 : 921 : return NIL;
636 : :
637 [ + + + + : 2402 : foreach(lc, root->placeholder_list)
+ + ]
638 : : {
639 : 476 : PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc);
640 : 476 : List *vars;
641 : 476 : ListCell *cell;
642 : :
643 : : /* PHV is uninteresting if no lateral refs */
644 [ + + ]: 476 : if (phinfo->ph_lateral == NULL)
645 : 273 : continue;
646 : :
647 : : /* PHV is uninteresting if not due to be evaluated at innerrelids */
648 [ + + ]: 203 : if (!bms_equal(phinfo->ph_eval_at, innerrelids))
649 : 163 : continue;
650 : :
651 : : /*
652 : : * If the PHV does not reference any rels in innerrelids, use its
653 : : * contained expression as a cache key rather than extracting the
654 : : * Vars/PHVs from it and using those. This can be beneficial in cases
655 : : * where the expression results in fewer distinct values to cache
656 : : * tuples for.
657 : : */
658 [ + + + + ]: 80 : if (!bms_overlap(pull_varnos(root, (Node *) phinfo->ph_var->phexpr),
659 : 40 : innerrelids))
660 : : {
661 : 38 : ph_lateral_vars = lappend(ph_lateral_vars, phinfo->ph_var->phexpr);
662 : 38 : continue;
663 : : }
664 : :
665 : : /* Fetch Vars and PHVs of lateral references within PlaceHolderVars */
666 : 2 : vars = pull_vars_of_level((Node *) phinfo->ph_var->phexpr, 0);
667 [ + - + + : 6 : foreach(cell, vars)
+ + ]
668 : : {
669 : 4 : Node *node = (Node *) lfirst(cell);
670 : :
671 [ + + ]: 4 : if (IsA(node, Var))
672 : : {
673 : 2 : Var *var = (Var *) node;
674 : :
675 [ - + ]: 2 : Assert(var->varlevelsup == 0);
676 : :
677 [ + - ]: 2 : if (bms_is_member(var->varno, phinfo->ph_lateral))
678 : 0 : ph_lateral_vars = lappend(ph_lateral_vars, node);
679 : 2 : }
680 [ - + ]: 2 : else if (IsA(node, PlaceHolderVar))
681 : : {
682 : 2 : PlaceHolderVar *phv = (PlaceHolderVar *) node;
683 : :
684 [ - + ]: 2 : Assert(phv->phlevelsup == 0);
685 : :
686 [ - + - + ]: 4 : if (bms_is_subset(find_placeholder_info(root, phv)->ph_eval_at,
687 : 2 : phinfo->ph_lateral))
688 : 2 : ph_lateral_vars = lappend(ph_lateral_vars, node);
689 : 2 : }
690 : : else
691 : 0 : Assert(false);
692 : 4 : }
693 : :
694 : 2 : list_free(vars);
695 [ - + + ]: 476 : }
696 : :
697 : 1926 : return ph_lateral_vars;
698 : 109761 : }
699 : :
700 : : /*
701 : : * get_memoize_path
702 : : * If possible, make and return a Memoize path atop of 'inner_path'.
703 : : * Otherwise return NULL.
704 : : *
705 : : * Note that currently we do not add Memoize nodes on top of join relation
706 : : * paths. This is because the ParamPathInfos for join relation paths do not
707 : : * maintain ppi_clauses, as the set of relevant clauses varies depending on how
708 : : * the join is formed. In addition, joinrels do not maintain lateral_vars. So
709 : : * we do not have a way to extract cache keys from joinrels.
710 : : */
711 : : static Path *
712 : 175138 : get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
713 : : RelOptInfo *outerrel, Path *inner_path,
714 : : Path *outer_path, JoinType jointype,
715 : : JoinPathExtraData *extra)
716 : : {
717 : 175138 : List *param_exprs;
718 : 175138 : List *hash_operators;
719 : 175138 : ListCell *lc;
720 : 175138 : bool binary_mode;
721 : 175138 : List *ph_lateral_vars;
722 : :
723 : : /* Obviously not if it's disabled */
724 [ + + ]: 175138 : if ((extra->pgs_mask & PGS_NESTLOOP_MEMOIZE) == 0)
725 : 1492 : return NULL;
726 : :
727 : : /*
728 : : * We can safely not bother with all this unless we expect to perform more
729 : : * than one inner scan. The first scan is always going to be a cache
730 : : * miss. This would likely fail later anyway based on costs, so this is
731 : : * really just to save some wasted effort.
732 : : */
733 [ + + ]: 173646 : if (outer_path->parent->rows < 2)
734 : 59687 : return NULL;
735 : :
736 : : /*
737 : : * Extract lateral Vars/PHVs within PlaceHolderVars that are due to be
738 : : * evaluated at innerrel. These lateral Vars/PHVs could be used as
739 : : * memoize cache keys.
740 : : */
741 : 113959 : ph_lateral_vars = extract_lateral_vars_from_PHVs(root, innerrel->relids);
742 : :
743 : : /*
744 : : * We can only have a memoize node when there's some kind of cache key,
745 : : * either parameterized path clauses or lateral Vars. No cache key sounds
746 : : * more like something a Materialize node might be more useful for.
747 : : */
748 [ + + ]: 113959 : if ((inner_path->param_info == NULL ||
749 : 31970 : inner_path->param_info->ppi_clauses == NIL) &&
750 [ + + + + ]: 81989 : innerrel->lateral_vars == NIL &&
751 : 79890 : ph_lateral_vars == NIL)
752 : 79878 : return NULL;
753 : :
754 : : /*
755 : : * Currently we don't do this for SEMI and ANTI joins, because nested loop
756 : : * SEMI/ANTI joins don't scan the inner node to completion, which means
757 : : * memoize cannot mark the cache entry as complete. Nor can we mark the
758 : : * cache entry as complete after fetching the first inner tuple, because
759 : : * if that tuple and the current outer tuple don't satisfy the join
760 : : * clauses, a second inner tuple that satisfies the parameters would find
761 : : * the cache entry already marked as complete. The only exception is when
762 : : * the inner relation is provably unique, as in that case, there won't be
763 : : * a second matching tuple and we can safely mark the cache entry as
764 : : * complete after fetching the first inner tuple. Note that in such
765 : : * cases, the SEMI join should have been reduced to an inner join by
766 : : * reduce_unique_semijoins.
767 : : */
768 [ + + + + ]: 34081 : if ((jointype == JOIN_SEMI || jointype == JOIN_ANTI) &&
769 : 29883 : !extra->inner_unique)
770 : 472 : return NULL;
771 : :
772 : : /*
773 : : * Memoize normally marks cache entries as complete when it runs out of
774 : : * tuples to read from its subplan. However, with unique joins, Nested
775 : : * Loop will skip to the next outer tuple after finding the first matching
776 : : * inner tuple. This means that we may not read the inner side of the
777 : : * join to completion which leaves no opportunity to mark the cache entry
778 : : * as complete. To work around that, when the join is unique we
779 : : * automatically mark cache entries as complete after fetching the first
780 : : * tuple. This works when the entire join condition is parameterized.
781 : : * Otherwise, when the parameterization is only a subset of the join
782 : : * condition, we can't be sure which part of it causes the join to be
783 : : * unique. This means there are no guarantees that only 1 tuple will be
784 : : * read. We cannot mark the cache entry as complete after reading the
785 : : * first tuple without that guarantee. This means the scope of Memoize
786 : : * node's usefulness is limited to only outer rows that have no join
787 : : * partner as this is the only case where Nested Loop would exhaust the
788 : : * inner scan of a unique join. Since the scope is limited to that, we
789 : : * just don't bother making a memoize path in this case.
790 : : *
791 : : * Lateral vars needn't be considered here as they're not considered when
792 : : * determining if the join is unique.
793 : : */
794 [ + + ]: 29411 : if (extra->inner_unique)
795 : : {
796 : 15025 : Bitmapset *ppi_serials;
797 : :
798 [ + - ]: 15025 : if (inner_path->param_info == NULL)
799 : 0 : return NULL;
800 : :
801 : 15025 : ppi_serials = inner_path->param_info->ppi_serials;
802 : :
803 [ + + + - : 41998 : foreach_node(RestrictInfo, rinfo, extra->restrictlist)
+ + + + +
+ + + ]
804 : : {
805 [ + + ]: 16287 : if (!bms_is_member(rinfo->rinfo_serial, ppi_serials))
806 : 4339 : return NULL;
807 : 22634 : }
808 [ + + ]: 15025 : }
809 : :
810 : : /*
811 : : * We can't use a memoize node if there are volatile functions in the
812 : : * inner rel's target list or restrict list. A cache hit could reduce the
813 : : * number of calls to these functions.
814 : : */
815 [ - + ]: 25072 : if (contain_volatile_functions((Node *) innerrel->reltarget))
816 : 0 : return NULL;
817 : :
818 [ + + + + : 42863 : foreach(lc, innerrel->baserestrictinfo)
+ + + + ]
819 : : {
820 : 17791 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
821 : :
822 [ + + ]: 17791 : if (contain_volatile_functions((Node *) rinfo))
823 : 26 : return NULL;
824 [ + + ]: 17791 : }
825 : :
826 : : /*
827 : : * Also check the parameterized path restrictinfos for volatile functions.
828 : : * Indexed functions must be immutable so shouldn't have any volatile
829 : : * functions, however, with a lateral join the inner scan may not be an
830 : : * index scan.
831 : : */
832 [ + - ]: 25046 : if (inner_path->param_info != NULL)
833 : : {
834 [ + + + + : 52586 : foreach(lc, inner_path->param_info->ppi_clauses)
+ + - + ]
835 : : {
836 : 27540 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
837 : :
838 [ - + ]: 27540 : if (contain_volatile_functions((Node *) rinfo))
839 : 0 : return NULL;
840 [ - + ]: 27540 : }
841 : 25046 : }
842 : :
843 : : /* Check if we have hash ops for each parameter to the path */
844 [ + + ]: 50092 : if (paraminfo_get_equal_hashops(root,
845 : 25046 : inner_path->param_info,
846 [ + + ]: 25046 : outerrel->top_parent ?
847 : 25046 : outerrel->top_parent : outerrel,
848 : 25046 : innerrel,
849 : 25046 : ph_lateral_vars,
850 : : ¶m_exprs,
851 : : &hash_operators,
852 : : &binary_mode))
853 : : {
854 : 39774 : return (Path *) create_memoize_path(root,
855 : 19887 : innerrel,
856 : 19887 : inner_path,
857 : 19887 : param_exprs,
858 : 19887 : hash_operators,
859 : 19887 : extra->inner_unique,
860 : 19887 : binary_mode,
861 : 19887 : outer_path->rows);
862 : : }
863 : :
864 : 5159 : return NULL;
865 : 170940 : }
866 : :
867 : : /*
868 : : * try_nestloop_path
869 : : * Consider a nestloop join path; if it appears useful, push it into
870 : : * the joinrel's pathlist via add_path().
871 : : */
872 : : static void
873 : 273425 : try_nestloop_path(PlannerInfo *root,
874 : : RelOptInfo *joinrel,
875 : : Path *outer_path,
876 : : Path *inner_path,
877 : : List *pathkeys,
878 : : JoinType jointype,
879 : : uint64 nestloop_subtype,
880 : : JoinPathExtraData *extra)
881 : : {
882 : 273425 : Relids required_outer;
883 : 273425 : JoinCostWorkspace workspace;
884 : 273425 : RelOptInfo *innerrel = inner_path->parent;
885 : 273425 : RelOptInfo *outerrel = outer_path->parent;
886 : 273425 : Relids innerrelids;
887 : 273425 : Relids outerrelids;
888 [ + + ]: 273425 : Relids inner_paramrels = PATH_REQ_OUTER(inner_path);
889 [ + + ]: 273425 : Relids outer_paramrels = PATH_REQ_OUTER(outer_path);
890 : :
891 : : /*
892 : : * If we are forming an outer join at this join, it's nonsensical to use
893 : : * an input path that uses the outer join as part of its parameterization.
894 : : * (This can happen despite our join order restrictions, since those apply
895 : : * to what is in an input relation not what its parameters are.)
896 : : */
897 [ + + + + ]: 299975 : if (extra->sjinfo->ojrelid != 0 &&
898 [ + - ]: 26550 : (bms_is_member(extra->sjinfo->ojrelid, inner_paramrels) ||
899 : 26550 : bms_is_member(extra->sjinfo->ojrelid, outer_paramrels)))
900 : 20 : return;
901 : :
902 : : /*
903 : : * Any parameterization of the input paths refers to topmost parents of
904 : : * the relevant relations, because reparameterize_path_by_child() hasn't
905 : : * been called yet. So we must consider topmost parents of the relations
906 : : * being joined, too, while determining parameterization of the result and
907 : : * checking for disallowed parameterization cases.
908 : : */
909 [ + + ]: 273405 : if (innerrel->top_parent_relids)
910 : 31058 : innerrelids = innerrel->top_parent_relids;
911 : : else
912 : 242347 : innerrelids = innerrel->relids;
913 : :
914 [ + + ]: 273405 : if (outerrel->top_parent_relids)
915 : 31058 : outerrelids = outerrel->top_parent_relids;
916 : : else
917 : 242347 : outerrelids = outerrel->relids;
918 : :
919 : : /*
920 : : * Check to see if proposed path is still parameterized, and reject if the
921 : : * parameterization wouldn't be sensible --- unless allow_star_schema_join
922 : : * says to allow it anyway.
923 : : */
924 : 546810 : required_outer = calc_nestloop_required_outer(outerrelids, outer_paramrels,
925 : 273405 : innerrelids, inner_paramrels);
926 [ + + ]: 273405 : if (required_outer &&
927 [ + + + + ]: 24597 : !bms_overlap(required_outer, extra->param_source_rels) &&
928 : 22096 : !allow_star_schema_join(root, outerrelids, inner_paramrels))
929 : : {
930 : : /* Waste no memory when we reject a path here */
931 : 22032 : bms_free(required_outer);
932 : 22032 : return;
933 : : }
934 : :
935 : : /* If we got past that, we shouldn't have any unsafe outer-join refs */
936 [ + - ]: 251373 : Assert(!have_unsafe_outer_join_ref(root, outerrelids, inner_paramrels));
937 : :
938 : : /*
939 : : * If the inner path is parameterized, it is parameterized by the topmost
940 : : * parent of the outer rel, not the outer rel itself. We will need to
941 : : * translate the parameterization, if this path is chosen, during
942 : : * create_plan(). Here we just check whether we will be able to perform
943 : : * the translation, and if not avoid creating a nestloop path.
944 : : */
945 [ + + + - : 251373 : if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent) &&
+ + + - ]
946 : 2968 : !path_is_reparameterizable_by_child(inner_path, outer_path->parent))
947 : : {
948 : 0 : bms_free(required_outer);
949 : 0 : return;
950 : : }
951 : :
952 : : /*
953 : : * Do a precheck to quickly eliminate obviously-inferior paths. We
954 : : * calculate a cheap lower bound on the path's cost and then use
955 : : * add_path_precheck() to see if the path is clearly going to be dominated
956 : : * by some existing path for the joinrel. If not, do the full pushup with
957 : : * creating a fully valid path structure and submitting it to add_path().
958 : : * The latter two steps are expensive enough to make this two-phase
959 : : * methodology worthwhile.
960 : : */
961 : 502746 : initial_cost_nestloop(root, &workspace, jointype,
962 : 251373 : nestloop_subtype | PGS_CONSIDER_NONPARTIAL,
963 : 251373 : outer_path, inner_path, extra);
964 : :
965 [ + + + + ]: 502746 : if (add_path_precheck(joinrel, workspace.disabled_nodes,
966 : 251373 : workspace.startup_cost, workspace.total_cost,
967 : 251373 : pathkeys, required_outer))
968 : : {
969 : 240364 : add_path(joinrel, (Path *)
970 : 240364 : create_nestloop_path(root,
971 : 120182 : joinrel,
972 : 120182 : jointype,
973 : : &workspace,
974 : 120182 : extra,
975 : 120182 : outer_path,
976 : 120182 : inner_path,
977 : 120182 : extra->restrictlist,
978 : 120182 : pathkeys,
979 : 120182 : required_outer));
980 : 120182 : }
981 : : else
982 : : {
983 : : /* Waste no memory when we reject a path here */
984 : 131191 : bms_free(required_outer);
985 : : }
986 [ - + ]: 273425 : }
987 : :
988 : : /*
989 : : * try_partial_nestloop_path
990 : : * Consider a partial nestloop join path; if it appears useful, push it into
991 : : * the joinrel's partial_pathlist via add_partial_path().
992 : : */
993 : : static void
994 : 33501 : try_partial_nestloop_path(PlannerInfo *root,
995 : : RelOptInfo *joinrel,
996 : : Path *outer_path,
997 : : Path *inner_path,
998 : : List *pathkeys,
999 : : JoinType jointype,
1000 : : uint64 nestloop_subtype,
1001 : : JoinPathExtraData *extra)
1002 : : {
1003 : 33501 : JoinCostWorkspace workspace;
1004 : :
1005 : : /*
1006 : : * If the inner path is parameterized, the parameterization must be fully
1007 : : * satisfied by the proposed outer path. Parameterized partial paths are
1008 : : * not supported. The caller should already have verified that no lateral
1009 : : * rels are required here.
1010 : : */
1011 [ + - ]: 33501 : Assert(bms_is_empty(joinrel->lateral_relids));
1012 [ - + + - ]: 33501 : Assert(bms_is_empty(PATH_REQ_OUTER(outer_path)));
1013 [ + + ]: 33501 : if (inner_path->param_info != NULL)
1014 : : {
1015 : 2521 : Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
1016 : 2521 : RelOptInfo *outerrel = outer_path->parent;
1017 : 2521 : Relids outerrelids;
1018 : :
1019 : : /*
1020 : : * The inner and outer paths are parameterized, if at all, by the top
1021 : : * level parents, not the child relations, so we must use those relids
1022 : : * for our parameterization tests.
1023 : : */
1024 [ + + ]: 2521 : if (outerrel->top_parent_relids)
1025 : 1902 : outerrelids = outerrel->top_parent_relids;
1026 : : else
1027 : 619 : outerrelids = outerrel->relids;
1028 : :
1029 [ + + ]: 2521 : if (!bms_is_subset(inner_paramrels, outerrelids))
1030 : 202 : return;
1031 [ + + ]: 2521 : }
1032 : :
1033 : : /*
1034 : : * If the inner path is parameterized, it is parameterized by the topmost
1035 : : * parent of the outer rel, not the outer rel itself. We will need to
1036 : : * translate the parameterization, if this path is chosen, during
1037 : : * create_plan(). Here we just check whether we will be able to perform
1038 : : * the translation, and if not avoid creating a nestloop path.
1039 : : */
1040 [ + + + - : 33299 : if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent) &&
+ + + - ]
1041 : 1740 : !path_is_reparameterizable_by_child(inner_path, outer_path->parent))
1042 : 0 : return;
1043 : :
1044 : : /*
1045 : : * Before creating a path, get a quick lower bound on what it is likely to
1046 : : * cost. Bail out right away if it looks terrible.
1047 : : */
1048 : 66598 : initial_cost_nestloop(root, &workspace, jointype, nestloop_subtype,
1049 : 33299 : outer_path, inner_path, extra);
1050 [ + + + + ]: 66598 : if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
1051 : 33299 : workspace.total_cost, pathkeys))
1052 : 25964 : return;
1053 : :
1054 : : /* Might be good enough to be worth trying, so let's try it. */
1055 : 14670 : add_partial_path(joinrel, (Path *)
1056 : 14670 : create_nestloop_path(root,
1057 : 7335 : joinrel,
1058 : 7335 : jointype,
1059 : : &workspace,
1060 : 7335 : extra,
1061 : 7335 : outer_path,
1062 : 7335 : inner_path,
1063 : 7335 : extra->restrictlist,
1064 : 7335 : pathkeys,
1065 : : NULL));
1066 [ - + ]: 33501 : }
1067 : :
1068 : : /*
1069 : : * try_mergejoin_path
1070 : : * Consider a merge join path; if it appears useful, push it into
1071 : : * the joinrel's pathlist via add_path().
1072 : : */
1073 : : static void
1074 : 113693 : try_mergejoin_path(PlannerInfo *root,
1075 : : RelOptInfo *joinrel,
1076 : : Path *outer_path,
1077 : : Path *inner_path,
1078 : : List *pathkeys,
1079 : : List *mergeclauses,
1080 : : List *outersortkeys,
1081 : : List *innersortkeys,
1082 : : JoinType jointype,
1083 : : JoinPathExtraData *extra,
1084 : : bool is_partial)
1085 : : {
1086 : 113693 : Relids required_outer;
1087 : 113693 : int outer_presorted_keys = 0;
1088 : 113693 : JoinCostWorkspace workspace;
1089 : :
1090 [ + + ]: 113693 : if (is_partial)
1091 : : {
1092 : 7602 : try_partial_mergejoin_path(root,
1093 : 3801 : joinrel,
1094 : 3801 : outer_path,
1095 : 3801 : inner_path,
1096 : 3801 : pathkeys,
1097 : 3801 : mergeclauses,
1098 : 3801 : outersortkeys,
1099 : 3801 : innersortkeys,
1100 : 3801 : jointype,
1101 : 3801 : extra);
1102 : 3801 : return;
1103 : : }
1104 : :
1105 : : /*
1106 : : * If we are forming an outer join at this join, it's nonsensical to use
1107 : : * an input path that uses the outer join as part of its parameterization.
1108 : : * (This can happen despite our join order restrictions, since those apply
1109 : : * to what is in an input relation not what its parameters are.)
1110 : : */
1111 [ + + + + ]: 129917 : if (extra->sjinfo->ojrelid != 0 &&
1112 [ + + - + ]: 20025 : (bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(inner_path)) ||
1113 [ + + ]: 20025 : bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(outer_path))))
1114 : 2 : return;
1115 : :
1116 : : /*
1117 : : * Check to see if proposed path is still parameterized, and reject if the
1118 : : * parameterization wouldn't be sensible.
1119 : : */
1120 : 219780 : required_outer = calc_non_nestloop_required_outer(outer_path,
1121 : 109890 : inner_path);
1122 [ + + + + ]: 109890 : if (required_outer &&
1123 : 3076 : !bms_overlap(required_outer, extra->param_source_rels))
1124 : : {
1125 : : /* Waste no memory when we reject a path here */
1126 : 2736 : bms_free(required_outer);
1127 : 2736 : return;
1128 : : }
1129 : :
1130 : : /*
1131 : : * If the given paths are already well enough ordered, we can skip doing
1132 : : * an explicit sort.
1133 : : *
1134 : : * We need to determine the number of presorted keys of the outer path to
1135 : : * decide whether explicit incremental sort can be applied when
1136 : : * outersortkeys is not NIL. We do not need to do the same for the inner
1137 : : * path though, as incremental sort currently does not support
1138 : : * mark/restore.
1139 : : */
1140 [ + + + + ]: 107154 : if (outersortkeys &&
1141 : 56762 : pathkeys_count_contained_in(outersortkeys, outer_path->pathkeys,
1142 : : &outer_presorted_keys))
1143 : 1475 : outersortkeys = NIL;
1144 [ + + + + ]: 107154 : if (innersortkeys &&
1145 : 89238 : pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
1146 : 2171 : innersortkeys = NIL;
1147 : :
1148 : : /*
1149 : : * See comments in try_nestloop_path().
1150 : : */
1151 : 214308 : initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
1152 : 107154 : outer_path, inner_path,
1153 : 107154 : outersortkeys, innersortkeys,
1154 : 107154 : outer_presorted_keys,
1155 : 107154 : extra);
1156 : :
1157 [ + + + + ]: 214308 : if (add_path_precheck(joinrel, workspace.disabled_nodes,
1158 : 107154 : workspace.startup_cost, workspace.total_cost,
1159 : 107154 : pathkeys, required_outer))
1160 : : {
1161 : 75518 : add_path(joinrel, (Path *)
1162 : 75518 : create_mergejoin_path(root,
1163 : 37759 : joinrel,
1164 : 37759 : jointype,
1165 : : &workspace,
1166 : 37759 : extra,
1167 : 37759 : outer_path,
1168 : 37759 : inner_path,
1169 : 37759 : extra->restrictlist,
1170 : 37759 : pathkeys,
1171 : 37759 : required_outer,
1172 : 37759 : mergeclauses,
1173 : 37759 : outersortkeys,
1174 : 37759 : innersortkeys,
1175 : 37759 : outer_presorted_keys));
1176 : 37759 : }
1177 : : else
1178 : : {
1179 : : /* Waste no memory when we reject a path here */
1180 : 69395 : bms_free(required_outer);
1181 : : }
1182 [ - + ]: 113693 : }
1183 : :
1184 : : /*
1185 : : * try_partial_mergejoin_path
1186 : : * Consider a partial merge join path; if it appears useful, push it into
1187 : : * the joinrel's pathlist via add_partial_path().
1188 : : */
1189 : : static void
1190 : 16515 : try_partial_mergejoin_path(PlannerInfo *root,
1191 : : RelOptInfo *joinrel,
1192 : : Path *outer_path,
1193 : : Path *inner_path,
1194 : : List *pathkeys,
1195 : : List *mergeclauses,
1196 : : List *outersortkeys,
1197 : : List *innersortkeys,
1198 : : JoinType jointype,
1199 : : JoinPathExtraData *extra)
1200 : : {
1201 : 16515 : int outer_presorted_keys = 0;
1202 : 16515 : JoinCostWorkspace workspace;
1203 : :
1204 : : /*
1205 : : * See comments in try_partial_hashjoin_path().
1206 : : */
1207 [ + - ]: 16515 : Assert(bms_is_empty(joinrel->lateral_relids));
1208 [ - + + - ]: 16515 : Assert(bms_is_empty(PATH_REQ_OUTER(outer_path)));
1209 [ - + - + ]: 16515 : if (!bms_is_empty(PATH_REQ_OUTER(inner_path)))
1210 : 0 : return;
1211 : :
1212 : : /*
1213 : : * If the given paths are already well enough ordered, we can skip doing
1214 : : * an explicit sort.
1215 : : *
1216 : : * We need to determine the number of presorted keys of the outer path to
1217 : : * decide whether explicit incremental sort can be applied when
1218 : : * outersortkeys is not NIL. We do not need to do the same for the inner
1219 : : * path though, as incremental sort currently does not support
1220 : : * mark/restore.
1221 : : */
1222 [ + + + + ]: 16515 : if (outersortkeys &&
1223 : 12714 : pathkeys_count_contained_in(outersortkeys, outer_path->pathkeys,
1224 : : &outer_presorted_keys))
1225 : 7 : outersortkeys = NIL;
1226 [ + + + + ]: 16515 : if (innersortkeys &&
1227 : 16019 : pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
1228 : 107 : innersortkeys = NIL;
1229 : :
1230 : : /*
1231 : : * See comments in try_partial_nestloop_path().
1232 : : */
1233 : 33030 : initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
1234 : 16515 : outer_path, inner_path,
1235 : 16515 : outersortkeys, innersortkeys,
1236 : 16515 : outer_presorted_keys,
1237 : 16515 : extra);
1238 : :
1239 [ + + + + ]: 33030 : if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
1240 : 16515 : workspace.total_cost, pathkeys))
1241 : 5555 : return;
1242 : :
1243 : : /* Might be good enough to be worth trying, so let's try it. */
1244 : 21920 : add_partial_path(joinrel, (Path *)
1245 : 21920 : create_mergejoin_path(root,
1246 : 10960 : joinrel,
1247 : 10960 : jointype,
1248 : : &workspace,
1249 : 10960 : extra,
1250 : 10960 : outer_path,
1251 : 10960 : inner_path,
1252 : 10960 : extra->restrictlist,
1253 : 10960 : pathkeys,
1254 : : NULL,
1255 : 10960 : mergeclauses,
1256 : 10960 : outersortkeys,
1257 : 10960 : innersortkeys,
1258 : 10960 : outer_presorted_keys));
1259 [ - + ]: 16515 : }
1260 : :
1261 : : /*
1262 : : * try_hashjoin_path
1263 : : * Consider a hash join path; if it appears useful, push it into
1264 : : * the joinrel's pathlist via add_path().
1265 : : */
1266 : : static void
1267 : 65448 : try_hashjoin_path(PlannerInfo *root,
1268 : : RelOptInfo *joinrel,
1269 : : Path *outer_path,
1270 : : Path *inner_path,
1271 : : List *hashclauses,
1272 : : JoinType jointype,
1273 : : JoinPathExtraData *extra)
1274 : : {
1275 : 65448 : Relids required_outer;
1276 : 65448 : JoinCostWorkspace workspace;
1277 : :
1278 : : /*
1279 : : * If we are forming an outer join at this join, it's nonsensical to use
1280 : : * an input path that uses the outer join as part of its parameterization.
1281 : : * (This can happen despite our join order restrictions, since those apply
1282 : : * to what is in an input relation not what its parameters are.)
1283 : : */
1284 [ + + + + ]: 78320 : if (extra->sjinfo->ojrelid != 0 &&
1285 [ + + + + ]: 12877 : (bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(inner_path)) ||
1286 [ + + ]: 12872 : bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(outer_path))))
1287 : 10 : return;
1288 : :
1289 : : /*
1290 : : * Check to see if proposed path is still parameterized, and reject if the
1291 : : * parameterization wouldn't be sensible.
1292 : : */
1293 : 130876 : required_outer = calc_non_nestloop_required_outer(outer_path,
1294 : 65438 : inner_path);
1295 [ + + + + ]: 65438 : if (required_outer &&
1296 : 8525 : !bms_overlap(required_outer, extra->param_source_rels))
1297 : : {
1298 : : /* Waste no memory when we reject a path here */
1299 : 7774 : bms_free(required_outer);
1300 : 7774 : return;
1301 : : }
1302 : :
1303 : : /*
1304 : : * See comments in try_nestloop_path(). Also note that hashjoin paths
1305 : : * never have any output pathkeys, per comments in create_hashjoin_path.
1306 : : */
1307 : 115328 : initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
1308 : 57664 : outer_path, inner_path, extra, false);
1309 : :
1310 [ + + + + ]: 115328 : if (add_path_precheck(joinrel, workspace.disabled_nodes,
1311 : 57664 : workspace.startup_cost, workspace.total_cost,
1312 : 57664 : NIL, required_outer))
1313 : : {
1314 : 69986 : add_path(joinrel, (Path *)
1315 : 69986 : create_hashjoin_path(root,
1316 : 34993 : joinrel,
1317 : 34993 : jointype,
1318 : : &workspace,
1319 : 34993 : extra,
1320 : 34993 : outer_path,
1321 : 34993 : inner_path,
1322 : : false, /* parallel_hash */
1323 : 34993 : extra->restrictlist,
1324 : 34993 : required_outer,
1325 : 34993 : hashclauses));
1326 : 34993 : }
1327 : : else
1328 : : {
1329 : : /* Waste no memory when we reject a path here */
1330 : 22671 : bms_free(required_outer);
1331 : : }
1332 [ - + ]: 65448 : }
1333 : :
1334 : : /*
1335 : : * try_partial_hashjoin_path
1336 : : * Consider a partial hashjoin join path; if it appears useful, push it into
1337 : : * the joinrel's partial_pathlist via add_partial_path().
1338 : : * The outer side is partial. If parallel_hash is true, then the inner path
1339 : : * must be partial and will be run in parallel to create one or more shared
1340 : : * hash tables; otherwise the inner path must be complete and a copy of it
1341 : : * is run in every process to create separate identical private hash tables.
1342 : : */
1343 : : static void
1344 : 24646 : try_partial_hashjoin_path(PlannerInfo *root,
1345 : : RelOptInfo *joinrel,
1346 : : Path *outer_path,
1347 : : Path *inner_path,
1348 : : List *hashclauses,
1349 : : JoinType jointype,
1350 : : JoinPathExtraData *extra,
1351 : : bool parallel_hash)
1352 : : {
1353 : 24646 : JoinCostWorkspace workspace;
1354 : :
1355 : : /*
1356 : : * If the inner path is parameterized, we can't use a partial hashjoin.
1357 : : * Parameterized partial paths are not supported. The caller should
1358 : : * already have verified that no lateral rels are required here.
1359 : : */
1360 [ + - ]: 24646 : Assert(bms_is_empty(joinrel->lateral_relids));
1361 [ - + + - ]: 24646 : Assert(bms_is_empty(PATH_REQ_OUTER(outer_path)));
1362 [ - + - + ]: 24646 : if (!bms_is_empty(PATH_REQ_OUTER(inner_path)))
1363 : 0 : return;
1364 : :
1365 : : /*
1366 : : * Before creating a path, get a quick lower bound on what it is likely to
1367 : : * cost. Bail out right away if it looks terrible.
1368 : : */
1369 : 49292 : initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
1370 : 24646 : outer_path, inner_path, extra, parallel_hash);
1371 [ + + + + ]: 49292 : if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
1372 : 24646 : workspace.total_cost, NIL))
1373 : 6762 : return;
1374 : :
1375 : : /* Might be good enough to be worth trying, so let's try it. */
1376 : 35768 : add_partial_path(joinrel, (Path *)
1377 : 35768 : create_hashjoin_path(root,
1378 : 17884 : joinrel,
1379 : 17884 : jointype,
1380 : : &workspace,
1381 : 17884 : extra,
1382 : 17884 : outer_path,
1383 : 17884 : inner_path,
1384 : 17884 : parallel_hash,
1385 : 17884 : extra->restrictlist,
1386 : : NULL,
1387 : 17884 : hashclauses));
1388 [ - + ]: 24646 : }
1389 : :
1390 : : /*
1391 : : * sort_inner_and_outer
1392 : : * Create mergejoin join paths by explicitly sorting both the outer and
1393 : : * inner join relations on each available merge ordering.
1394 : : *
1395 : : * 'joinrel' is the join relation
1396 : : * 'outerrel' is the outer join relation
1397 : : * 'innerrel' is the inner join relation
1398 : : * 'jointype' is the type of join to do
1399 : : * 'extra' contains additional input values
1400 : : */
1401 : : static void
1402 : 65617 : sort_inner_and_outer(PlannerInfo *root,
1403 : : RelOptInfo *joinrel,
1404 : : RelOptInfo *outerrel,
1405 : : RelOptInfo *innerrel,
1406 : : JoinType jointype,
1407 : : JoinPathExtraData *extra)
1408 : : {
1409 : 65617 : Path *outer_path;
1410 : 65617 : Path *inner_path;
1411 : 65617 : Path *cheapest_partial_outer = NULL;
1412 : 65617 : Path *cheapest_safe_inner = NULL;
1413 : 65617 : List *all_pathkeys;
1414 : 65617 : ListCell *l;
1415 : :
1416 : : /* Nothing to do if there are no available mergejoin clauses */
1417 [ + + ]: 65617 : if (extra->mergeclause_list == NIL)
1418 : 12528 : return;
1419 : :
1420 : : /*
1421 : : * We only consider the cheapest-total-cost input paths, since we are
1422 : : * assuming here that a sort is required. We will consider
1423 : : * cheapest-startup-cost input paths later, and only if they don't need a
1424 : : * sort.
1425 : : *
1426 : : * This function intentionally does not consider parameterized input
1427 : : * paths, except when the cheapest-total is parameterized. If we did so,
1428 : : * we'd have a combinatorial explosion of mergejoin paths of dubious
1429 : : * value. This interacts with decisions elsewhere that also discriminate
1430 : : * against mergejoins with parameterized inputs; see comments in
1431 : : * src/backend/optimizer/README.
1432 : : */
1433 : 53089 : outer_path = outerrel->cheapest_total_path;
1434 : 53089 : inner_path = innerrel->cheapest_total_path;
1435 : :
1436 : : /*
1437 : : * If either cheapest-total path is parameterized by the other rel, we
1438 : : * can't use a mergejoin. (There's no use looking for alternative input
1439 : : * paths, since these should already be the least-parameterized available
1440 : : * paths.)
1441 : : */
1442 [ + + + - : 53234 : if (PATH_PARAM_BY_REL(outer_path, innerrel) ||
+ + + + +
- ]
1443 [ + + + - : 52715 : PATH_PARAM_BY_REL(inner_path, outerrel))
+ + + - ]
1444 : 766 : return;
1445 : :
1446 : : /*
1447 : : * If the joinrel is parallel-safe, we may be able to consider a partial
1448 : : * merge join. However, we can't handle JOIN_FULL, JOIN_RIGHT and
1449 : : * JOIN_RIGHT_ANTI, because they can produce false null extended rows.
1450 : : * Also, the resulting path must not be parameterized.
1451 : : */
1452 [ + + ]: 52323 : if (joinrel->consider_parallel &&
1453 [ + + ]: 47726 : jointype != JOIN_FULL &&
1454 [ + + ]: 47348 : jointype != JOIN_RIGHT &&
1455 [ + + ]: 42627 : jointype != JOIN_RIGHT_ANTI &&
1456 [ + + - + ]: 42515 : outerrel->partial_pathlist != NIL &&
1457 : 12125 : bms_is_empty(joinrel->lateral_relids))
1458 : : {
1459 : 12125 : cheapest_partial_outer = (Path *) linitial(outerrel->partial_pathlist);
1460 : :
1461 [ + + ]: 12125 : if (inner_path->parallel_safe)
1462 : 12084 : cheapest_safe_inner = inner_path;
1463 : : else
1464 : 41 : cheapest_safe_inner =
1465 : 41 : get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
1466 : 12125 : }
1467 : :
1468 : : /*
1469 : : * Each possible ordering of the available mergejoin clauses will generate
1470 : : * a differently-sorted result path at essentially the same cost. We have
1471 : : * no basis for choosing one over another at this level of joining, but
1472 : : * some sort orders may be more useful than others for higher-level
1473 : : * mergejoins, so it's worth considering multiple orderings.
1474 : : *
1475 : : * Actually, it's not quite true that every mergeclause ordering will
1476 : : * generate a different path order, because some of the clauses may be
1477 : : * partially redundant (refer to the same EquivalenceClasses). Therefore,
1478 : : * what we do is convert the mergeclause list to a list of canonical
1479 : : * pathkeys, and then consider different orderings of the pathkeys.
1480 : : *
1481 : : * Generating a path for *every* permutation of the pathkeys doesn't seem
1482 : : * like a winning strategy; the cost in planning time is too high. For
1483 : : * now, we generate one path for each pathkey, listing that pathkey first
1484 : : * and the rest in random order. This should allow at least a one-clause
1485 : : * mergejoin without re-sorting against any other possible mergejoin
1486 : : * partner path. But if we've not guessed the right ordering of secondary
1487 : : * keys, we may end up evaluating clauses as qpquals when they could have
1488 : : * been done as mergeclauses. (In practice, it's rare that there's more
1489 : : * than two or three mergeclauses, so expending a huge amount of thought
1490 : : * on that is probably not worth it.)
1491 : : *
1492 : : * The pathkey order returned by select_outer_pathkeys_for_merge() has
1493 : : * some heuristics behind it (see that function), so be sure to try it
1494 : : * exactly as-is as well as making variants.
1495 : : */
1496 : 104646 : all_pathkeys = select_outer_pathkeys_for_merge(root,
1497 : 52323 : extra->mergeclause_list,
1498 : 52323 : joinrel);
1499 : :
1500 [ + - + + : 109085 : foreach(l, all_pathkeys)
+ + ]
1501 : : {
1502 : 56762 : PathKey *front_pathkey = (PathKey *) lfirst(l);
1503 : 56762 : List *cur_mergeclauses;
1504 : 56762 : List *outerkeys;
1505 : 56762 : List *innerkeys;
1506 : 56762 : List *merge_pathkeys;
1507 : :
1508 : : /* Make a pathkey list with this guy first */
1509 [ + + ]: 56762 : if (l != list_head(all_pathkeys))
1510 : 8878 : outerkeys = lcons(front_pathkey,
1511 : 8878 : list_delete_nth_cell(list_copy(all_pathkeys),
1512 : 4439 : foreach_current_index(l)));
1513 : : else
1514 : 52323 : outerkeys = all_pathkeys; /* no work at first one... */
1515 : :
1516 : : /* Sort the mergeclauses into the corresponding ordering */
1517 : 56762 : cur_mergeclauses =
1518 : 113524 : find_mergeclauses_for_outer_pathkeys(root,
1519 : 56762 : outerkeys,
1520 : 56762 : extra->mergeclause_list);
1521 : :
1522 : : /* Should have used them all... */
1523 [ + - ]: 56762 : Assert(list_length(cur_mergeclauses) == list_length(extra->mergeclause_list));
1524 : :
1525 : : /* Build sort pathkeys for the inner side */
1526 : 113524 : innerkeys = make_inner_pathkeys_for_merge(root,
1527 : 56762 : cur_mergeclauses,
1528 : 56762 : outerkeys);
1529 : :
1530 : : /* Build pathkeys representing output sort order */
1531 : 113524 : merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1532 : 56762 : outerkeys);
1533 : :
1534 : : /*
1535 : : * And now we can make the path.
1536 : : *
1537 : : * Note: it's possible that the cheapest paths will already be sorted
1538 : : * properly. try_mergejoin_path will detect that case and suppress an
1539 : : * explicit sort step, so we needn't do so here.
1540 : : */
1541 : 113524 : try_mergejoin_path(root,
1542 : 56762 : joinrel,
1543 : 56762 : outer_path,
1544 : 56762 : inner_path,
1545 : 56762 : merge_pathkeys,
1546 : 56762 : cur_mergeclauses,
1547 : 56762 : outerkeys,
1548 : 56762 : innerkeys,
1549 : 56762 : jointype,
1550 : 56762 : extra,
1551 : : false);
1552 : :
1553 : : /*
1554 : : * If we have partial outer and parallel safe inner path then try
1555 : : * partial mergejoin path.
1556 : : */
1557 [ + + + + ]: 56762 : if (cheapest_partial_outer && cheapest_safe_inner)
1558 : 25428 : try_partial_mergejoin_path(root,
1559 : 12714 : joinrel,
1560 : 12714 : cheapest_partial_outer,
1561 : 12714 : cheapest_safe_inner,
1562 : 12714 : merge_pathkeys,
1563 : 12714 : cur_mergeclauses,
1564 : 12714 : outerkeys,
1565 : 12714 : innerkeys,
1566 : 12714 : jointype,
1567 : 12714 : extra);
1568 : 56762 : }
1569 [ - + ]: 65153 : }
1570 : :
1571 : : /*
1572 : : * generate_mergejoin_paths
1573 : : * Creates possible mergejoin paths for input outerpath.
1574 : : *
1575 : : * We generate mergejoins if mergejoin clauses are available. We have
1576 : : * two ways to generate the inner path for a mergejoin: sort the cheapest
1577 : : * inner path, or use an inner path that is already suitably ordered for the
1578 : : * merge. If we have several mergeclauses, it could be that there is no inner
1579 : : * path (or only a very expensive one) for the full list of mergeclauses, but
1580 : : * better paths exist if we truncate the mergeclause list (thereby discarding
1581 : : * some sort key requirements). So, we consider truncations of the
1582 : : * mergeclause list as well as the full list. (Ideally we'd consider all
1583 : : * subsets of the mergeclause list, but that seems way too expensive.)
1584 : : */
1585 : : static void
1586 : 129276 : generate_mergejoin_paths(PlannerInfo *root,
1587 : : RelOptInfo *joinrel,
1588 : : RelOptInfo *innerrel,
1589 : : Path *outerpath,
1590 : : JoinType jointype,
1591 : : JoinPathExtraData *extra,
1592 : : bool useallclauses,
1593 : : Path *inner_cheapest_total,
1594 : : List *merge_pathkeys,
1595 : : bool is_partial)
1596 : : {
1597 : 129276 : List *mergeclauses;
1598 : 129276 : List *innersortkeys;
1599 : 129276 : List *trialsortkeys;
1600 : 129276 : Path *cheapest_startup_inner;
1601 : 129276 : Path *cheapest_total_inner;
1602 : 129276 : int num_sortkeys;
1603 : 129276 : int sortkeycnt;
1604 : :
1605 : : /* Look for useful mergeclauses (if any) */
1606 : 129276 : mergeclauses =
1607 : 258552 : find_mergeclauses_for_outer_pathkeys(root,
1608 : 129276 : outerpath->pathkeys,
1609 : 129276 : extra->mergeclause_list);
1610 : :
1611 : : /*
1612 : : * Done with this outer path if no chance for a mergejoin.
1613 : : *
1614 : : * Special corner case: for "x FULL JOIN y ON true", there will be no join
1615 : : * clauses at all. Ordinarily we'd generate a clauseless nestloop path,
1616 : : * but since mergejoin is our only join type that supports FULL JOIN
1617 : : * without any join clauses, it's necessary to generate a clauseless
1618 : : * mergejoin path instead.
1619 : : */
1620 [ + + ]: 129276 : if (mergeclauses == NIL)
1621 : : {
1622 [ + + ]: 91812 : if (jointype == JOIN_FULL)
1623 : : /* okay to try for mergejoin */ ;
1624 : : else
1625 : 91317 : return;
1626 : 495 : }
1627 [ + + + + ]: 37959 : if (useallclauses &&
1628 : 4947 : list_length(mergeclauses) != list_length(extra->mergeclause_list))
1629 : 590 : return;
1630 : :
1631 : : /* Compute the required ordering of the inner path */
1632 : 74738 : innersortkeys = make_inner_pathkeys_for_merge(root,
1633 : 37369 : mergeclauses,
1634 : 37369 : outerpath->pathkeys);
1635 : :
1636 : : /*
1637 : : * Generate a mergejoin on the basis of sorting the cheapest inner. Since
1638 : : * a sort will be needed, only cheapest total cost matters. (But
1639 : : * try_mergejoin_path will do the right thing if inner_cheapest_total is
1640 : : * already correctly sorted.)
1641 : : */
1642 : 74738 : try_mergejoin_path(root,
1643 : 37369 : joinrel,
1644 : 37369 : outerpath,
1645 : 37369 : inner_cheapest_total,
1646 : 37369 : merge_pathkeys,
1647 : 37369 : mergeclauses,
1648 : : NIL,
1649 : 37369 : innersortkeys,
1650 : 37369 : jointype,
1651 : 37369 : extra,
1652 : 37369 : is_partial);
1653 : :
1654 : : /*
1655 : : * Look for presorted inner paths that satisfy the innersortkey list ---
1656 : : * or any truncation thereof, if we are allowed to build a mergejoin using
1657 : : * a subset of the merge clauses. Here, we consider both cheap startup
1658 : : * cost and cheap total cost.
1659 : : *
1660 : : * Currently we do not consider parameterized inner paths here. This
1661 : : * interacts with decisions elsewhere that also discriminate against
1662 : : * mergejoins with parameterized inputs; see comments in
1663 : : * src/backend/optimizer/README.
1664 : : *
1665 : : * As we shorten the sortkey list, we should consider only paths that are
1666 : : * strictly cheaper than (in particular, not the same as) any path found
1667 : : * in an earlier iteration. Otherwise we'd be intentionally using fewer
1668 : : * merge keys than a given path allows (treating the rest as plain
1669 : : * joinquals), which is unlikely to be a good idea. Also, eliminating
1670 : : * paths here on the basis of compare_path_costs is a lot cheaper than
1671 : : * building the mergejoin path only to throw it away.
1672 : : *
1673 : : * If inner_cheapest_total is well enough sorted to have not required a
1674 : : * sort in the path made above, we shouldn't make a duplicate path with
1675 : : * it, either. We handle that case with the same logic that handles the
1676 : : * previous consideration, by initializing the variables that track
1677 : : * cheapest-so-far properly. Note that we do NOT reject
1678 : : * inner_cheapest_total if we find it matches some shorter set of
1679 : : * pathkeys. That case corresponds to using fewer mergekeys to avoid
1680 : : * sorting inner_cheapest_total, whereas we did sort it above, so the
1681 : : * plans being considered are different.
1682 : : */
1683 [ + + + + ]: 74738 : if (pathkeys_contained_in(innersortkeys,
1684 : 37369 : inner_cheapest_total->pathkeys))
1685 : : {
1686 : : /* inner_cheapest_total didn't require a sort */
1687 : 775 : cheapest_startup_inner = inner_cheapest_total;
1688 : 775 : cheapest_total_inner = inner_cheapest_total;
1689 : 775 : }
1690 : : else
1691 : : {
1692 : : /* it did require a sort, at least for the full set of keys */
1693 : 36594 : cheapest_startup_inner = NULL;
1694 : 36594 : cheapest_total_inner = NULL;
1695 : : }
1696 : 37369 : num_sortkeys = list_length(innersortkeys);
1697 [ + + + + ]: 37369 : if (num_sortkeys > 1 && !useallclauses)
1698 : 2112 : trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */
1699 : : else
1700 : 35257 : trialsortkeys = innersortkeys; /* won't really truncate */
1701 : :
1702 [ + + ]: 72515 : for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
1703 : : {
1704 : 39483 : Path *innerpath;
1705 : 39483 : List *newclauses = NIL;
1706 : :
1707 : : /*
1708 : : * Look for an inner path ordered well enough for the first
1709 : : * 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified
1710 : : * destructively, which is why we made a copy...
1711 : : */
1712 : 39483 : trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
1713 : 78966 : innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
1714 : 39483 : trialsortkeys,
1715 : : NULL,
1716 : : TOTAL_COST,
1717 : 39483 : is_partial);
1718 [ + + + + ]: 41644 : if (innerpath != NULL &&
1719 [ + + ]: 20651 : (cheapest_total_inner == NULL ||
1720 : 2161 : compare_path_costs(innerpath, cheapest_total_inner,
1721 : 2161 : TOTAL_COST) < 0))
1722 : : {
1723 : : /* Found a cheap (or even-cheaper) sorted path */
1724 : : /* Select the right mergeclauses, if we didn't already */
1725 [ + + ]: 19178 : if (sortkeycnt < num_sortkeys)
1726 : : {
1727 : 725 : newclauses =
1728 : 1450 : trim_mergeclauses_for_inner_pathkeys(root,
1729 : 725 : mergeclauses,
1730 : 725 : trialsortkeys);
1731 [ - + ]: 725 : Assert(newclauses != NIL);
1732 : 725 : }
1733 : : else
1734 : 18453 : newclauses = mergeclauses;
1735 : 38356 : try_mergejoin_path(root,
1736 : 19178 : joinrel,
1737 : 19178 : outerpath,
1738 : 19178 : innerpath,
1739 : 19178 : merge_pathkeys,
1740 : 19178 : newclauses,
1741 : : NIL,
1742 : : NIL,
1743 : 19178 : jointype,
1744 : 19178 : extra,
1745 : 19178 : is_partial);
1746 : 19178 : cheapest_total_inner = innerpath;
1747 : 19178 : }
1748 : : /* Same on the basis of cheapest startup cost ... */
1749 : 78966 : innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
1750 : 39483 : trialsortkeys,
1751 : : NULL,
1752 : : STARTUP_COST,
1753 : 39483 : is_partial);
1754 [ + + + + ]: 41644 : if (innerpath != NULL &&
1755 [ + + ]: 20651 : (cheapest_startup_inner == NULL ||
1756 : 2161 : compare_path_costs(innerpath, cheapest_startup_inner,
1757 : 2161 : STARTUP_COST) < 0))
1758 : : {
1759 : : /* Found a cheap (or even-cheaper) sorted path */
1760 [ + + ]: 18777 : if (innerpath != cheapest_total_inner)
1761 : : {
1762 : : /*
1763 : : * Avoid rebuilding clause list if we already made one; saves
1764 : : * memory in big join trees...
1765 : : */
1766 [ + - ]: 384 : if (newclauses == NIL)
1767 : : {
1768 [ # # ]: 0 : if (sortkeycnt < num_sortkeys)
1769 : : {
1770 : 0 : newclauses =
1771 : 0 : trim_mergeclauses_for_inner_pathkeys(root,
1772 : 0 : mergeclauses,
1773 : 0 : trialsortkeys);
1774 [ # # ]: 0 : Assert(newclauses != NIL);
1775 : 0 : }
1776 : : else
1777 : 0 : newclauses = mergeclauses;
1778 : 0 : }
1779 : 768 : try_mergejoin_path(root,
1780 : 384 : joinrel,
1781 : 384 : outerpath,
1782 : 384 : innerpath,
1783 : 384 : merge_pathkeys,
1784 : 384 : newclauses,
1785 : : NIL,
1786 : : NIL,
1787 : 384 : jointype,
1788 : 384 : extra,
1789 : 384 : is_partial);
1790 : 384 : }
1791 : 18777 : cheapest_startup_inner = innerpath;
1792 : 18777 : }
1793 : :
1794 : : /*
1795 : : * Don't consider truncated sortkeys if we need all clauses.
1796 : : */
1797 [ + + ]: 39483 : if (useallclauses)
1798 : 4337 : break;
1799 [ + + ]: 39483 : }
1800 : 129276 : }
1801 : :
1802 : : /*
1803 : : * match_unsorted_outer
1804 : : * Creates possible join paths for processing a single join relation
1805 : : * 'joinrel' by employing either iterative substitution or
1806 : : * mergejoining on each of its possible outer paths (considering
1807 : : * only outer paths that are already ordered well enough for merging).
1808 : : *
1809 : : * We always generate a nestloop path for each available outer path.
1810 : : * In fact we may generate as many as five: one on the cheapest-total-cost
1811 : : * inner path, one on the same with materialization, one on the
1812 : : * cheapest-startup-cost inner path (if different), one on the
1813 : : * cheapest-total inner-indexscan path (if any), and one on the
1814 : : * cheapest-startup inner-indexscan path (if different).
1815 : : *
1816 : : * We also consider mergejoins if mergejoin clauses are available. See
1817 : : * detailed comments in generate_mergejoin_paths.
1818 : : *
1819 : : * 'joinrel' is the join relation
1820 : : * 'outerrel' is the outer join relation
1821 : : * 'innerrel' is the inner join relation
1822 : : * 'jointype' is the type of join to do
1823 : : * 'extra' contains additional input values
1824 : : */
1825 : : static void
1826 : 65153 : match_unsorted_outer(PlannerInfo *root,
1827 : : RelOptInfo *joinrel,
1828 : : RelOptInfo *outerrel,
1829 : : RelOptInfo *innerrel,
1830 : : JoinType jointype,
1831 : : JoinPathExtraData *extra)
1832 : : {
1833 : 65153 : bool nestjoinOK;
1834 : 65153 : bool useallclauses;
1835 : 65153 : Path *inner_cheapest_total = innerrel->cheapest_total_path;
1836 : 65153 : Path *matpath = NULL;
1837 : 65153 : ListCell *lc1;
1838 : :
1839 : : /*
1840 : : * For now we do not support RIGHT_SEMI join in mergejoin or nestloop
1841 : : * join.
1842 : : */
1843 [ + + ]: 65153 : if (jointype == JOIN_RIGHT_SEMI)
1844 : 23 : return;
1845 : :
1846 : : /*
1847 : : * Nestloop only supports inner, left, semi, and anti joins. Also, if we
1848 : : * are doing a right, right-anti or full mergejoin, we must use *all* the
1849 : : * mergeclauses as join clauses, else we will not have a valid plan.
1850 : : * (Although these two flags are currently inverses, keep them separate
1851 : : * for clarity and possible future changes.)
1852 : : */
1853 [ - + + ]: 65130 : switch (jointype)
1854 : : {
1855 : : case JOIN_INNER:
1856 : : case JOIN_LEFT:
1857 : : case JOIN_SEMI:
1858 : : case JOIN_ANTI:
1859 : 58882 : nestjoinOK = true;
1860 : 58882 : useallclauses = false;
1861 : 58882 : break;
1862 : : case JOIN_RIGHT:
1863 : : case JOIN_RIGHT_ANTI:
1864 : : case JOIN_FULL:
1865 : 6248 : nestjoinOK = false;
1866 : 6248 : useallclauses = true;
1867 : 6248 : break;
1868 : : default:
1869 [ # # # # ]: 0 : elog(ERROR, "unrecognized join type: %d",
1870 : : (int) jointype);
1871 : 0 : nestjoinOK = false; /* keep compiler quiet */
1872 : 0 : useallclauses = false;
1873 : 0 : break;
1874 : : }
1875 : :
1876 : : /*
1877 : : * If inner_cheapest_total is parameterized by the outer rel, ignore it;
1878 : : * we will consider it below as a member of cheapest_parameterized_paths,
1879 : : * but the other possibilities considered in this routine aren't usable.
1880 : : *
1881 : : * Furthermore, if the inner side is a unique-ified relation, we cannot
1882 : : * generate any valid paths here, because the inner rel's dependency on
1883 : : * the outer rel makes unique-ification meaningless.
1884 : : */
1885 [ + + - + : 65130 : if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel))
+ + + + +
- ]
1886 : : {
1887 : 673 : inner_cheapest_total = NULL;
1888 : :
1889 [ + + + + : 673 : if (RELATION_WAS_MADE_UNIQUE(innerrel, extra->sjinfo, jointype))
- + ]
1890 : 6 : return;
1891 : 667 : }
1892 : :
1893 [ + + ]: 65124 : if (nestjoinOK)
1894 : : {
1895 : : /*
1896 : : * Consider materializing the cheapest inner path, unless that is
1897 : : * disabled or the path in question materializes its output anyway.
1898 : : */
1899 [ + + ]: 58876 : if ((extra->pgs_mask & PGS_NESTLOOP_MATERIALIZE) != 0 &&
1900 [ + + + + ]: 58466 : inner_cheapest_total != NULL &&
1901 : 57799 : !ExecMaterializesOutput(inner_cheapest_total->pathtype))
1902 : 56540 : matpath = (Path *)
1903 : 56540 : create_material_path(innerrel, inner_cheapest_total, true);
1904 : 58876 : }
1905 : :
1906 [ + - + + : 199053 : foreach(lc1, outerrel->pathlist)
+ + ]
1907 : : {
1908 : 133929 : Path *outerpath = (Path *) lfirst(lc1);
1909 : 133929 : List *merge_pathkeys;
1910 : :
1911 : : /*
1912 : : * We cannot use an outer path that is parameterized by the inner rel.
1913 : : */
1914 [ + + + - : 133929 : if (PATH_PARAM_BY_REL(outerpath, innerrel))
+ + + + -
+ ]
1915 : 19523 : continue;
1916 : :
1917 : : /*
1918 : : * The result will have this sort order (even if it is implemented as
1919 : : * a nestloop, and even if some of the mergeclauses are implemented by
1920 : : * qpquals rather than as true mergeclauses):
1921 : : */
1922 : 228812 : merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1923 : 114406 : outerpath->pathkeys);
1924 : :
1925 [ + + ]: 114406 : if (nestjoinOK)
1926 : : {
1927 : : /*
1928 : : * Consider nestloop joins using this outer path and various
1929 : : * available paths for the inner relation. We consider the
1930 : : * cheapest-total paths for each available parameterization of the
1931 : : * inner relation, including the unparameterized case.
1932 : : */
1933 : 103785 : ListCell *lc2;
1934 : :
1935 [ + - + + : 257613 : foreach(lc2, innerrel->cheapest_parameterized_paths)
+ + ]
1936 : : {
1937 : 153828 : Path *innerpath = (Path *) lfirst(lc2);
1938 : 153828 : Path *mpath;
1939 : :
1940 : 307656 : try_nestloop_path(root,
1941 : 153828 : joinrel,
1942 : 153828 : outerpath,
1943 : 153828 : innerpath,
1944 : 153828 : merge_pathkeys,
1945 : 153828 : jointype,
1946 : : PGS_NESTLOOP_PLAIN,
1947 : 153828 : extra);
1948 : :
1949 : : /*
1950 : : * Try generating a memoize path and see if that makes the
1951 : : * nested loop any cheaper.
1952 : : */
1953 : 307656 : mpath = get_memoize_path(root, innerrel, outerrel,
1954 : 153828 : innerpath, outerpath, jointype,
1955 : 153828 : extra);
1956 [ + + ]: 153828 : if (mpath != NULL)
1957 : 37842 : try_nestloop_path(root,
1958 : 18921 : joinrel,
1959 : 18921 : outerpath,
1960 : 18921 : mpath,
1961 : 18921 : merge_pathkeys,
1962 : 18921 : jointype,
1963 : : PGS_NESTLOOP_MEMOIZE,
1964 : 18921 : extra);
1965 : 153828 : }
1966 : :
1967 : : /* Also consider materialized form of the cheapest inner path */
1968 [ + + ]: 103785 : if (matpath != NULL)
1969 : 201352 : try_nestloop_path(root,
1970 : 100676 : joinrel,
1971 : 100676 : outerpath,
1972 : 100676 : matpath,
1973 : 100676 : merge_pathkeys,
1974 : 100676 : jointype,
1975 : : PGS_NESTLOOP_MATERIALIZE,
1976 : 100676 : extra);
1977 : 103785 : }
1978 : :
1979 : : /* Can't do anything else if inner rel is parameterized by outer */
1980 [ + + ]: 114406 : if (inner_cheapest_total == NULL)
1981 : 773 : continue;
1982 : :
1983 : : /* Generate merge join paths */
1984 : 227266 : generate_mergejoin_paths(root, joinrel, innerrel, outerpath,
1985 : 113633 : jointype, extra, useallclauses,
1986 : 113633 : inner_cheapest_total, merge_pathkeys,
1987 : : false);
1988 [ + + ]: 133929 : }
1989 : :
1990 : : /*
1991 : : * Consider partial nestloop and mergejoin plan if outerrel has any
1992 : : * partial path and the joinrel is parallel-safe. However, we can't
1993 : : * handle joins needing lateral rels, since partial paths must not be
1994 : : * parameterized. Similarly, we can't handle JOIN_FULL, JOIN_RIGHT and
1995 : : * JOIN_RIGHT_ANTI, because they can produce false null extended rows.
1996 : : */
1997 [ + + ]: 65124 : if (joinrel->consider_parallel &&
1998 [ + + ]: 56027 : jointype != JOIN_FULL &&
1999 [ + + ]: 55629 : jointype != JOIN_RIGHT &&
2000 [ + + ]: 50704 : jointype != JOIN_RIGHT_ANTI &&
2001 [ + + - + ]: 50589 : outerrel->partial_pathlist != NIL &&
2002 : 12408 : bms_is_empty(joinrel->lateral_relids))
2003 : : {
2004 [ - + ]: 12408 : if (nestjoinOK)
2005 : 24816 : consider_parallel_nestloop(root, joinrel, outerrel, innerrel,
2006 : 12408 : jointype, extra);
2007 : :
2008 : : /*
2009 : : * If inner_cheapest_total is NULL or non parallel-safe then find the
2010 : : * cheapest total parallel safe path.
2011 : : */
2012 [ + + + + ]: 12408 : if (inner_cheapest_total == NULL ||
2013 : 12338 : !inner_cheapest_total->parallel_safe)
2014 : : {
2015 : 157 : inner_cheapest_total =
2016 : 157 : get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
2017 : 157 : }
2018 : :
2019 [ + + ]: 12408 : if (inner_cheapest_total)
2020 : 24662 : consider_parallel_mergejoin(root, joinrel, outerrel, innerrel,
2021 : 12331 : jointype, extra,
2022 : 12331 : inner_cheapest_total);
2023 : 12408 : }
2024 : 65153 : }
2025 : :
2026 : : /*
2027 : : * consider_parallel_mergejoin
2028 : : * Try to build partial paths for a joinrel by joining a partial path
2029 : : * for the outer relation to a complete path for the inner relation.
2030 : : *
2031 : : * 'joinrel' is the join relation
2032 : : * 'outerrel' is the outer join relation
2033 : : * 'innerrel' is the inner join relation
2034 : : * 'jointype' is the type of join to do
2035 : : * 'extra' contains additional input values
2036 : : * 'inner_cheapest_total' cheapest total path for innerrel
2037 : : */
2038 : : static void
2039 : 12331 : consider_parallel_mergejoin(PlannerInfo *root,
2040 : : RelOptInfo *joinrel,
2041 : : RelOptInfo *outerrel,
2042 : : RelOptInfo *innerrel,
2043 : : JoinType jointype,
2044 : : JoinPathExtraData *extra,
2045 : : Path *inner_cheapest_total)
2046 : : {
2047 : 12331 : ListCell *lc1;
2048 : :
2049 : : /* generate merge join path for each partial outer path */
2050 [ + - + + : 27974 : foreach(lc1, outerrel->partial_pathlist)
+ + ]
2051 : : {
2052 : 15643 : Path *outerpath = (Path *) lfirst(lc1);
2053 : 15643 : List *merge_pathkeys;
2054 : :
2055 : : /*
2056 : : * Figure out what useful ordering any paths we create will have.
2057 : : */
2058 : 31286 : merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
2059 : 15643 : outerpath->pathkeys);
2060 : :
2061 : 31286 : generate_mergejoin_paths(root, joinrel, innerrel, outerpath, jointype,
2062 : 15643 : extra, false, inner_cheapest_total,
2063 : 15643 : merge_pathkeys, true);
2064 : 15643 : }
2065 : 12331 : }
2066 : :
2067 : : /*
2068 : : * consider_parallel_nestloop
2069 : : * Try to build partial paths for a joinrel by joining a partial path for the
2070 : : * outer relation to a complete path for the inner relation.
2071 : : *
2072 : : * 'joinrel' is the join relation
2073 : : * 'outerrel' is the outer join relation
2074 : : * 'innerrel' is the inner join relation
2075 : : * 'jointype' is the type of join to do
2076 : : * 'extra' contains additional input values
2077 : : */
2078 : : static void
2079 : 12408 : consider_parallel_nestloop(PlannerInfo *root,
2080 : : RelOptInfo *joinrel,
2081 : : RelOptInfo *outerrel,
2082 : : RelOptInfo *innerrel,
2083 : : JoinType jointype,
2084 : : JoinPathExtraData *extra)
2085 : : {
2086 : 12408 : Path *inner_cheapest_total = innerrel->cheapest_total_path;
2087 : 12408 : Path *matpath = NULL;
2088 : 12408 : ListCell *lc1;
2089 : :
2090 : : /*
2091 : : * Consider materializing the cheapest inner path, unless: 1)
2092 : : * materialization is disabled here, 2) the cheapest inner path is not
2093 : : * parallel-safe, 3) the cheapest inner path is parameterized by the outer
2094 : : * rel, or 4) the cheapest inner path materializes its output anyway.
2095 : : */
2096 [ + + ]: 12408 : if ((extra->pgs_mask & PGS_NESTLOOP_MATERIALIZE) != 0 &&
2097 [ + + ]: 12298 : inner_cheapest_total->parallel_safe &&
2098 [ + + + - : 12240 : !PATH_PARAM_BY_REL(inner_cheapest_total, outerrel) &&
+ + + + +
- ]
2099 : 12159 : !ExecMaterializesOutput(inner_cheapest_total->pathtype))
2100 : : {
2101 : 12159 : matpath = (Path *)
2102 : 12159 : create_material_path(innerrel, inner_cheapest_total, true);
2103 [ + - ]: 12159 : Assert(matpath->parallel_safe);
2104 : 12159 : }
2105 : :
2106 [ + - + + : 28151 : foreach(lc1, outerrel->partial_pathlist)
+ + ]
2107 : : {
2108 : 15743 : Path *outerpath = (Path *) lfirst(lc1);
2109 : 15743 : List *pathkeys;
2110 : 15743 : ListCell *lc2;
2111 : :
2112 : : /* Figure out what useful ordering any paths we create will have. */
2113 : 31486 : pathkeys = build_join_pathkeys(root, joinrel, jointype,
2114 : 15743 : outerpath->pathkeys);
2115 : :
2116 : : /*
2117 : : * Try the cheapest parameterized paths; only those which will produce
2118 : : * an unparameterized path when joined to this outerrel will survive
2119 : : * try_partial_nestloop_path. The cheapest unparameterized path is
2120 : : * also in this list.
2121 : : */
2122 [ + - + + : 32955 : foreach(lc2, innerrel->cheapest_parameterized_paths)
+ + ]
2123 : : {
2124 : 17212 : Path *innerpath = (Path *) lfirst(lc2);
2125 : 17212 : Path *mpath;
2126 : :
2127 : : /* Can't join to an inner path that is not parallel-safe */
2128 [ + + ]: 17212 : if (!innerpath->parallel_safe)
2129 : 100 : continue;
2130 : :
2131 : 34224 : try_partial_nestloop_path(root, joinrel, outerpath, innerpath,
2132 : 17112 : pathkeys, jointype,
2133 : 17112 : PGS_NESTLOOP_PLAIN, extra);
2134 : :
2135 : : /*
2136 : : * Try generating a memoize path and see if that makes the nested
2137 : : * loop any cheaper.
2138 : : */
2139 : 34224 : mpath = get_memoize_path(root, innerrel, outerrel,
2140 : 17112 : innerpath, outerpath, jointype,
2141 : 17112 : extra);
2142 [ + + ]: 17112 : if (mpath != NULL)
2143 : 1932 : try_partial_nestloop_path(root, joinrel, outerpath, mpath,
2144 : 966 : pathkeys, jointype,
2145 : 966 : PGS_NESTLOOP_MEMOIZE, extra);
2146 [ - + + ]: 17212 : }
2147 : :
2148 : : /* Also consider materialized form of the cheapest inner path */
2149 [ + + ]: 15743 : if (matpath != NULL)
2150 : 30846 : try_partial_nestloop_path(root, joinrel, outerpath, matpath,
2151 : 15423 : pathkeys, jointype,
2152 : 15423 : PGS_NESTLOOP_MATERIALIZE, extra);
2153 : 15743 : }
2154 : 12408 : }
2155 : :
2156 : : /*
2157 : : * hash_inner_and_outer
2158 : : * Create hashjoin join paths by explicitly hashing both the outer and
2159 : : * inner keys of each available hash clause.
2160 : : *
2161 : : * 'joinrel' is the join relation
2162 : : * 'outerrel' is the outer join relation
2163 : : * 'innerrel' is the inner join relation
2164 : : * 'jointype' is the type of join to do
2165 : : * 'extra' contains additional input values
2166 : : */
2167 : : static void
2168 : 66061 : hash_inner_and_outer(PlannerInfo *root,
2169 : : RelOptInfo *joinrel,
2170 : : RelOptInfo *outerrel,
2171 : : RelOptInfo *innerrel,
2172 : : JoinType jointype,
2173 : : JoinPathExtraData *extra)
2174 : : {
2175 : 66061 : bool isouterjoin = IS_OUTER_JOIN(jointype);
2176 : 66061 : List *hashclauses;
2177 : 66061 : ListCell *l;
2178 : :
2179 : : /*
2180 : : * We need to build only one hashclauses list for any given pair of outer
2181 : : * and inner relations; all of the hashable clauses will be used as keys.
2182 : : *
2183 : : * Scan the join's restrictinfo list to find hashjoinable clauses that are
2184 : : * usable with this pair of sub-relations.
2185 : : */
2186 : 66061 : hashclauses = NIL;
2187 [ + + + + : 132272 : foreach(l, extra->restrictlist)
+ + ]
2188 : : {
2189 : 66211 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
2190 : :
2191 : : /*
2192 : : * If processing an outer join, only use its own join clauses for
2193 : : * hashing. For inner joins we need not be so picky.
2194 : : */
2195 [ + + + + : 66211 : if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
+ - ]
2196 : 672 : continue;
2197 : :
2198 [ + + + + ]: 65539 : if (!restrictinfo->can_join ||
2199 : 59049 : restrictinfo->hashjoinoperator == InvalidOid)
2200 : 7430 : continue; /* not hashjoinable */
2201 : :
2202 : : /*
2203 : : * Check if clause has the form "outer op inner" or "inner op outer".
2204 : : */
2205 [ + + + + ]: 116218 : if (!clause_sides_match_join(restrictinfo, outerrel->relids,
2206 : 58109 : innerrel->relids))
2207 : 36 : continue; /* no good for these input relations */
2208 : :
2209 : : /*
2210 : : * If clause has the form "inner op outer", check if its operator has
2211 : : * valid commutator. This is necessary because hashclauses in this
2212 : : * form will get commuted in createplan.c to put the outer var on the
2213 : : * left (see get_switched_clauses). This probably shouldn't ever
2214 : : * fail, since hashable operators ought to have commutators, but be
2215 : : * paranoid.
2216 : : *
2217 : : * The clause being hashjoinable indicates that it's an OpExpr.
2218 : : */
2219 [ + + + + ]: 58073 : if (!restrictinfo->outer_is_left &&
2220 : 29041 : !OidIsValid(get_commutator(castNode(OpExpr, restrictinfo->clause)->opno)))
2221 : 1 : continue;
2222 : :
2223 : 58072 : hashclauses = lappend(hashclauses, restrictinfo);
2224 [ + + ]: 66211 : }
2225 : :
2226 : : /* If we found any usable hashclauses, make paths */
2227 [ + + ]: 66061 : if (hashclauses)
2228 : : {
2229 : : /*
2230 : : * We consider both the cheapest-total-cost and cheapest-startup-cost
2231 : : * outer paths. There's no need to consider any but the
2232 : : * cheapest-total-cost inner path, however.
2233 : : */
2234 : 53950 : Path *cheapest_startup_outer = outerrel->cheapest_startup_path;
2235 : 53950 : Path *cheapest_total_outer = outerrel->cheapest_total_path;
2236 : 53950 : Path *cheapest_total_inner = innerrel->cheapest_total_path;
2237 : 53950 : ListCell *lc1;
2238 : 53950 : ListCell *lc2;
2239 : :
2240 : : /*
2241 : : * If either cheapest-total path is parameterized by the other rel, we
2242 : : * can't use a hashjoin. (There's no use looking for alternative
2243 : : * input paths, since these should already be the least-parameterized
2244 : : * available paths.)
2245 : : */
2246 [ + + + - : 54095 : if (PATH_PARAM_BY_REL(cheapest_total_outer, innerrel) ||
+ + + + +
- ]
2247 [ + + + - : 53572 : PATH_PARAM_BY_REL(cheapest_total_inner, outerrel))
+ + + - ]
2248 : 766 : return;
2249 : :
2250 : : /*
2251 : : * Consider the cheapest startup outer together with the cheapest
2252 : : * total inner, and then consider pairings of cheapest-total paths
2253 : : * including parameterized ones. There is no use in generating
2254 : : * parameterized paths on the basis of possibly cheap startup cost, so
2255 : : * this is sufficient.
2256 : : */
2257 [ + + ]: 53184 : if (cheapest_startup_outer != NULL)
2258 : 106182 : try_hashjoin_path(root,
2259 : 53091 : joinrel,
2260 : 53091 : cheapest_startup_outer,
2261 : 53091 : cheapest_total_inner,
2262 : 53091 : hashclauses,
2263 : 53091 : jointype,
2264 : 53091 : extra);
2265 : :
2266 [ + - + + : 128616 : foreach(lc1, outerrel->cheapest_parameterized_paths)
+ + ]
2267 : : {
2268 : 75432 : Path *outerpath = (Path *) lfirst(lc1);
2269 : :
2270 : : /*
2271 : : * We cannot use an outer path that is parameterized by the inner
2272 : : * rel.
2273 : : */
2274 [ + + + - : 75432 : if (PATH_PARAM_BY_REL(outerpath, innerrel))
+ + + + +
- ]
2275 : 18680 : continue;
2276 : :
2277 [ + - + + : 139060 : foreach(lc2, innerrel->cheapest_parameterized_paths)
+ + ]
2278 : : {
2279 : 82308 : Path *innerpath = (Path *) lfirst(lc2);
2280 : :
2281 : : /*
2282 : : * We cannot use an inner path that is parameterized by the
2283 : : * outer rel, either.
2284 : : */
2285 [ + + + - : 82308 : if (PATH_PARAM_BY_REL(innerpath, outerrel))
+ + + + +
- ]
2286 : 20738 : continue;
2287 : :
2288 [ + + + + ]: 61570 : if (outerpath == cheapest_startup_outer &&
2289 : 52264 : innerpath == cheapest_total_inner)
2290 : 49213 : continue; /* already tried it */
2291 : :
2292 : 24714 : try_hashjoin_path(root,
2293 : 12357 : joinrel,
2294 : 12357 : outerpath,
2295 : 12357 : innerpath,
2296 : 12357 : hashclauses,
2297 : 12357 : jointype,
2298 : 12357 : extra);
2299 [ + + ]: 82308 : }
2300 [ + + ]: 75432 : }
2301 : :
2302 : : /*
2303 : : * If the joinrel is parallel-safe, we may be able to consider a
2304 : : * partial hash join.
2305 : : *
2306 : : * However, we can't handle JOIN_RIGHT_SEMI, because the hash table is
2307 : : * either a shared hash table or a private hash table per backend. In
2308 : : * the shared case, there is no concurrency protection for the match
2309 : : * flags, so multiple workers could inspect and set the flags
2310 : : * concurrently, potentially producing incorrect results. In the
2311 : : * private case, each worker has its own copy of the hash table, so no
2312 : : * single process has all the match flags.
2313 : : *
2314 : : * Also, the resulting path must not be parameterized.
2315 : : */
2316 [ + + ]: 53184 : if (joinrel->consider_parallel &&
2317 [ + + ]: 48516 : jointype != JOIN_RIGHT_SEMI &&
2318 [ + + - + ]: 47849 : outerrel->partial_pathlist != NIL &&
2319 : 12785 : bms_is_empty(joinrel->lateral_relids))
2320 : : {
2321 : 12785 : Path *cheapest_partial_outer;
2322 : 12785 : Path *cheapest_partial_inner = NULL;
2323 : 12785 : Path *cheapest_safe_inner = NULL;
2324 : :
2325 : 12785 : cheapest_partial_outer =
2326 : 12785 : (Path *) linitial(outerrel->partial_pathlist);
2327 : :
2328 : : /*
2329 : : * Can we use a partial inner plan too, so that we can build a
2330 : : * shared hash table in parallel?
2331 : : */
2332 [ + + + + ]: 12785 : if (innerrel->partial_pathlist != NIL &&
2333 : 12605 : enable_parallel_hash)
2334 : : {
2335 : 12559 : cheapest_partial_inner =
2336 : 12559 : (Path *) linitial(innerrel->partial_pathlist);
2337 : 25118 : try_partial_hashjoin_path(root, joinrel,
2338 : 12559 : cheapest_partial_outer,
2339 : 12559 : cheapest_partial_inner,
2340 : 12559 : hashclauses, jointype, extra,
2341 : : true /* parallel_hash */ );
2342 : 12559 : }
2343 : :
2344 : : /*
2345 : : * Normally, given that the joinrel is parallel-safe, the cheapest
2346 : : * total inner path will also be parallel-safe, but if not, we'll
2347 : : * have to search for the cheapest safe, unparameterized inner
2348 : : * path. If full, right, or right-anti join, we can't use
2349 : : * parallelism (building the hash table in each backend) because
2350 : : * no one process has all the match bits.
2351 : : */
2352 [ + + ]: 12785 : if (jointype == JOIN_FULL ||
2353 [ + + + + ]: 12468 : jointype == JOIN_RIGHT ||
2354 : 12149 : jointype == JOIN_RIGHT_ANTI)
2355 : 691 : cheapest_safe_inner = NULL;
2356 [ + + ]: 12094 : else if (cheapest_total_inner->parallel_safe)
2357 : 12031 : cheapest_safe_inner = cheapest_total_inner;
2358 : : else
2359 : 63 : cheapest_safe_inner =
2360 : 63 : get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
2361 : :
2362 [ + + ]: 12785 : if (cheapest_safe_inner != NULL)
2363 : 24174 : try_partial_hashjoin_path(root, joinrel,
2364 : 12087 : cheapest_partial_outer,
2365 : 12087 : cheapest_safe_inner,
2366 : 12087 : hashclauses, jointype, extra,
2367 : : false /* parallel_hash */ );
2368 : 12785 : }
2369 [ + + ]: 53484 : }
2370 : 65595 : }
2371 : :
2372 : : /*
2373 : : * select_mergejoin_clauses
2374 : : * Select mergejoin clauses that are usable for a particular join.
2375 : : * Returns a list of RestrictInfo nodes for those clauses.
2376 : : *
2377 : : * *mergejoin_allowed is normally set to true, but it is set to false if
2378 : : * this is a right-semi join, or this is a right/right-anti/full join and
2379 : : * there are nonmergejoinable join clauses. The executor's mergejoin
2380 : : * machinery cannot handle such cases, so we have to avoid generating a
2381 : : * mergejoin plan. (Note that this flag does NOT consider whether there are
2382 : : * actually any mergejoinable clauses. This is correct because in some
2383 : : * cases we need to build a clauseless mergejoin. Simply returning NIL is
2384 : : * therefore not enough to distinguish safe from unsafe cases.)
2385 : : *
2386 : : * We also mark each selected RestrictInfo to show which side is currently
2387 : : * being considered as outer. These are transient markings that are only
2388 : : * good for the duration of the current add_paths_to_joinrel() call!
2389 : : *
2390 : : * We examine each restrictinfo clause known for the join to see
2391 : : * if it is mergejoinable and involves vars from the two sub-relations
2392 : : * currently of interest.
2393 : : */
2394 : : static List *
2395 : 65720 : select_mergejoin_clauses(PlannerInfo *root,
2396 : : RelOptInfo *joinrel,
2397 : : RelOptInfo *outerrel,
2398 : : RelOptInfo *innerrel,
2399 : : List *restrictlist,
2400 : : JoinType jointype,
2401 : : bool *mergejoin_allowed)
2402 : : {
2403 : 65720 : List *result_list = NIL;
2404 : 65720 : bool isouterjoin = IS_OUTER_JOIN(jointype);
2405 : 65720 : bool have_nonmergeable_joinclause = false;
2406 : 65720 : ListCell *l;
2407 : :
2408 : : /*
2409 : : * For now we do not support RIGHT_SEMI join in mergejoin: the benefit of
2410 : : * swapping inputs tends to be small here.
2411 : : */
2412 [ + + ]: 65720 : if (jointype == JOIN_RIGHT_SEMI)
2413 : : {
2414 : 1019 : *mergejoin_allowed = false;
2415 : 1019 : return NIL;
2416 : : }
2417 : :
2418 [ + + + + : 130229 : foreach(l, restrictlist)
+ + ]
2419 : : {
2420 : 65528 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
2421 : :
2422 : : /*
2423 : : * If processing an outer join, only use its own join clauses in the
2424 : : * merge. For inner joins we can use pushed-down clauses too. (Note:
2425 : : * we don't set have_nonmergeable_joinclause here because pushed-down
2426 : : * clauses will become otherquals not joinquals.)
2427 : : */
2428 [ + + + + : 65528 : if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
+ - ]
2429 : 676 : continue;
2430 : :
2431 : : /* Check that clause is a mergeable operator clause */
2432 [ + + + + ]: 64852 : if (!restrictinfo->can_join ||
2433 : 58366 : restrictinfo->mergeopfamilies == NIL)
2434 : : {
2435 : : /*
2436 : : * The executor can handle extra joinquals that are constants, but
2437 : : * not anything else, when doing right/right-anti/full merge join.
2438 : : * (The reason to support constants is so we can do FULL JOIN ON
2439 : : * FALSE.)
2440 : : */
2441 [ + - + + ]: 7198 : if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const))
2442 : 5732 : have_nonmergeable_joinclause = true;
2443 : 7198 : continue; /* not mergejoinable */
2444 : : }
2445 : :
2446 : : /*
2447 : : * Check if clause has the form "outer op inner" or "inner op outer".
2448 : : */
2449 [ + + + + ]: 115308 : if (!clause_sides_match_join(restrictinfo, outerrel->relids,
2450 : 57654 : innerrel->relids))
2451 : : {
2452 : 168 : have_nonmergeable_joinclause = true;
2453 : 168 : continue; /* no good for these input relations */
2454 : : }
2455 : :
2456 : : /*
2457 : : * If clause has the form "inner op outer", check if its operator has
2458 : : * valid commutator. This is necessary because mergejoin clauses in
2459 : : * this form will get commuted in createplan.c to put the outer var on
2460 : : * the left (see get_switched_clauses). This probably shouldn't ever
2461 : : * fail, since mergejoinable operators ought to have commutators, but
2462 : : * be paranoid.
2463 : : *
2464 : : * The clause being mergejoinable indicates that it's an OpExpr.
2465 : : */
2466 [ + + + + ]: 57486 : if (!restrictinfo->outer_is_left &&
2467 : 28369 : !OidIsValid(get_commutator(castNode(OpExpr, restrictinfo->clause)->opno)))
2468 : : {
2469 : 6 : have_nonmergeable_joinclause = true;
2470 : 6 : continue;
2471 : : }
2472 : :
2473 : : /*
2474 : : * Insist that each side have a non-redundant eclass. This
2475 : : * restriction is needed because various bits of the planner expect
2476 : : * that each clause in a merge be associable with some pathkey in a
2477 : : * canonical pathkey list, but redundant eclasses can't appear in
2478 : : * canonical sort orderings. (XXX it might be worth relaxing this,
2479 : : * but not enough time to address it for 8.3.)
2480 : : */
2481 : 57480 : update_mergeclause_eclasses(root, restrictinfo);
2482 : :
2483 [ + + + + ]: 57480 : if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
2484 : 57472 : EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
2485 : : {
2486 : 20 : have_nonmergeable_joinclause = true;
2487 : 20 : continue; /* can't handle redundant eclasses */
2488 : : }
2489 : :
2490 : 57460 : result_list = lappend(result_list, restrictinfo);
2491 [ - + + ]: 65528 : }
2492 : :
2493 : : /*
2494 : : * Report whether mergejoin is allowed (see comment at top of function).
2495 : : */
2496 [ + + ]: 64701 : switch (jointype)
2497 : : {
2498 : : case JOIN_RIGHT:
2499 : : case JOIN_RIGHT_ANTI:
2500 : : case JOIN_FULL:
2501 : 6590 : *mergejoin_allowed = !have_nonmergeable_joinclause;
2502 : 6590 : break;
2503 : : default:
2504 : 58111 : *mergejoin_allowed = true;
2505 : 58111 : break;
2506 : : }
2507 : :
2508 : 64701 : return result_list;
2509 : 65720 : }
|