/*-------------------------------------------------------------------------
 *
 * nodeIndexonlyscan.c
 *    Routines to support index-only scans
 *
 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/backend/executor/nodeIndexonlyscan.c
 *
 *-------------------------------------------------------------------------
 */
/*
 * INTERFACE ROUTINES
 *      ExecIndexOnlyScan           scans an index
 *      IndexOnlyNext               retrieve next tuple
 *      ExecInitIndexOnlyScan       creates and initializes state info.
 *      ExecReScanIndexOnlyScan     rescans the indexed relation.
 *      ExecEndIndexOnlyScan        releases all storage.
 *      ExecIndexOnlyMarkPos        marks scan position.
 *      ExecIndexOnlyRestrPos       restores scan position.
 *      ExecIndexOnlyScanEstimate   estimates DSM space needed for
 *                                  parallel index-only scan
 *      ExecIndexOnlyScanInitializeDSM      initialize DSM for parallel
 *                                          index-only scan
 *      ExecIndexOnlyScanReInitializeDSM    reinitialize DSM for fresh scan
 *      ExecIndexOnlyScanInitializeWorker   attach to DSM info in parallel worker
 */
#include "postgres.h"

#include "access/genam.h"
#include "access/relscan.h"
#include "access/tableam.h"
#include "access/tupdesc.h"
#include "access/visibilitymap.h"
#include "catalog/pg_type.h"
#include "executor/executor.h"
#include "executor/nodeIndexonlyscan.h"
#include "executor/nodeIndexscan.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/predicate.h"
#include "utils/builtins.h"
#include "utils/rel.h"


static TupleTableSlot *IndexOnlyNext(IndexOnlyScanState *node);
static void StoreIndexTuple(IndexOnlyScanState *node, TupleTableSlot *slot,
                            IndexTuple itup, TupleDesc itupdesc);

/* ----------------------------------------------------------------
 *      IndexOnlyNext
 *
 *      Retrieve a tuple from the IndexOnlyScan node's index.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
IndexOnlyNext(IndexOnlyScanState *node)
{
    EState     *estate;
    ExprContext *econtext;
    ScanDirection direction;
    IndexScanDesc scandesc;
    TupleTableSlot *slot;
    ItemPointer tid;

    /*
     * extract necessary information from index scan node
     */
    estate = node->ss.ps.state;

    /*
     * Determine which direction to scan the index in based on the plan's
     * scan direction and the current direction of execution.
     */
    direction = ScanDirectionCombine(estate->es_direction,
                                     ((IndexOnlyScan *) node->ss.ps.plan)->indexorderdir);
    scandesc = node->ioss_ScanDesc;
    econtext = node->ss.ps.ps_ExprContext;
    slot = node->ss.ss_ScanTupleSlot;

    if (scandesc == NULL)
    {
        /*
         * We reach here if the index-only scan is not parallel, or if we're
         * serially executing an index-only scan that was planned to be
         * parallel.
         */
        scandesc = index_beginscan(node->ss.ss_currentRelation,
                                   node->ioss_RelationDesc,
                                   estate->es_snapshot,
                                   &node->ioss_Instrument,
                                   node->ioss_NumScanKeys,
                                   node->ioss_NumOrderByKeys);

        node->ioss_ScanDesc = scandesc;

        /* Set it up for index-only scan */
        node->ioss_ScanDesc->xs_want_itup = true;
        node->ioss_VMBuffer = InvalidBuffer;

        /*
         * If no run-time keys to calculate or they are ready, go ahead and
         * pass the scankeys to the index AM.
         */
        if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady)
            index_rescan(scandesc,
                         node->ioss_ScanKeys,
                         node->ioss_NumScanKeys,
                         node->ioss_OrderByKeys,
                         node->ioss_NumOrderByKeys);
    }

    /*
     * OK, now that we have what we need, fetch the next tuple.
     */
    while ((tid = index_getnext_tid(scandesc, direction)) != NULL)
    {
        bool        tuple_from_heap = false;

        CHECK_FOR_INTERRUPTS();

        /*
         * We can skip the heap fetch if the TID references a heap page on
         * which all tuples are known visible to everybody.  In any case,
         * we'll use the index tuple, not the heap tuple, as the data source.
         *
         * Note on memory-ordering effects: visibilitymap_get_status does not
         * lock the visibility map buffer, and therefore the result we read
         * here could be slightly stale.  However, it can't be stale enough
         * to matter.
         *
         * We need to detect clearing a VM bit due to an insert right away,
         * because the tuple is present in the index page but not visible.
         * The reading of the TID by this scan (using a shared lock on the
         * index buffer) is serialized with the insert of the TID into the
         * index (using an exclusive lock on the index buffer).  Because the
         * VM bit is cleared before updating the index, and locking/unlocking
         * of the index page acts as a full memory barrier, we are sure to
         * see the cleared bit if we see a recently-inserted TID.
         *
         * Deletes do not update the index page (only VACUUM will clear out
         * the TID), so the clearing of the VM bit by a delete is not
         * serialized with this test below, and we may see a value that is
         * significantly stale.  However, we don't care about the delete
         * right away, because the tuple is still visible until the deleting
         * transaction commits or the statement ends (if it's our
         * transaction).  In either case, the lock on the VM buffer will have
         * been released (acting as a write barrier) after clearing the bit.
         * And for us to have a snapshot that includes the deleting
         * transaction (making the tuple invisible), we must have acquired
         * ProcArrayLock after that time, acting as a read barrier.
         *
         * It's worth going through this complexity to avoid needing to lock
         * the VM buffer, which could cause significant contention.
         */
        if (!VM_ALL_VISIBLE(scandesc->heapRelation,
                            ItemPointerGetBlockNumber(tid),
                            &node->ioss_VMBuffer))
        {
            /*
             * Rats, we have to visit the heap to check visibility.
             */
            InstrCountTuples2(node, 1);
            if (!index_fetch_heap(scandesc, node->ioss_TableSlot))
                continue;       /* no visible tuple, try next index entry */

            ExecClearTuple(node->ioss_TableSlot);

            /*
             * Only MVCC snapshots are supported here, so there should be no
             * need to keep following the HOT chain once a visible entry has
             * been found.  If we did want to allow that, we'd need to keep
             * more state to remember not to call index_getnext_tid next
             * time.
             */
            if (scandesc->xs_heap_continue)
                elog(ERROR, "non-MVCC snapshots are not supported in index-only scans");

            /*
             * Note: at this point we are holding a pin on the heap page, as
             * recorded in scandesc->xs_cbuf.  We could release that pin now,
             * but it's not clear whether it's a win to do so.  The next
             * index entry might require a visit to the same heap page.
             */

            tuple_from_heap = true;
        }

        /*
         * Fill the scan tuple slot with data from the index.  This might be
         * provided in either HeapTuple or IndexTuple format.  Conceivably an
         * index AM might fill both fields, in which case we prefer the heap
         * format, since it's probably a bit cheaper to fill a slot from.
         */
        if (scandesc->xs_hitup)
        {
            /*
             * We don't take the trouble to verify that the provided tuple
             * has exactly the slot's format, but it seems worth doing a
             * quick check on the number of fields.
             */
            Assert(slot->tts_tupleDescriptor->natts ==
                   scandesc->xs_hitupdesc->natts);
            ExecForceStoreHeapTuple(scandesc->xs_hitup, slot, false);
        }
        else if (scandesc->xs_itup)
            StoreIndexTuple(node, slot, scandesc->xs_itup, scandesc->xs_itupdesc);
        else
            elog(ERROR, "no data returned for index-only scan");

        /*
         * If the index was lossy, we have to recheck the index quals.
         */
        if (scandesc->xs_recheck)
        {
            econtext->ecxt_scantuple = slot;
            if (!ExecQualAndReset(node->recheckqual, econtext))
            {
                /* Fails recheck, so drop it and loop back for another */
                InstrCountFiltered2(node, 1);
                continue;
            }
        }

        /*
         * We don't currently support rechecking ORDER BY distances.  (In
         * principle, if the index can support retrieval of the originally
         * indexed value, it should be able to produce an exact distance
         * calculation too.  So it's not clear that adding code here for
         * recheck/re-sort would be worth the trouble.  But we should at
         * least throw an error if someone tries it.)
         */
        if (scandesc->numberOfOrderBys > 0 && scandesc->xs_recheckorderby)
            ereport(ERROR,
                    (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                     errmsg("lossy distance functions are not supported in index-only scans")));

        /*
         * If we didn't access the heap, then we'll need to take a predicate
         * lock explicitly, as if we had.  For now we do that at page level.
         */
        if (!tuple_from_heap)
            PredicateLockPage(scandesc->heapRelation,
                              ItemPointerGetBlockNumber(tid),
                              estate->es_snapshot);

        return slot;
    }

    /*
     * If we get here it means the index scan failed, so we are at the end
     * of the scan.
     */
    return ExecClearTuple(slot);
}
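
/*
 * A condensed restatement of the visibility-map test above, for
 * illustration only (hypothetical helper, not part of the original file):
 * the caller keeps *vmbuf pinned across calls, just as IndexOnlyNext()
 * keeps ioss_VMBuffer, so repeated probes of the same VM page avoid
 * extra buffer lookups.
 */
#ifdef NOT_USED
static bool
index_only_needs_heap_fetch(Relation heaprel, ItemPointer tid, Buffer *vmbuf)
{
    /* must visit the heap unless every tuple on the page is all-visible */
    return !VM_ALL_VISIBLE(heaprel,
                           ItemPointerGetBlockNumber(tid),
                           vmbuf);
}
#endif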

/*
 * StoreIndexTuple
 *      Fill the slot with data from the index tuple.
 *
 * At some point this might be generally-useful functionality, but
 * right now we don't need it elsewhere.
 */
static void
StoreIndexTuple(IndexOnlyScanState *node, TupleTableSlot *slot,
                IndexTuple itup, TupleDesc itupdesc)
{
    /*
     * Note: we must use the tupdesc supplied by the AM in index_deform_tuple,
     * not the slot's tupdesc, in case the latter has different datatypes
     * (this happens for btree name_ops in particular).  They'd better have
     * the same number of columns though, as well as being datatype-compatible
     * which is something we can't so easily check.
     */
    Assert(slot->tts_tupleDescriptor->natts == itupdesc->natts);

    ExecClearTuple(slot);
    index_deform_tuple(itup, itupdesc, slot->tts_values, slot->tts_isnull);

    /*
     * Copy all name columns stored as cstrings back into a NAMEDATALEN-byte
     * sized allocation.  We mark this branch as unlikely because generally
     * "name" is used only for the system catalogs, and this would have to be
     * a user query running on those or on some other user table with an
     * index on a name column.
     */
    if (unlikely(node->ioss_NameCStringAttNums != NULL))
    {
        int         attcount = node->ioss_NameCStringCount;

        for (int idx = 0; idx < attcount; idx++)
        {
            int         attnum = node->ioss_NameCStringAttNums[idx];
            Name        name;

            /* skip null Datums */
            if (slot->tts_isnull[attnum])
                continue;

            /* allocate NAMEDATALEN bytes and copy the datum into that memory */
            name = (Name) MemoryContextAlloc(node->ss.ps.ps_ExprContext->ecxt_per_tuple_memory,
                                             NAMEDATALEN);

            /* use namestrcpy to zero-pad all trailing bytes */
            namestrcpy(name, DatumGetCString(slot->tts_values[attnum]));
            slot->tts_values[attnum] = NameGetDatum(name);
        }
    }

    ExecStoreVirtualTuple(slot);
}

/*
 * IndexOnlyRecheck -- access method routine to recheck a tuple in EvalPlanQual
 *
 * This can't really happen, since an index can't supply CTID which would
 * be necessary data for any potential EvalPlanQual target relation.  If it
 * did happen, the EPQ code would pass us the wrong data, namely a heap
 * tuple not an index tuple.  So throw an error.
 */
static bool
IndexOnlyRecheck(IndexOnlyScanState *node, TupleTableSlot *slot)
{
    elog(ERROR, "EvalPlanQual recheck is not supported in index-only scans");
    return false;               /* keep compiler quiet */
}

/* ----------------------------------------------------------------
 *      ExecIndexOnlyScan(node)
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
ExecIndexOnlyScan(PlanState *pstate)
{
    IndexOnlyScanState *node = castNode(IndexOnlyScanState, pstate);

    /*
     * If we have runtime keys and they've not already been set up, do it now.
     */
    if (node->ioss_NumRuntimeKeys != 0 && !node->ioss_RuntimeKeysReady)
        ExecReScan((PlanState *) node);

    return ExecScan(&node->ss,
                    (ExecScanAccessMtd) IndexOnlyNext,
                    (ExecScanRecheckMtd) IndexOnlyRecheck);
}
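
/*
 * For illustration, a minimal sketch of how a parent node consumes tuples
 * from this node (hypothetical driver; real callers go through
 * ExecProcNode(), which dispatches to ExecIndexOnlyScan() via the
 * ExecProcNode pointer set up in ExecInitIndexOnlyScan()):
 */
#ifdef NOT_USED
static void
ConsumeIndexOnlyScan(PlanState *ps)
{
    for (;;)
    {
        TupleTableSlot *slot = ExecProcNode(ps);

        if (TupIsNull(slot))
            break;              /* end of scan */
        /* ... do something with the projected tuple in slot ... */
    }
}
#endif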

/* ----------------------------------------------------------------
 *      ExecReScanIndexOnlyScan(node)
 *
 *      Recalculates the values of any scan keys whose value depends on
 *      information known at runtime, then rescans the indexed relation.
 *
 *      Updating the scan key was formerly done separately in
 *      ExecUpdateIndexScanKeys.  Integrating it into ReScan makes
 *      rescans of indices and relations/general streams more uniform.
 * ----------------------------------------------------------------
 */
void
ExecReScanIndexOnlyScan(IndexOnlyScanState *node)
{
    /*
     * If we are doing runtime key calculations (ie, any of the index key
     * values weren't simple Consts), compute the new key values.  But first,
     * reset the context so we don't leak memory as each outer tuple is
     * scanned.  Note this assumes that we will recalculate *all* runtime
     * keys on each call.
     */
    if (node->ioss_NumRuntimeKeys != 0)
    {
        ExprContext *econtext = node->ioss_RuntimeContext;

        ResetExprContext(econtext);
        ExecIndexEvalRuntimeKeys(econtext,
                                 node->ioss_RuntimeKeys,
                                 node->ioss_NumRuntimeKeys);
    }
    node->ioss_RuntimeKeysReady = true;

    /* reset index scan */
    if (node->ioss_ScanDesc)
        index_rescan(node->ioss_ScanDesc,
                     node->ioss_ScanKeys, node->ioss_NumScanKeys,
                     node->ioss_OrderByKeys, node->ioss_NumOrderByKeys);

    ExecScanReScan(&node->ss);
}


/* ----------------------------------------------------------------
 *      ExecEndIndexOnlyScan
 * ----------------------------------------------------------------
 */
void
ExecEndIndexOnlyScan(IndexOnlyScanState *node)
{
    Relation    indexRelationDesc;
    IndexScanDesc indexScanDesc;

    /*
     * extract information from the node
     */
    indexRelationDesc = node->ioss_RelationDesc;
    indexScanDesc = node->ioss_ScanDesc;

    /* Release VM buffer pin, if any. */
    if (node->ioss_VMBuffer != InvalidBuffer)
    {
        ReleaseBuffer(node->ioss_VMBuffer);
        node->ioss_VMBuffer = InvalidBuffer;
    }

    /*
     * When ending a parallel worker, copy the statistics gathered by the
     * worker back into shared memory so that they can be picked up by the
     * main process to report in EXPLAIN ANALYZE.
     */
    if (node->ioss_SharedInfo != NULL && IsParallelWorker())
    {
        IndexScanInstrumentation *winstrument;

        Assert(ParallelWorkerNumber <= node->ioss_SharedInfo->num_workers);
        winstrument = &node->ioss_SharedInfo->winstrument[ParallelWorkerNumber];

        /*
         * We have to accumulate the stats rather than performing a memcpy.
         * When a Gather/GatherMerge node finishes it will perform planner
         * shutdown on the workers.  On rescan it will spin up new workers
         * which will have a new IndexOnlyScanState and zeroed stats.
         */
        winstrument->nsearches += node->ioss_Instrument.nsearches;
    }

    /*
     * close the index relation (no-op if we didn't open it)
     */
    if (indexScanDesc)
        index_endscan(indexScanDesc);
    if (indexRelationDesc)
        index_close(indexRelationDesc, NoLock);
}

/* ----------------------------------------------------------------
 *      ExecIndexOnlyMarkPos
 *
 * Note: we assume that no caller attempts to set a mark before having read
 * at least one tuple.  Otherwise, ioss_ScanDesc might still be NULL.
 * ----------------------------------------------------------------
 */
void
ExecIndexOnlyMarkPos(IndexOnlyScanState *node)
{
    EState     *estate = node->ss.ps.state;
    EPQState   *epqstate = estate->es_epq_active;

    if (epqstate != NULL)
    {
        /*
         * We are inside an EvalPlanQual recheck.  If a test tuple exists for
         * this relation, then we shouldn't access the index at all.  We
         * would instead need to save, and later restore, the state of the
         * relsubs_done flag, so that re-fetching the test tuple is possible.
         * However, given the assumption that no caller sets a mark at the
         * start of the scan, we can only get here with relsubs_done[i]
         * already set, and so no state need be saved.
         */
        Index       scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid;

        Assert(scanrelid > 0);
        if (epqstate->relsubs_slot[scanrelid - 1] != NULL ||
            epqstate->relsubs_rowmark[scanrelid - 1] != NULL)
        {
            /* Verify the claim above */
            if (!epqstate->relsubs_done[scanrelid - 1])
                elog(ERROR, "unexpected ExecIndexOnlyMarkPos call in EPQ recheck");
            return;
        }
    }

    index_markpos(node->ioss_ScanDesc);
}

/* ----------------------------------------------------------------
 *      ExecIndexOnlyRestrPos
 * ----------------------------------------------------------------
 */
void
ExecIndexOnlyRestrPos(IndexOnlyScanState *node)
{
    EState     *estate = node->ss.ps.state;
    EPQState   *epqstate = estate->es_epq_active;

    if (epqstate != NULL)
    {
        /* See comments in ExecIndexOnlyMarkPos */
        Index       scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid;

        Assert(scanrelid > 0);
        if (epqstate->relsubs_slot[scanrelid - 1] != NULL ||
            epqstate->relsubs_rowmark[scanrelid - 1] != NULL)
        {
            /* Verify the claim above */
            if (!epqstate->relsubs_done[scanrelid - 1])
                elog(ERROR, "unexpected ExecIndexOnlyRestrPos call in EPQ recheck");
            return;
        }
    }

    index_restrpos(node->ioss_ScanDesc);
}
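
/*
 * A minimal sketch of the mark/restore protocol from a consumer's point of
 * view (hypothetical function; the real caller is the merge join machinery,
 * which dispatches through ExecMarkPos() and ExecRestrPos() rather than
 * calling these routines directly):
 */
#ifdef NOT_USED
static void
mark_read_ahead_then_rewind(IndexOnlyScanState *node)
{
    ExecIndexOnlyMarkPos(node); /* remember the current scan position */
    /* ... read some tuples past the mark ... */
    ExecIndexOnlyRestrPos(node);    /* rewind the scan to the mark */
}
#endif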

/* ----------------------------------------------------------------
 *      ExecInitIndexOnlyScan
 *
 *      Initializes the index scan's state information, creates
 *      scan keys, and opens the base and index relations.
 *
 *      Note: index scans have 2 sets of state information because
 *            we have to keep track of the base relation and the
 *            index relation.
 * ----------------------------------------------------------------
 */
IndexOnlyScanState *
ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
{
    IndexOnlyScanState *indexstate;
    Relation    currentRelation;
    Relation    indexRelation;
    LOCKMODE    lockmode;
    TupleDesc   tupDesc;
    int         indnkeyatts;
    int         namecount;

    /*
     * create state structure
     */
    indexstate = makeNode(IndexOnlyScanState);
    indexstate->ss.ps.plan = (Plan *) node;
    indexstate->ss.ps.state = estate;
    indexstate->ss.ps.ExecProcNode = ExecIndexOnlyScan;

    /*
     * Miscellaneous initialization
     *
     * create expression context for node
     */
    ExecAssignExprContext(estate, &indexstate->ss.ps);

    /*
     * open the scan relation
     */
    currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);

    indexstate->ss.ss_currentRelation = currentRelation;
    indexstate->ss.ss_currentScanDesc = NULL;   /* no heap scan here */

    /*
     * Build the scan tuple type using the indextlist generated by the
     * planner.  We use this, rather than the index's physical tuple
     * descriptor, because the latter contains storage column types not the
     * types of the original datums.  (It's the AM's responsibility to return
     * suitable data anyway.)
     */
    tupDesc = ExecTypeFromTL(node->indextlist);
    ExecInitScanTupleSlot(estate, &indexstate->ss, tupDesc,
                          &TTSOpsVirtual);

    /*
     * We need another slot, in a format that's suitable for the table AM,
     * for when we need to fetch a tuple from the table for rechecking
     * visibility.
     */
    indexstate->ioss_TableSlot =
        ExecAllocTableSlot(&estate->es_tupleTable,
                           RelationGetDescr(currentRelation),
                           table_slot_callbacks(currentRelation));

    /*
     * Initialize result type and projection info.  The node's targetlist
     * will contain Vars with varno = INDEX_VAR, referencing the scan tuple.
     */
    ExecInitResultTypeTL(&indexstate->ss.ps);
    ExecAssignScanProjectionInfoWithVarno(&indexstate->ss, INDEX_VAR);

    /*
     * initialize child expressions
     *
     * Note: we don't initialize all of the indexorderby expression, only the
     * sub-parts corresponding to runtime keys (see below).
     */
    indexstate->ss.ps.qual =
        ExecInitQual(node->scan.plan.qual, (PlanState *) indexstate);
    indexstate->recheckqual =
        ExecInitQual(node->recheckqual, (PlanState *) indexstate);

    /*
     * If we are just doing EXPLAIN (ie, aren't going to run the plan), stop
     * here.  This allows an index-advisor plugin to EXPLAIN a plan
     * containing references to nonexistent indexes.
     */
    if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
        return indexstate;

    /* Open the index relation. */
    lockmode = exec_rt_fetch(node->scan.scanrelid, estate)->rellockmode;
    indexRelation = index_open(node->indexid, lockmode);
    indexstate->ioss_RelationDesc = indexRelation;

    /*
     * Initialize index-specific scan state
     */
    indexstate->ioss_RuntimeKeysReady = false;
    indexstate->ioss_RuntimeKeys = NULL;
    indexstate->ioss_NumRuntimeKeys = 0;

    /*
     * build the index scan keys from the index qualification
     */
    ExecIndexBuildScanKeys((PlanState *) indexstate,
                           indexRelation,
                           node->indexqual,
                           false,
                           &indexstate->ioss_ScanKeys,
                           &indexstate->ioss_NumScanKeys,
                           &indexstate->ioss_RuntimeKeys,
                           &indexstate->ioss_NumRuntimeKeys,
                           NULL,    /* no ArrayKeys */
                           NULL);

    /*
     * any ORDER BY exprs have to be turned into scankeys in the same way
     */
    ExecIndexBuildScanKeys((PlanState *) indexstate,
                           indexRelation,
                           node->indexorderby,
                           true,
                           &indexstate->ioss_OrderByKeys,
                           &indexstate->ioss_NumOrderByKeys,
                           &indexstate->ioss_RuntimeKeys,
                           &indexstate->ioss_NumRuntimeKeys,
                           NULL,    /* no ArrayKeys */
                           NULL);

    /*
     * If we have runtime keys, we need an ExprContext to evaluate them.  The
     * node's standard context won't do because we want to reset that context
     * for every tuple.  So, build another context just like the other one...
     * -tgl 7/11/00
     */
    if (indexstate->ioss_NumRuntimeKeys != 0)
    {
        ExprContext *stdecontext = indexstate->ss.ps.ps_ExprContext;

        ExecAssignExprContext(estate, &indexstate->ss.ps);
        indexstate->ioss_RuntimeContext = indexstate->ss.ps.ps_ExprContext;
        indexstate->ss.ps.ps_ExprContext = stdecontext;
    }
    else
    {
        indexstate->ioss_RuntimeContext = NULL;
    }

    indexstate->ioss_NameCStringAttNums = NULL;
    indnkeyatts = indexRelation->rd_index->indnkeyatts;
    namecount = 0;

    /*
     * The "name" type for btree uses text_ops, which results in storing
     * cstrings in the indexed keys rather than names.  Here we detect that
     * in a generic way in case other index AMs want to do the same
     * optimization.  Check for opclasses with an opcintype of NAMEOID and an
     * index tuple descriptor with CSTRINGOID.  If any of these are found,
     * create an array marking the index attribute number of each of them.
     * StoreIndexTuple() handles copying the name Datums into a
     * NAMEDATALEN-byte allocation.
     */

    /* First, count the number of such index keys */
    for (int attnum = 0; attnum < indnkeyatts; attnum++)
    {
        if (TupleDescAttr(indexRelation->rd_att, attnum)->atttypid == CSTRINGOID &&
            indexRelation->rd_opcintype[attnum] == NAMEOID)
            namecount++;
    }

    if (namecount > 0)
    {
        int         idx = 0;

        /*
         * Now create an array to mark the attribute numbers of the keys that
         * need to be converted from cstring to name.
         */
        indexstate->ioss_NameCStringAttNums = palloc_array(AttrNumber, namecount);

        for (int attnum = 0; attnum < indnkeyatts; attnum++)
        {
            if (TupleDescAttr(indexRelation->rd_att, attnum)->atttypid == CSTRINGOID &&
                indexRelation->rd_opcintype[attnum] == NAMEOID)
                indexstate->ioss_NameCStringAttNums[idx++] = (AttrNumber) attnum;
        }
    }

    indexstate->ioss_NameCStringCount = namecount;

    /*
     * all done.
     */
    return indexstate;
}
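
/*
 * The cstring-vs-name detection test used twice above, factored out as a
 * predicate purely for illustration (hypothetical helper; attnum is
 * zero-based, as in the loops above):
 */
#ifdef NOT_USED
static bool
index_key_is_cstring_name(Relation indexRelation, int attnum)
{
    return TupleDescAttr(indexRelation->rd_att, attnum)->atttypid == CSTRINGOID &&
        indexRelation->rd_opcintype[attnum] == NAMEOID;
}
#endif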

/* ----------------------------------------------------------------
 *                  Parallel Index-only Scan Support
 * ----------------------------------------------------------------
 */

/* ----------------------------------------------------------------
 *      ExecIndexOnlyScanEstimate
 *
 *      Compute the amount of space we'll need in the parallel
 *      query DSM, and inform pcxt->estimator about our needs.
 * ----------------------------------------------------------------
 */
void
ExecIndexOnlyScanEstimate(IndexOnlyScanState *node,
                          ParallelContext *pcxt)
{
    EState     *estate = node->ss.ps.state;
    bool        instrument = (node->ss.ps.instrument != NULL);
    bool        parallel_aware = node->ss.ps.plan->parallel_aware;

    if (!instrument && !parallel_aware)
    {
        /* No DSM required by the scan */
        return;
    }

    node->ioss_PscanLen = index_parallelscan_estimate(node->ioss_RelationDesc,
                                                      node->ioss_NumScanKeys,
                                                      node->ioss_NumOrderByKeys,
                                                      estate->es_snapshot,
                                                      instrument, parallel_aware,
                                                      pcxt->nworkers);
    shm_toc_estimate_chunk(&pcxt->estimator, node->ioss_PscanLen);
    shm_toc_estimate_keys(&pcxt->estimator, 1);
}
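
/*
 * For orientation, the leader-side DSM lifecycle as a condensed sketch
 * (hypothetical sequence; the real driver is the generic parallel-executor
 * code, and InitializeParallelDSM() is declared in access/parallel.h,
 * which this file does not include):
 *
 *      ExecIndexOnlyScanEstimate(node, pcxt);          -- size our chunk
 *      InitializeParallelDSM(pcxt);                    -- allocate the DSM
 *      ExecIndexOnlyScanInitializeDSM(node, pcxt);     -- fill it, begin scan
 *      ...
 *      ExecIndexOnlyScanReInitializeDSM(node, pcxt);   -- before each rescan
 *
 * Each worker then attaches to the same ParallelIndexScanDesc via
 * ExecIndexOnlyScanInitializeWorker().
 */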

/* ----------------------------------------------------------------
 *      ExecIndexOnlyScanInitializeDSM
 *
 *      Set up a parallel index-only scan descriptor.
 * ----------------------------------------------------------------
 */
void
ExecIndexOnlyScanInitializeDSM(IndexOnlyScanState *node,
                               ParallelContext *pcxt)
{
    EState     *estate = node->ss.ps.state;
    ParallelIndexScanDesc piscan;
    bool        instrument = node->ss.ps.instrument != NULL;
    bool        parallel_aware = node->ss.ps.plan->parallel_aware;

    if (!instrument && !parallel_aware)
    {
        /* No DSM required by the scan */
        return;
    }

    piscan = shm_toc_allocate(pcxt->toc, node->ioss_PscanLen);
    index_parallelscan_initialize(node->ss.ss_currentRelation,
                                  node->ioss_RelationDesc,
                                  estate->es_snapshot,
                                  instrument, parallel_aware, pcxt->nworkers,
                                  &node->ioss_SharedInfo, piscan);
    shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, piscan);

    if (!parallel_aware)
    {
        /* Only here to initialize SharedInfo in DSM */
        return;
    }

    node->ioss_ScanDesc =
        index_beginscan_parallel(node->ss.ss_currentRelation,
                                 node->ioss_RelationDesc,
                                 &node->ioss_Instrument,
                                 node->ioss_NumScanKeys,
                                 node->ioss_NumOrderByKeys,
                                 piscan);
    node->ioss_ScanDesc->xs_want_itup = true;
    node->ioss_VMBuffer = InvalidBuffer;

    /*
     * If no run-time keys to calculate or they are ready, go ahead and pass
     * the scankeys to the index AM.
     */
    if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady)
        index_rescan(node->ioss_ScanDesc,
                     node->ioss_ScanKeys, node->ioss_NumScanKeys,
                     node->ioss_OrderByKeys, node->ioss_NumOrderByKeys);
}

/* ----------------------------------------------------------------
 *      ExecIndexOnlyScanReInitializeDSM
 *
 *      Reset shared state before beginning a fresh scan.
 * ----------------------------------------------------------------
 */
void
ExecIndexOnlyScanReInitializeDSM(IndexOnlyScanState *node,
                                 ParallelContext *pcxt)
{
    Assert(node->ss.ps.plan->parallel_aware);
    index_parallelrescan(node->ioss_ScanDesc);
}

/* ----------------------------------------------------------------
 *      ExecIndexOnlyScanInitializeWorker
 *
 *      Copy relevant information from TOC into planstate.
 * ----------------------------------------------------------------
 */
void
ExecIndexOnlyScanInitializeWorker(IndexOnlyScanState *node,
                                  ParallelWorkerContext *pwcxt)
{
    ParallelIndexScanDesc piscan;
    bool        instrument = node->ss.ps.instrument != NULL;
    bool        parallel_aware = node->ss.ps.plan->parallel_aware;

    if (!instrument && !parallel_aware)
    {
        /* No DSM required by the scan */
        return;
    }

    piscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);

    if (instrument)
        node->ioss_SharedInfo = (SharedIndexScanInstrumentation *)
            OffsetToPointer(piscan, piscan->ps_offset_ins);

    if (!parallel_aware)
    {
        /* Only here to set up worker node's SharedInfo */
        return;
    }

    node->ioss_ScanDesc =
        index_beginscan_parallel(node->ss.ss_currentRelation,
                                 node->ioss_RelationDesc,
                                 &node->ioss_Instrument,
                                 node->ioss_NumScanKeys,
                                 node->ioss_NumOrderByKeys,
                                 piscan);
    node->ioss_ScanDesc->xs_want_itup = true;

    /*
     * If no run-time keys to calculate or they are ready, go ahead and pass
     * the scankeys to the index AM.
     */
    if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady)
        index_rescan(node->ioss_ScanDesc,
                     node->ioss_ScanKeys, node->ioss_NumScanKeys,
                     node->ioss_OrderByKeys, node->ioss_NumOrderByKeys);
}

/* ----------------------------------------------------------------
 *      ExecIndexOnlyScanRetrieveInstrumentation
 *
 *      Transfer index-only scan statistics from DSM to private memory.
 * ----------------------------------------------------------------
 */
void
ExecIndexOnlyScanRetrieveInstrumentation(IndexOnlyScanState *node)
{
    SharedIndexScanInstrumentation *SharedInfo = node->ioss_SharedInfo;
    size_t      size;

    if (SharedInfo == NULL)
        return;

    /* Create a copy of SharedInfo in backend-local memory */
    size = offsetof(SharedIndexScanInstrumentation, winstrument) +
        SharedInfo->num_workers * sizeof(IndexScanInstrumentation);
    node->ioss_SharedInfo = palloc(size);
    memcpy(node->ioss_SharedInfo, SharedInfo, size);
}
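
/*
 * The sizing idiom used above, shown on its own: for a struct ending in a
 * flexible array member, the allocation size is offsetof(type, array) plus
 * n times the element size.  A minimal self-contained sketch with a
 * hypothetical struct:
 */
#ifdef NOT_USED
typedef struct InstrSketch
{
    int         nitems;
    double      items[FLEXIBLE_ARRAY_MEMBER];
} InstrSketch;

static InstrSketch *
instr_sketch_copy(const InstrSketch *src)
{
    size_t      size = offsetof(InstrSketch, items) +
        src->nitems * sizeof(double);
    InstrSketch *dst = (InstrSketch *) palloc(size);

    memcpy(dst, src, size);
    return dst;
}
#endif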