LCOV - code coverage report
Current view: top level - src/backend/access/nbtree - nbtsearch.c (source / functions) Coverage Total Hit
Test: Code coverage Lines: 89.4 % 748 669
Test Date: 2026-01-26 10:56:24 Functions: 100.0 % 16 16
Legend: Lines:     hit not hit
Branches: + taken - not taken # not executed
Branches: 65.3 % 623 407

             Branch data     Line data    Source code
       1                 :             : /*-------------------------------------------------------------------------
       2                 :             :  *
       3                 :             :  * nbtsearch.c
       4                 :             :  *        Search code for postgres btrees.
       5                 :             :  *
       6                 :             :  *
       7                 :             :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       8                 :             :  * Portions Copyright (c) 1994, Regents of the University of California
       9                 :             :  *
      10                 :             :  * IDENTIFICATION
      11                 :             :  *        src/backend/access/nbtree/nbtsearch.c
      12                 :             :  *
      13                 :             :  *-------------------------------------------------------------------------
      14                 :             :  */
      15                 :             : 
      16                 :             : #include "postgres.h"
      17                 :             : 
      18                 :             : #include "access/nbtree.h"
      19                 :             : #include "access/relscan.h"
      20                 :             : #include "access/xact.h"
      21                 :             : #include "executor/instrument_node.h"
      22                 :             : #include "miscadmin.h"
      23                 :             : #include "pgstat.h"
      24                 :             : #include "storage/predicate.h"
      25                 :             : #include "utils/lsyscache.h"
      26                 :             : #include "utils/rel.h"
      27                 :             : 
      28                 :             : 
      29                 :             : static inline void _bt_drop_lock_and_maybe_pin(Relation rel, BTScanOpaque so);
      30                 :             : static Buffer _bt_moveright(Relation rel, Relation heaprel, BTScanInsert key,
      31                 :             :                                                         Buffer buf, bool forupdate, BTStack stack,
      32                 :             :                                                         int access);
      33                 :             : static OffsetNumber _bt_binsrch(Relation rel, BTScanInsert key, Buffer buf);
      34                 :             : static int      _bt_binsrch_posting(BTScanInsert key, Page page,
      35                 :             :                                                                 OffsetNumber offnum);
      36                 :             : static inline void _bt_returnitem(IndexScanDesc scan, BTScanOpaque so);
      37                 :             : static bool _bt_steppage(IndexScanDesc scan, ScanDirection dir);
      38                 :             : static bool _bt_readfirstpage(IndexScanDesc scan, OffsetNumber offnum,
      39                 :             :                                                           ScanDirection dir);
      40                 :             : static bool _bt_readnextpage(IndexScanDesc scan, BlockNumber blkno,
      41                 :             :                                                          BlockNumber lastcurrblkno, ScanDirection dir,
      42                 :             :                                                          bool seized);
      43                 :             : static Buffer _bt_lock_and_validate_left(Relation rel, BlockNumber *blkno,
      44                 :             :                                                                                  BlockNumber lastcurrblkno);
      45                 :             : static bool _bt_endpoint(IndexScanDesc scan, ScanDirection dir);
      46                 :             : 
      47                 :             : 
      48                 :             : /*
      49                 :             :  *      _bt_drop_lock_and_maybe_pin()
      50                 :             :  *
      51                 :             :  * Unlock so->currPos.buf.  If scan is so->dropPin, drop the pin, too.
      52                 :             :  * Dropping the pin prevents VACUUM from blocking on acquiring a cleanup lock.
      53                 :             :  */
      54                 :             : static inline void
      55                 :      742818 : _bt_drop_lock_and_maybe_pin(Relation rel, BTScanOpaque so)
      56                 :             : {
      57         [ +  + ]:      742818 :         if (!so->dropPin)
      58                 :             :         {
      59                 :             :                 /* Just drop the lock (not the pin) */
      60                 :       34952 :                 _bt_unlockbuf(rel, so->currPos.buf);
      61                 :       34952 :                 return;
      62                 :             :         }
      63                 :             : 
      64                 :             :         /*
      65                 :             :          * Drop both the lock and the pin.
      66                 :             :          *
      67                 :             :          * Have to set so->currPos.lsn so that _bt_killitems has a way to detect
      68                 :             :          * when concurrent heap TID recycling by VACUUM might have taken place.
      69                 :             :          */
      70   [ +  -  +  + ]:      707866 :         Assert(RelationNeedsWAL(rel));
      71                 :      707866 :         so->currPos.lsn = BufferGetLSNAtomic(so->currPos.buf);
      72                 :      707866 :         _bt_relbuf(rel, so->currPos.buf);
      73                 :      707866 :         so->currPos.buf = InvalidBuffer;
      74                 :      742818 : }
      75                 :             : 
      76                 :             : /*
      77                 :             :  *      _bt_search() -- Search the tree for a particular scankey,
      78                 :             :  *              or more precisely for the first leaf page it could be on.
      79                 :             :  *
      80                 :             :  * The passed scankey is an insertion-type scankey (see nbtree/README),
      81                 :             :  * but it can omit the rightmost column(s) of the index.
      82                 :             :  *
      83                 :             :  * Return value is a stack of parent-page pointers (i.e. there is no entry for
      84                 :             :  * the leaf level/page).  *bufP is set to the address of the leaf-page buffer,
      85                 :             :  * which is locked and pinned.  No locks are held on the parent pages,
      86                 :             :  * however!
      87                 :             :  *
      88                 :             :  * The returned buffer is locked according to access parameter.  Additionally,
      89                 :             :  * access = BT_WRITE will allow an empty root page to be created and returned.
      90                 :             :  * When access = BT_READ, an empty index will result in *bufP being set to
      91                 :             :  * InvalidBuffer.  Also, in BT_WRITE mode, any incomplete splits encountered
      92                 :             :  * during the search will be finished.
      93                 :             :  *
      94                 :             :  * heaprel must be provided by callers that pass access = BT_WRITE, since we
      95                 :             :  * might need to allocate a new root page for caller -- see _bt_allocbuf.
      96                 :             :  */
      97                 :             : BTStack
      98                 :     1839272 : _bt_search(Relation rel, Relation heaprel, BTScanInsert key, Buffer *bufP,
      99                 :             :                    int access)
     100                 :             : {
     101                 :     1839272 :         BTStack         stack_in = NULL;
     102                 :     1839272 :         int                     page_access = BT_READ;
     103                 :             : 
     104                 :             :         /* heaprel must be set whenever _bt_allocbuf is reachable */
     105   [ +  +  +  - ]:     1839272 :         Assert(access == BT_READ || access == BT_WRITE);
     106   [ +  +  +  - ]:     1839272 :         Assert(access == BT_READ || heaprel != NULL);
     107                 :             : 
     108                 :             :         /* Get the root page to start with */
     109                 :     1839272 :         *bufP = _bt_getroot(rel, heaprel, access);
     110                 :             : 
     111                 :             :         /* If index is empty and access = BT_READ, no root page is created. */
     112         [ +  + ]:     1839272 :         if (!BufferIsValid(*bufP))
     113                 :       27213 :                 return (BTStack) NULL;
     114                 :             : 
     115                 :             :         /* Loop iterates once per level descended in the tree */
     116                 :     3392365 :         for (;;)
     117                 :             :         {
     118                 :     3392365 :                 Page            page;
     119                 :     3392365 :                 BTPageOpaque opaque;
     120                 :     3392365 :                 OffsetNumber offnum;
     121                 :     3392365 :                 ItemId          itemid;
     122                 :     3392365 :                 IndexTuple      itup;
     123                 :     3392365 :                 BlockNumber child;
     124                 :     3392365 :                 BTStack         new_stack;
     125                 :             : 
     126                 :             :                 /*
     127                 :             :                  * Race -- the page we just grabbed may have split since we read its
     128                 :             :                  * downlink in its parent page (or the metapage).  If it has, we may
     129                 :             :                  * need to move right to its new sibling.  Do that.
     130                 :             :                  *
     131                 :             :                  * In write-mode, allow _bt_moveright to finish any incomplete splits
     132                 :             :                  * along the way.  Strictly speaking, we'd only need to finish an
     133                 :             :                  * incomplete split on the leaf page we're about to insert to, not on
     134                 :             :                  * any of the upper levels (internal pages with incomplete splits are
     135                 :             :                  * also taken care of in _bt_getstackbuf).  But this is a good
     136                 :             :                  * opportunity to finish splits of internal pages too.
     137                 :             :                  */
     138                 :     6784730 :                 *bufP = _bt_moveright(rel, heaprel, key, *bufP, (access == BT_WRITE),
     139                 :     3392365 :                                                           stack_in, page_access);
     140                 :             : 
     141                 :             :                 /* if this is a leaf page, we're done */
     142                 :     3392365 :                 page = BufferGetPage(*bufP);
     143                 :     3392365 :                 opaque = BTPageGetOpaque(page);
     144         [ +  + ]:     3392365 :                 if (P_ISLEAF(opaque))
     145                 :     1812059 :                         break;
     146                 :             : 
     147                 :             :                 /*
     148                 :             :                  * Find the appropriate pivot tuple on this page.  Its downlink points
     149                 :             :                  * to the child page that we're about to descend to.
     150                 :             :                  */
     151                 :     1580306 :                 offnum = _bt_binsrch(rel, key, *bufP);
     152                 :     1580306 :                 itemid = PageGetItemId(page, offnum);
     153                 :     1580306 :                 itup = (IndexTuple) PageGetItem(page, itemid);
     154   [ -  +  #  # ]:     1580306 :                 Assert(BTreeTupleIsPivot(itup) || !key->heapkeyspace);
     155                 :     1580306 :                 child = BTreeTupleGetDownLink(itup);
     156                 :             : 
     157                 :             :                 /*
     158                 :             :                  * We need to save the location of the pivot tuple we chose in a new
     159                 :             :                  * stack entry for this page/level.  If caller ends up splitting a
     160                 :             :                  * page one level down, it usually ends up inserting a new pivot
     161                 :             :                  * tuple/downlink immediately after the location recorded here.
     162                 :             :                  */
     163                 :     1580306 :                 new_stack = (BTStack) palloc_object(BTStackData);
     164                 :     1580306 :                 new_stack->bts_blkno = BufferGetBlockNumber(*bufP);
     165                 :     1580306 :                 new_stack->bts_offset = offnum;
     166                 :     1580306 :                 new_stack->bts_parent = stack_in;
     167                 :             : 
     168                 :             :                 /*
     169                 :             :                  * Page level 1 is lowest non-leaf page level prior to leaves.  So, if
     170                 :             :                  * we're on the level 1 and asked to lock leaf page in write mode,
     171                 :             :                  * then lock next page in write mode, because it must be a leaf.
     172                 :             :                  */
     173   [ +  +  +  + ]:     1580306 :                 if (opaque->btpo_level == 1 && access == BT_WRITE)
     174                 :      726914 :                         page_access = BT_WRITE;
     175                 :             : 
     176                 :             :                 /* drop the read lock on the page, then acquire one on its child */
     177                 :     1580306 :                 *bufP = _bt_relandgetbuf(rel, *bufP, child, page_access);
     178                 :             : 
     179                 :             :                 /* okay, all set to move down a level */
     180                 :     1580306 :                 stack_in = new_stack;
     181      [ +  -  + ]:     3392365 :         }
     182                 :             : 
     183                 :             :         /*
     184                 :             :          * If we're asked to lock leaf in write mode, but didn't manage to, then
     185                 :             :          * relock.  This should only happen when the root page is a leaf page (and
     186                 :             :          * the only page in the index other than the metapage).
     187                 :             :          */
     188   [ +  +  +  + ]:     1812059 :         if (access == BT_WRITE && page_access == BT_READ)
     189                 :             :         {
     190                 :             :                 /* trade in our read lock for a write lock */
     191                 :       63664 :                 _bt_unlockbuf(rel, *bufP);
     192                 :       63664 :                 _bt_lockbuf(rel, *bufP, BT_WRITE);
     193                 :             : 
     194                 :             :                 /*
     195                 :             :                  * Race -- the leaf page may have split after we dropped the read lock
     196                 :             :                  * but before we acquired a write lock.  If it has, we may need to
     197                 :             :                  * move right to its new sibling.  Do that.
     198                 :             :                  */
     199                 :       63664 :                 *bufP = _bt_moveright(rel, heaprel, key, *bufP, true, stack_in, BT_WRITE);
     200                 :       63664 :         }
     201                 :             : 
     202                 :     1812059 :         return stack_in;
     203                 :     1839272 : }
     204                 :             : 
     205                 :             : /*
     206                 :             :  *      _bt_moveright() -- move right in the btree if necessary.
     207                 :             :  *
     208                 :             :  * When we follow a pointer to reach a page, it is possible that
     209                 :             :  * the page has changed in the meanwhile.  If this happens, we're
     210                 :             :  * guaranteed that the page has "split right" -- that is, that any
     211                 :             :  * data that appeared on the page originally is either on the page
     212                 :             :  * or strictly to the right of it.
     213                 :             :  *
     214                 :             :  * This routine decides whether or not we need to move right in the
     215                 :             :  * tree by examining the high key entry on the page.  If that entry is
     216                 :             :  * strictly less than the scankey, or <= the scankey in the
     217                 :             :  * key.nextkey=true case, then we followed the wrong link and we need
     218                 :             :  * to move right.
     219                 :             :  *
     220                 :             :  * The passed insertion-type scankey can omit the rightmost column(s) of the
     221                 :             :  * index. (see nbtree/README)
     222                 :             :  *
     223                 :             :  * When key.nextkey is false (the usual case), we are looking for the first
     224                 :             :  * item >= key.  When key.nextkey is true, we are looking for the first item
     225                 :             :  * strictly greater than key.
     226                 :             :  *
     227                 :             :  * If forupdate is true, we will attempt to finish any incomplete splits
     228                 :             :  * that we encounter.  This is required when locking a target page for an
     229                 :             :  * insertion, because we don't allow inserting on a page before the split is
     230                 :             :  * completed.  'heaprel' and 'stack' are only used if forupdate is true.
     231                 :             :  *
     232                 :             :  * On entry, we have the buffer pinned and a lock of the type specified by
     233                 :             :  * 'access'.  If we move right, we release the buffer and lock and acquire
     234                 :             :  * the same on the right sibling.  Return value is the buffer we stop at.
     235                 :             :  */
     236                 :             : static Buffer
     237                 :     3456029 : _bt_moveright(Relation rel,
     238                 :             :                           Relation heaprel,
     239                 :             :                           BTScanInsert key,
     240                 :             :                           Buffer buf,
     241                 :             :                           bool forupdate,
     242                 :             :                           BTStack stack,
     243                 :             :                           int access)
     244                 :             : {
     245                 :     3456029 :         Page            page;
     246                 :     3456029 :         BTPageOpaque opaque;
     247                 :     3456029 :         int32           cmpval;
     248                 :             : 
     249   [ +  +  +  - ]:     3456029 :         Assert(!forupdate || heaprel != NULL);
     250                 :             : 
     251                 :             :         /*
     252                 :             :          * When nextkey = false (normal case): if the scan key that brought us to
     253                 :             :          * this page is > the high key stored on the page, then the page has split
     254                 :             :          * and we need to move right.  (pg_upgrade'd !heapkeyspace indexes could
     255                 :             :          * have some duplicates to the right as well as the left, but that's
     256                 :             :          * something that's only ever dealt with on the leaf level, after
     257                 :             :          * _bt_search has found an initial leaf page.)
     258                 :             :          *
     259                 :             :          * When nextkey = true: move right if the scan key is >= page's high key.
     260                 :             :          * (Note that key.scantid cannot be set in this case.)
     261                 :             :          *
     262                 :             :          * The page could even have split more than once, so scan as far as
     263                 :             :          * needed.
     264                 :             :          *
     265                 :             :          * We also have to move right if we followed a link that brought us to a
     266                 :             :          * dead page.
     267                 :             :          */
     268                 :     3456029 :         cmpval = key->nextkey ? 0 : 1;
     269                 :             : 
     270                 :     3456029 :         for (;;)
     271                 :             :         {
     272                 :     3456154 :                 page = BufferGetPage(buf);
     273                 :     3456154 :                 opaque = BTPageGetOpaque(page);
     274                 :             : 
     275         [ +  + ]:     3456154 :                 if (P_RIGHTMOST(opaque))
     276                 :     2814928 :                         break;
     277                 :             : 
     278                 :             :                 /*
     279                 :             :                  * Finish any incomplete splits we encounter along the way.
     280                 :             :                  */
     281   [ +  +  +  - ]:      641226 :                 if (forupdate && P_INCOMPLETE_SPLIT(opaque))
     282                 :             :                 {
     283                 :           0 :                         BlockNumber blkno = BufferGetBlockNumber(buf);
     284                 :             : 
     285                 :             :                         /* upgrade our lock if necessary */
     286         [ #  # ]:           0 :                         if (access == BT_READ)
     287                 :             :                         {
     288                 :           0 :                                 _bt_unlockbuf(rel, buf);
     289                 :           0 :                                 _bt_lockbuf(rel, buf, BT_WRITE);
     290                 :           0 :                         }
     291                 :             : 
     292         [ #  # ]:           0 :                         if (P_INCOMPLETE_SPLIT(opaque))
     293                 :           0 :                                 _bt_finish_split(rel, heaprel, buf, stack);
     294                 :             :                         else
     295                 :           0 :                                 _bt_relbuf(rel, buf);
     296                 :             : 
     297                 :             :                         /* re-acquire the lock in the right mode, and re-check */
     298                 :           0 :                         buf = _bt_getbuf(rel, blkno, access);
     299                 :             :                         continue;
     300                 :           0 :                 }
     301                 :             : 
     302   [ +  -  +  + ]:      641226 :                 if (P_IGNORE(opaque) || _bt_compare(rel, key, page, P_HIKEY) >= cmpval)
     303                 :             :                 {
     304                 :             :                         /* step right one page */
     305                 :         125 :                         buf = _bt_relandgetbuf(rel, buf, opaque->btpo_next, access);
     306                 :         125 :                         continue;
     307                 :             :                 }
     308                 :             :                 else
     309                 :      641101 :                         break;
     310                 :             :         }
     311                 :             : 
     312         [ +  - ]:     3456029 :         if (P_IGNORE(opaque))
     313   [ #  #  #  # ]:           0 :                 elog(ERROR, "fell off the end of index \"%s\"",
     314                 :             :                          RelationGetRelationName(rel));
     315                 :             : 
     316                 :     6912058 :         return buf;
     317                 :     3456029 : }
     318                 :             : 
     319                 :             : /*
     320                 :             :  *      _bt_binsrch() -- Do a binary search for a key on a particular page.
     321                 :             :  *
     322                 :             :  * On an internal (non-leaf) page, _bt_binsrch() returns the OffsetNumber
     323                 :             :  * of the last key < given scankey, or last key <= given scankey if nextkey
     324                 :             :  * is true.  (Since _bt_compare treats the first data key of such a page as
     325                 :             :  * minus infinity, there will be at least one key < scankey, so the result
     326                 :             :  * always points at one of the keys on the page.)
     327                 :             :  *
     328                 :             :  * On a leaf page, _bt_binsrch() returns the final result of the initial
     329                 :             :  * positioning process that started with _bt_first's call to _bt_search.
     330                 :             :  * We're returning a non-pivot tuple offset, so things are a little different.
     331                 :             :  * It is possible that we'll return an offset that's either past the last
     332                 :             :  * non-pivot slot, or (in the case of a backward scan) before the first slot.
     333                 :             :  *
     334                 :             :  * This procedure is not responsible for walking right, it just examines
     335                 :             :  * the given page.  _bt_binsrch() has no lock or refcount side effects
     336                 :             :  * on the buffer.
     337                 :             :  */
     338                 :             : static OffsetNumber
     339                 :     2601639 : _bt_binsrch(Relation rel,
     340                 :             :                         BTScanInsert key,
     341                 :             :                         Buffer buf)
     342                 :             : {
     343                 :     2601639 :         Page            page;
     344                 :     2601639 :         BTPageOpaque opaque;
     345                 :     2601639 :         OffsetNumber low,
     346                 :             :                                 high;
     347                 :     2601639 :         int32           result,
     348                 :             :                                 cmpval;
     349                 :             : 
     350                 :     2601639 :         page = BufferGetPage(buf);
     351                 :     2601639 :         opaque = BTPageGetOpaque(page);
     352                 :             : 
     353                 :             :         /* Requesting nextkey semantics while using scantid seems nonsensical */
     354   [ +  +  +  - ]:     2601639 :         Assert(!key->nextkey || key->scantid == NULL);
     355                 :             :         /* scantid-set callers must use _bt_binsrch_insert() on leaf pages */
     356   [ +  +  +  - ]:     2601639 :         Assert(!P_ISLEAF(opaque) || key->scantid == NULL);
     357                 :             : 
     358                 :     2601639 :         low = P_FIRSTDATAKEY(opaque);
     359                 :     2601639 :         high = PageGetMaxOffsetNumber(page);
     360                 :             : 
     361                 :             :         /*
     362                 :             :          * If there are no keys on the page, return the first available slot. Note
     363                 :             :          * this covers two cases: the page is really empty (no keys), or it
     364                 :             :          * contains only a high key.  The latter case is possible after vacuuming.
     365                 :             :          * This can never happen on an internal page, however, since they are
     366                 :             :          * never empty (an internal page must have at least one child).
     367                 :             :          */
     368         [ +  + ]:     2601639 :         if (unlikely(high < low))
     369                 :         877 :                 return low;
     370                 :             : 
     371                 :             :         /*
     372                 :             :          * Binary search to find the first key on the page >= scan key, or first
     373                 :             :          * key > scankey when nextkey is true.
     374                 :             :          *
     375                 :             :          * For nextkey=false (cmpval=1), the loop invariant is: all slots before
     376                 :             :          * 'low' are < scan key, all slots at or after 'high' are >= scan key.
     377                 :             :          *
     378                 :             :          * For nextkey=true (cmpval=0), the loop invariant is: all slots before
     379                 :             :          * 'low' are <= scan key, all slots at or after 'high' are > scan key.
     380                 :             :          *
     381                 :             :          * We can fall out when high == low.
     382                 :             :          */
     383                 :     2600762 :         high++;                                         /* establish the loop invariant for high */
     384                 :             : 
     385                 :     2600762 :         cmpval = key->nextkey ? 0 : 1;       /* select comparison value */
     386                 :             : 
     387         [ +  + ]:    17521213 :         while (high > low)
     388                 :             :         {
     389                 :    14920451 :                 OffsetNumber mid = low + ((high - low) / 2);
     390                 :             : 
     391                 :             :                 /* We have low <= mid < high, so mid points at a real slot */
     392                 :             : 
     393                 :    14920451 :                 result = _bt_compare(rel, key, page, mid);
     394                 :             : 
     395         [ +  + ]:    14920451 :                 if (result >= cmpval)
     396                 :    10260201 :                         low = mid + 1;
     397                 :             :                 else
     398                 :     4660250 :                         high = mid;
     399                 :    14920451 :         }
     400                 :             : 
     401                 :             :         /*
     402                 :             :          * At this point we have high == low.
     403                 :             :          *
     404                 :             :          * On a leaf page we always return the first non-pivot tuple >= scan key
     405                 :             :          * (resp. > scan key) for forward scan callers.  For backward scans, it's
     406                 :             :          * always the _last_ non-pivot tuple < scan key (resp. <= scan key).
     407                 :             :          */
     408         [ +  + ]:     2600762 :         if (P_ISLEAF(opaque))
     409                 :             :         {
     410                 :             :                 /*
     411                 :             :                  * In the backward scan case we're supposed to locate the last
     412                 :             :                  * matching tuple on the leaf level -- not the first matching tuple
     413                 :             :                  * (the last tuple will be the first one returned by the scan).
     414                 :             :                  *
     415                 :             :                  * At this point we've located the first non-pivot tuple immediately
     416                 :             :                  * after the last matching tuple (which might just be maxoff + 1).
     417                 :             :                  * Compensate by stepping back.
     418                 :             :                  */
     419         [ +  + ]:     1020456 :                 if (key->backward)
     420                 :        3427 :                         return OffsetNumberPrev(low);
     421                 :             : 
     422                 :     1017029 :                 return low;
     423                 :             :         }
     424                 :             : 
     425                 :             :         /*
     426                 :             :          * On a non-leaf page, return the last key < scan key (resp. <= scan key).
     427                 :             :          * There must be one if _bt_compare() is playing by the rules.
     428                 :             :          *
     429                 :             :          * _bt_compare() will seldom see any exactly-matching pivot tuples, since
     430                 :             :          * a truncated -inf heap TID is usually enough to prevent it altogether.
     431                 :             :          * Even omitted scan key entries are treated as > truncated attributes.
     432                 :             :          *
     433                 :             :          * However, during backward scans _bt_compare() interprets omitted scan
     434                 :             :          * key attributes as == corresponding truncated -inf attributes instead.
     435                 :             :          * This works just like < would work here.  Under this scheme, < strategy
     436                 :             :          * backward scans will always directly descend to the correct leaf page.
     437                 :             :          * In particular, they will never incur an "extra" leaf page access with a
     438                 :             :          * scan key that happens to contain the same prefix of values as some
     439                 :             :          * pivot tuple's untruncated prefix.  VACUUM relies on this guarantee when
     440                 :             :          * it uses a leaf page high key to "re-find" a page undergoing deletion.
     441                 :             :          */
     442         [ +  - ]:     1580306 :         Assert(low > P_FIRSTDATAKEY(opaque));
     443                 :             : 
     444                 :     1580306 :         return OffsetNumberPrev(low);
     445                 :     2601639 : }
     446                 :             : 
     447                 :             : /*
     448                 :             :  *
     449                 :             :  *      _bt_binsrch_insert() -- Cacheable, incremental leaf page binary search.
     450                 :             :  *
     451                 :             :  * Like _bt_binsrch(), but with support for caching the binary search
     452                 :             :  * bounds.  Only used during insertion, and only on the leaf page that it
     453                 :             :  * looks like caller will insert tuple on.  Exclusive-locked and pinned
     454                 :             :  * leaf page is contained within insertstate.
     455                 :             :  *
     456                 :             :  * Caches the bounds fields in insertstate so that a subsequent call can
     457                 :             :  * reuse the low and strict high bounds of original binary search.  Callers
     458                 :             :  * that use these fields directly must be prepared for the case where low
     459                 :             :  * and/or stricthigh are not on the same page (one or both exceed maxoff
     460                 :             :  * for the page).  The case where there are no items on the page (high <
     461                 :             :  * low) makes bounds invalid.
     462                 :             :  *
     463                 :             :  * Caller is responsible for invalidating bounds when it modifies the page
     464                 :             :  * before calling here a second time, and for dealing with posting list
     465                 :             :  * tuple matches (callers can use insertstate's postingoff field to
     466                 :             :  * determine which existing heap TID will need to be replaced by a posting
     467                 :             :  * list split).
     468                 :             :  */
     469                 :             : OffsetNumber
     470                 :     1372289 : _bt_binsrch_insert(Relation rel, BTInsertState insertstate)
     471                 :             : {
     472                 :     1372289 :         BTScanInsert key = insertstate->itup_key;
     473                 :     1372289 :         Page            page;
     474                 :     1372289 :         BTPageOpaque opaque;
     475                 :     1372289 :         OffsetNumber low,
     476                 :             :                                 high,
     477                 :             :                                 stricthigh;
     478                 :     1372289 :         int32           result,
     479                 :             :                                 cmpval;
     480                 :             : 
     481                 :     1372289 :         page = BufferGetPage(insertstate->buf);
     482                 :     1372289 :         opaque = BTPageGetOpaque(page);
     483                 :             : 
     484         [ +  - ]:     1372289 :         Assert(P_ISLEAF(opaque));
     485         [ +  - ]:     1372289 :         Assert(!key->nextkey);
     486         [ +  - ]:     1372289 :         Assert(insertstate->postingoff == 0);
     487                 :             : 
     488         [ +  + ]:     1372289 :         if (!insertstate->bounds_valid)
     489                 :             :         {
     490                 :             :                 /* Start new binary search */
     491                 :      792709 :                 low = P_FIRSTDATAKEY(opaque);
     492                 :      792709 :                 high = PageGetMaxOffsetNumber(page);
     493                 :      792709 :         }
     494                 :             :         else
     495                 :             :         {
     496                 :             :                 /* Restore result of previous binary search against same page */
     497                 :      579580 :                 low = insertstate->low;
     498                 :      579580 :                 high = insertstate->stricthigh;
     499                 :             :         }
     500                 :             : 
     501                 :             :         /* If there are no keys on the page, return the first available slot */
     502         [ +  + ]:     1372289 :         if (unlikely(high < low))
     503                 :             :         {
     504                 :             :                 /* Caller can't reuse bounds */
     505                 :        1277 :                 insertstate->low = InvalidOffsetNumber;
     506                 :        1277 :                 insertstate->stricthigh = InvalidOffsetNumber;
     507                 :        1277 :                 insertstate->bounds_valid = false;
     508                 :        1277 :                 return low;
     509                 :             :         }
     510                 :             : 
     511                 :             :         /*
     512                 :             :          * Binary search to find the first key on the page >= scan key. (nextkey
     513                 :             :          * is always false when inserting).
     514                 :             :          *
     515                 :             :          * The loop invariant is: all slots before 'low' are < scan key, all slots
     516                 :             :          * at or after 'high' are >= scan key.  'stricthigh' is > scan key, and is
     517                 :             :          * maintained to save additional search effort for caller.
     518                 :             :          *
     519                 :             :          * We can fall out when high == low.
     520                 :             :          */
     521         [ +  + ]:     1371012 :         if (!insertstate->bounds_valid)
     522                 :      791432 :                 high++;                                 /* establish the loop invariant for high */
     523                 :     1371012 :         stricthigh = high;                      /* high initially strictly higher */
     524                 :             : 
     525                 :     1371012 :         cmpval = 1;                                     /* !nextkey comparison value */
     526                 :             : 
     527         [ +  + ]:     7051814 :         while (high > low)
     528                 :             :         {
     529                 :     5680802 :                 OffsetNumber mid = low + ((high - low) / 2);
     530                 :             : 
     531                 :             :                 /* We have low <= mid < high, so mid points at a real slot */
     532                 :             : 
     533                 :     5680802 :                 result = _bt_compare(rel, key, page, mid);
     534                 :             : 
     535         [ +  + ]:     5680802 :                 if (result >= cmpval)
     536                 :     4954551 :                         low = mid + 1;
     537                 :             :                 else
     538                 :             :                 {
     539                 :      726251 :                         high = mid;
     540         [ +  + ]:      726251 :                         if (result != 0)
     541                 :      681491 :                                 stricthigh = high;
     542                 :             :                 }
     543                 :             : 
     544                 :             :                 /*
     545                 :             :                  * If tuple at offset located by binary search is a posting list whose
     546                 :             :                  * TID range overlaps with caller's scantid, perform posting list
     547                 :             :                  * binary search to set postingoff for caller.  Caller must split the
     548                 :             :                  * posting list when postingoff is set.  This should happen
     549                 :             :                  * infrequently.
     550                 :             :                  */
     551   [ +  +  +  + ]:     5680802 :                 if (unlikely(result == 0 && key->scantid != NULL))
     552                 :             :                 {
     553                 :             :                         /*
     554                 :             :                          * postingoff should never be set more than once per leaf page
     555                 :             :                          * binary search.  That would mean that there are duplicate table
     556                 :             :                          * TIDs in the index, which is never okay.  Check for that here.
     557                 :             :                          */
     558         [ +  - ]:        2766 :                         if (insertstate->postingoff != 0)
     559   [ #  #  #  # ]:           0 :                                 ereport(ERROR,
     560                 :             :                                                 (errcode(ERRCODE_INDEX_CORRUPTED),
     561                 :             :                                                  errmsg_internal("table tid from new index tuple (%u,%u) cannot find insert offset between offsets %u and %u of block %u in index \"%s\"",
     562                 :             :                                                                                  ItemPointerGetBlockNumber(key->scantid),
     563                 :             :                                                                                  ItemPointerGetOffsetNumber(key->scantid),
     564                 :             :                                                                                  low, stricthigh,
     565                 :             :                                                                                  BufferGetBlockNumber(insertstate->buf),
     566                 :             :                                                                                  RelationGetRelationName(rel))));
     567                 :             : 
     568                 :        2766 :                         insertstate->postingoff = _bt_binsrch_posting(key, page, mid);
     569                 :        2766 :                 }
     570                 :     5680802 :         }
     571                 :             : 
     572                 :             :         /*
     573                 :             :          * On a leaf page, a binary search always returns the first key >= scan
     574                 :             :          * key (at least in !nextkey case), which could be the last slot + 1. This
     575                 :             :          * is also the lower bound of cached search.
     576                 :             :          *
     577                 :             :          * stricthigh may also be the last slot + 1, which prevents caller from
     578                 :             :          * using bounds directly, but is still useful to us if we're called a
     579                 :             :          * second time with cached bounds (cached low will be < stricthigh when
     580                 :             :          * that happens).
     581                 :             :          */
     582                 :     1371012 :         insertstate->low = low;
     583                 :     1371012 :         insertstate->stricthigh = stricthigh;
     584                 :     1371012 :         insertstate->bounds_valid = true;
     585                 :             : 
     586                 :     1371012 :         return low;
     587                 :     1372289 : }
     588                 :             : 
     589                 :             : /*----------
     590                 :             :  *      _bt_binsrch_posting() -- posting list binary search.
     591                 :             :  *
     592                 :             :  * Helper routine for _bt_binsrch_insert().
     593                 :             :  *
     594                 :             :  * Returns offset into posting list where caller's scantid belongs.
     595                 :             :  *----------
     596                 :             :  */
     597                 :             : static int
     598                 :        2766 : _bt_binsrch_posting(BTScanInsert key, Page page, OffsetNumber offnum)
     599                 :             : {
     600                 :        2766 :         IndexTuple      itup;
     601                 :        2766 :         ItemId          itemid;
     602                 :        2766 :         int                     low,
     603                 :             :                                 high,
     604                 :             :                                 mid,
     605                 :             :                                 res;
     606                 :             : 
     607                 :             :         /*
     608                 :             :          * If this isn't a posting tuple, then the index must be corrupt (if it is
     609                 :             :          * an ordinary non-pivot tuple then there must be an existing tuple with a
     610                 :             :          * heap TID that equals inserter's new heap TID/scantid).  Defensively
     611                 :             :          * check that tuple is a posting list tuple whose posting list range
     612                 :             :          * includes caller's scantid.
     613                 :             :          *
     614                 :             :          * (This is also needed because contrib/amcheck's rootdescend option needs
     615                 :             :          * to be able to relocate a non-pivot tuple using _bt_binsrch_insert().)
     616                 :             :          */
     617                 :        2766 :         itemid = PageGetItemId(page, offnum);
     618                 :        2766 :         itup = (IndexTuple) PageGetItem(page, itemid);
     619         [ -  + ]:        2766 :         if (!BTreeTupleIsPosting(itup))
     620                 :           0 :                 return 0;
     621                 :             : 
     622         [ +  - ]:        2766 :         Assert(key->heapkeyspace && key->allequalimage);
     623                 :             : 
     624                 :             :         /*
     625                 :             :          * In the event that posting list tuple has LP_DEAD bit set, indicate this
     626                 :             :          * to _bt_binsrch_insert() caller by returning -1, a sentinel value.  A
     627                 :             :          * second call to _bt_binsrch_insert() can take place when its caller has
     628                 :             :          * removed the dead item.
     629                 :             :          */
     630         [ -  + ]:        2766 :         if (ItemIdIsDead(itemid))
     631                 :           0 :                 return -1;
     632                 :             : 
     633                 :             :         /* "high" is past end of posting list for loop invariant */
     634                 :        2766 :         low = 0;
     635                 :        2766 :         high = BTreeTupleGetNPosting(itup);
     636         [ +  - ]:        2766 :         Assert(high >= 2);
     637                 :             : 
     638         [ +  + ]:       21147 :         while (high > low)
     639                 :             :         {
     640                 :       18381 :                 mid = low + ((high - low) / 2);
     641                 :       36762 :                 res = ItemPointerCompare(key->scantid,
     642                 :       18381 :                                                                  BTreeTupleGetPostingN(itup, mid));
     643                 :             : 
     644         [ +  + ]:       18381 :                 if (res > 0)
     645                 :       10261 :                         low = mid + 1;
     646         [ -  + ]:        8120 :                 else if (res < 0)
     647                 :        8120 :                         high = mid;
     648                 :             :                 else
     649                 :           0 :                         return mid;
     650                 :             :         }
     651                 :             : 
     652                 :             :         /* Exact match not found */
     653                 :        2766 :         return low;
     654                 :        2766 : }
     655                 :             : 
     656                 :             : /*----------
     657                 :             :  *      _bt_compare() -- Compare insertion-type scankey to tuple on a page.
     658                 :             :  *
     659                 :             :  *      page/offnum: location of btree item to be compared to.
     660                 :             :  *
     661                 :             :  *              This routine returns:
     662                 :             :  *                      <0 if scankey < tuple at offnum;
     663                 :             :  *                       0 if scankey == tuple at offnum;
     664                 :             :  *                      >0 if scankey > tuple at offnum.
     665                 :             :  *
     666                 :             :  * NULLs in the keys are treated as sortable values.  Therefore
     667                 :             :  * "equality" does not necessarily mean that the item should be returned
     668                 :             :  * to the caller as a matching key.  Similarly, an insertion scankey
     669                 :             :  * with its scantid set is treated as equal to a posting tuple whose TID
     670                 :             :  * range overlaps with their scantid.  There generally won't be a
     671                 :             :  * matching TID in the posting tuple, which caller must handle
     672                 :             :  * themselves (e.g., by splitting the posting list tuple).
     673                 :             :  *
     674                 :             :  * CRUCIAL NOTE: on a non-leaf page, the first data key is assumed to be
     675                 :             :  * "minus infinity": this routine will always claim it is less than the
     676                 :             :  * scankey.  The actual key value stored is explicitly truncated to 0
     677                 :             :  * attributes (explicitly minus infinity) with version 3+ indexes, but
     678                 :             :  * that isn't relied upon.  This allows us to implement the Lehman and
     679                 :             :  * Yao convention that the first down-link pointer is before the first
     680                 :             :  * key.  See backend/access/nbtree/README for details.
     681                 :             :  *----------
     682                 :             :  */
     683                 :             : int32
     684                 :    22624628 : _bt_compare(Relation rel,
     685                 :             :                         BTScanInsert key,
     686                 :             :                         Page page,
     687                 :             :                         OffsetNumber offnum)
     688                 :             : {
     689                 :    22624628 :         TupleDesc       itupdesc = RelationGetDescr(rel);
     690                 :    22624628 :         BTPageOpaque opaque = BTPageGetOpaque(page);
     691                 :    22624628 :         IndexTuple      itup;
     692                 :    22624628 :         ItemPointer heapTid;
     693                 :    22624628 :         ScanKey         scankey;
     694                 :    22624628 :         int                     ncmpkey;
     695                 :    22624628 :         int                     ntupatts;
     696                 :    22624628 :         int32           result;
     697                 :             : 
     698         [ +  - ]:    22624628 :         Assert(_bt_check_natts(rel, key->heapkeyspace, page, offnum));
     699         [ +  - ]:    22624628 :         Assert(key->keysz <= IndexRelationGetNumberOfKeyAttributes(rel));
     700   [ -  +  #  # ]:    22624628 :         Assert(key->heapkeyspace || key->scantid == NULL);
     701                 :             : 
     702                 :             :         /*
     703                 :             :          * Force result ">" if target item is first data item on an internal page
     704                 :             :          * --- see NOTE above.
     705                 :             :          */
     706   [ +  +  +  + ]:    22624628 :         if (!P_ISLEAF(opaque) && offnum == P_FIRSTDATAKEY(opaque))
     707                 :      233604 :                 return 1;
     708                 :             : 
     709                 :    22391024 :         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
     710         [ +  + ]:    22391024 :         ntupatts = BTreeTupleGetNAtts(itup, rel);
     711                 :             : 
     712                 :             :         /*
     713                 :             :          * The scan key is set up with the attribute number associated with each
     714                 :             :          * term in the key.  It is important that, if the index is multi-key, the
     715                 :             :          * scan contain the first k key attributes, and that they be in order.  If
     716                 :             :          * you think about how multi-key ordering works, you'll understand why
     717                 :             :          * this is.
     718                 :             :          *
     719                 :             :          * We don't test for violation of this condition here, however.  The
     720                 :             :          * initial setup for the index scan had better have gotten it right (see
     721                 :             :          * _bt_first).
     722                 :             :          */
     723                 :             : 
     724         [ +  + ]:    22391024 :         ncmpkey = Min(ntupatts, key->keysz);
     725   [ -  +  #  # ]:    22391024 :         Assert(key->heapkeyspace || ncmpkey == key->keysz);
     726   [ +  +  +  - ]:    22391024 :         Assert(!BTreeTupleIsPosting(itup) || key->allequalimage);
     727                 :    22391024 :         scankey = key->scankeys;
     728   [ +  +  +  + ]:    48275338 :         for (int i = 1; i <= ncmpkey; i++)
     729                 :             :         {
     730                 :    25884314 :                 Datum           datum;
     731                 :    25884314 :                 bool            isNull;
     732                 :             : 
     733                 :    25884314 :                 datum = index_getattr(itup, scankey->sk_attno, itupdesc, &isNull);
     734                 :             : 
     735         [ +  + ]:    25884314 :                 if (scankey->sk_flags & SK_ISNULL)       /* key is NULL */
     736                 :             :                 {
     737         [ +  + ]:       25461 :                         if (isNull)
     738                 :          87 :                                 result = 0;             /* NULL "=" NULL */
     739         [ +  + ]:       25374 :                         else if (scankey->sk_flags & SK_BT_NULLS_FIRST)
     740                 :         104 :                                 result = -1;    /* NULL "<" NOT_NULL */
     741                 :             :                         else
     742                 :       25270 :                                 result = 1;             /* NULL ">" NOT_NULL */
     743                 :       25461 :                 }
     744         [ +  + ]:    25858853 :                 else if (isNull)                /* key is NOT_NULL and item is NULL */
     745                 :             :                 {
     746         [ -  + ]:          58 :                         if (scankey->sk_flags & SK_BT_NULLS_FIRST)
     747                 :           0 :                                 result = 1;             /* NOT_NULL ">" NULL */
     748                 :             :                         else
     749                 :          58 :                                 result = -1;    /* NOT_NULL "<" NULL */
     750                 :          58 :                 }
     751                 :             :                 else
     752                 :             :                 {
     753                 :             :                         /*
     754                 :             :                          * The sk_func needs to be passed the index value as left arg and
     755                 :             :                          * the sk_argument as right arg (they might be of different
     756                 :             :                          * types).  Since it is convenient for callers to think of
     757                 :             :                          * _bt_compare as comparing the scankey to the index item, we have
     758                 :             :                          * to flip the sign of the comparison result.  (Unless it's a DESC
     759                 :             :                          * column, in which case we *don't* flip the sign.)
     760                 :             :                          */
     761                 :    51717590 :                         result = DatumGetInt32(FunctionCall2Coll(&scankey->sk_func,
     762                 :    25858795 :                                                                                                          scankey->sk_collation,
     763                 :    25858795 :                                                                                                          datum,
     764                 :    25858795 :                                                                                                          scankey->sk_argument));
     765                 :             : 
     766         [ +  + ]:    25858795 :                         if (!(scankey->sk_flags & SK_BT_DESC))
     767         [ +  + ]:    25858784 :                                 INVERT_COMPARE_RESULT(result);
     768                 :             :                 }
     769                 :             : 
     770                 :             :                 /* if the keys are unequal, return the difference */
     771         [ +  + ]:    25884314 :                 if (result != 0)
     772                 :    20141860 :                         return result;
     773                 :             : 
     774                 :     5742454 :                 scankey++;
     775         [ +  + ]:    25884314 :         }
     776                 :             : 
     777                 :             :         /*
     778                 :             :          * All non-truncated attributes (other than heap TID) were found to be
     779                 :             :          * equal.  Treat truncated attributes as minus infinity when scankey has a
     780                 :             :          * key attribute value that would otherwise be compared directly.
     781                 :             :          *
     782                 :             :          * Note: it doesn't matter if ntupatts includes non-key attributes;
     783                 :             :          * scankey won't, so explicitly excluding non-key attributes isn't
     784                 :             :          * necessary.
     785                 :             :          */
     786         [ +  + ]:     2249164 :         if (key->keysz > ntupatts)
     787                 :        8625 :                 return 1;
     788                 :             : 
     789                 :             :         /*
     790                 :             :          * Use the heap TID attribute and scantid to try to break the tie.  The
     791                 :             :          * rules are the same as any other key attribute -- only the
     792                 :             :          * representation differs.
     793                 :             :          */
     794                 :     2240539 :         heapTid = BTreeTupleGetHeapTID(itup);
     795         [ +  + ]:     2240539 :         if (key->scantid == NULL)
     796                 :             :         {
     797                 :             :                 /*
     798                 :             :                  * Forward scans have a scankey that is considered greater than a
     799                 :             :                  * truncated pivot tuple if and when the scankey has equal values for
     800                 :             :                  * attributes up to and including the least significant untruncated
     801                 :             :                  * attribute in tuple.  Even attributes that were omitted from the
     802                 :             :                  * scan key are considered greater than -inf truncated attributes.
     803                 :             :                  * (See _bt_binsrch for an explanation of our backward scan behavior.)
     804                 :             :                  *
     805                 :             :                  * For example, if an index has the minimum two attributes (single
     806                 :             :                  * user key attribute, plus heap TID attribute), and a page's high key
     807                 :             :                  * is ('foo', -inf), and scankey is ('foo', <omitted>), the search
     808                 :             :                  * will not descend to the page to the left.  The search will descend
     809                 :             :                  * right instead.  The truncated attribute in pivot tuple means that
     810                 :             :                  * all non-pivot tuples on the page to the left are strictly < 'foo',
     811                 :             :                  * so it isn't necessary to descend left.  In other words, search
     812                 :             :                  * doesn't have to descend left because it isn't interested in a match
     813                 :             :                  * that has a heap TID value of -inf.
     814                 :             :                  *
     815                 :             :                  * Note: the heap TID part of the test ensures that scankey is being
     816                 :             :                  * compared to a pivot tuple with one or more truncated -inf key
     817                 :             :                  * attributes.  The heap TID attribute is the last key attribute in
     818                 :             :                  * every index, of course, but other than that it isn't special.
     819                 :             :                  */
     820   [ +  +  +  +  :     1911763 :                 if (!key->backward && key->keysz == ntupatts && heapTid == NULL &&
             +  +  -  + ]
     821                 :         541 :                         key->heapkeyspace)
     822                 :         541 :                         return 1;
     823                 :             : 
     824                 :             :                 /* All provided scankey arguments found to be equal */
     825                 :     1911222 :                 return 0;
     826                 :             :         }
     827                 :             : 
     828                 :             :         /*
     829                 :             :          * Treat truncated heap TID as minus infinity, since scankey has a key
     830                 :             :          * attribute value (scantid) that would otherwise be compared directly
     831                 :             :          */
     832         [ +  - ]:      328776 :         Assert(key->keysz == IndexRelationGetNumberOfKeyAttributes(rel));
     833         [ +  + ]:      328776 :         if (heapTid == NULL)
     834                 :         403 :                 return 1;
     835                 :             : 
     836                 :             :         /*
     837                 :             :          * Scankey must be treated as equal to a posting list tuple if its scantid
     838                 :             :          * value falls within the range of the posting list.  In all other cases
     839                 :             :          * there can only be a single heap TID value, which is compared directly
     840                 :             :          * with scantid.
     841                 :             :          */
     842         [ +  - ]:      328373 :         Assert(ntupatts >= IndexRelationGetNumberOfKeyAttributes(rel));
     843                 :      328373 :         result = ItemPointerCompare(key->scantid, heapTid);
     844   [ +  +  +  + ]:      328373 :         if (result <= 0 || !BTreeTupleIsPosting(itup))
     845                 :      310941 :                 return result;
     846                 :             :         else
     847                 :             :         {
     848                 :       34864 :                 result = ItemPointerCompare(key->scantid,
     849                 :       17432 :                                                                         BTreeTupleGetMaxHeapTID(itup));
     850         [ +  + ]:       17432 :                 if (result > 0)
     851                 :       14666 :                         return 1;
     852                 :             :         }
     853                 :             : 
     854                 :        2766 :         return 0;
     855                 :    22624628 : }
     856                 :             : 
     857                 :             : /*
     858                 :             :  *      _bt_first() -- Find the first item in a scan.
     859                 :             :  *
     860                 :             :  *              We need to be clever about the direction of scan, the search
     861                 :             :  *              conditions, and the tree ordering.  We find the first item (or,
     862                 :             :  *              if backwards scan, the last item) in the tree that satisfies the
     863                 :             :  *              qualifications in the scan key.  On success exit, data about the
     864                 :             :  *              matching tuple(s) on the page has been loaded into so->currPos.  We'll
     865                 :             :  *              drop all locks and hold onto a pin on page's buffer, except during
     866                 :             :  *              so->dropPin scans, when we drop both the lock and the pin.
     867                 :             :  *              _bt_returnitem sets the next item to return to scan on success exit.
     868                 :             :  *
     869                 :             :  * If there are no matching items in the index, we return false, with no
     870                 :             :  * pins or locks held.  so->currPos will remain invalid.
     871                 :             :  *
     872                 :             :  * Note that scan->keyData[], and the so->keyData[] scankey built from it,
     873                 :             :  * are both search-type scankeys (see nbtree/README for more about this).
     874                 :             :  * Within this routine, we build a temporary insertion-type scankey to use
     875                 :             :  * in locating the scan start position.
     876                 :             :  */
     877                 :             : bool
     878                 :     1055065 : _bt_first(IndexScanDesc scan, ScanDirection dir)
     879                 :             : {
     880                 :     1055065 :         Relation        rel = scan->indexRelation;
     881                 :     1055065 :         BTScanOpaque so = (BTScanOpaque) scan->opaque;
     882                 :     1055065 :         BTStack         stack;
     883                 :     1055065 :         OffsetNumber offnum;
     884                 :     1055065 :         BTScanInsertData inskey;
     885                 :     1055065 :         ScanKey         startKeys[INDEX_MAX_KEYS];
     886                 :     1055065 :         ScanKeyData notnullkey;
     887                 :     1055065 :         int                     keysz = 0;
     888                 :     1055065 :         StrategyNumber strat_total = InvalidStrategy;
     889                 :     1055065 :         BlockNumber blkno = InvalidBlockNumber,
     890                 :             :                                 lastcurrblkno;
     891                 :             : 
     892   [ +  +  +  -  :     1055065 :         Assert(!BTScanPosIsValid(so->currPos));
                   +  - ]
     893                 :             : 
     894                 :             :         /*
     895                 :             :          * Examine the scan keys and eliminate any redundant keys; also mark the
     896                 :             :          * keys that must be matched to continue the scan.
     897                 :             :          */
     898                 :     1055065 :         _bt_preprocess_keys(scan);
     899                 :             : 
     900                 :             :         /*
     901                 :             :          * Quit now if _bt_preprocess_keys() discovered that the scan keys can
     902                 :             :          * never be satisfied (eg, x == 1 AND x > 2).
     903                 :             :          */
     904         [ +  + ]:     1055065 :         if (!so->qual_ok)
     905                 :             :         {
     906         [ +  - ]:          22 :                 Assert(!so->needPrimScan);
     907                 :          22 :                 _bt_parallel_done(scan);
     908                 :          22 :                 return false;
     909                 :             :         }
     910                 :             : 
     911                 :             :         /*
     912                 :             :          * If this is a parallel scan, we must seize the scan.  _bt_readfirstpage
     913                 :             :          * will likely release the parallel scan later on.
     914                 :             :          */
     915   [ +  +  +  + ]:     1055043 :         if (scan->parallel_scan != NULL &&
     916                 :          73 :                 !_bt_parallel_seize(scan, &blkno, &lastcurrblkno, true))
     917                 :          53 :                 return false;
     918                 :             : 
     919                 :             :         /*
     920                 :             :          * Initialize the scan's arrays (if any) for the current scan direction
     921                 :             :          * (except when they were already set to later values as part of
     922                 :             :          * scheduling the primitive index scan that is now underway)
     923                 :             :          */
     924   [ +  +  +  + ]:     1054990 :         if (so->numArrayKeys && !so->needPrimScan)
     925                 :       11414 :                 _bt_start_array_keys(scan, dir);
     926                 :             : 
     927         [ -  + ]:     1054990 :         if (blkno != InvalidBlockNumber)
     928                 :             :         {
     929                 :             :                 /*
     930                 :             :                  * We anticipated calling _bt_search, but another worker bet us to it.
     931                 :             :                  * _bt_readnextpage releases the scan for us (not _bt_readfirstpage).
     932                 :             :                  */
     933         [ #  # ]:           0 :                 Assert(scan->parallel_scan != NULL);
     934         [ #  # ]:           0 :                 Assert(!so->needPrimScan);
     935         [ #  # ]:           0 :                 Assert(blkno != P_NONE);
     936                 :             : 
     937         [ #  # ]:           0 :                 if (!_bt_readnextpage(scan, blkno, lastcurrblkno, dir, true))
     938                 :           0 :                         return false;
     939                 :             : 
     940                 :           0 :                 _bt_returnitem(scan, so);
     941                 :           0 :                 return true;
     942                 :             :         }
     943                 :             : 
     944                 :             :         /*
     945                 :             :          * Count an indexscan for stats, now that we know that we'll call
     946                 :             :          * _bt_search/_bt_endpoint below
     947                 :             :          */
     948   [ +  +  +  +  :     1054990 :         pgstat_count_index_scan(rel);
                   +  - ]
     949         [ +  + ]:     1054986 :         if (scan->instrument)
     950                 :      173085 :                 scan->instrument->nsearches++;
     951                 :             : 
     952                 :             :         /*----------
     953                 :             :          * Examine the scan keys to discover where we need to start the scan.
     954                 :             :          * The selected scan keys (at most one per index column) are remembered by
     955                 :             :          * storing their addresses into the local startKeys[] array.  The final
     956                 :             :          * startKeys[] entry's strategy is set in strat_total. (Actually, there
     957                 :             :          * are a couple of cases where we force a less/more restrictive strategy.)
     958                 :             :          *
     959                 :             :          * We must use the key that was marked required (in the direction opposite
     960                 :             :          * our own scan's) during preprocessing.  Each index attribute can only
     961                 :             :          * have one such required key.  In general, the keys that we use to find
     962                 :             :          * an initial position when scanning forwards are the same keys that end
     963                 :             :          * the scan on the leaf level when scanning backwards (and vice-versa).
     964                 :             :          *
     965                 :             :          * When the scan keys include cross-type operators, _bt_preprocess_keys
     966                 :             :          * may not be able to eliminate redundant keys; in such cases it will
     967                 :             :          * arbitrarily pick a usable key for each attribute (and scan direction),
     968                 :             :          * ensuring that there is no more than one key required in each direction.
     969                 :             :          * We stop considering further keys once we reach the first nonrequired
     970                 :             :          * key (which must come after all required keys), so this can't affect us.
     971                 :             :          *
     972                 :             :          * The required keys that we use as starting boundaries have to be =, >,
     973                 :             :          * or >= keys for a forward scan or =, <, <= keys for a backwards scan.
     974                 :             :          * We can use keys for multiple attributes so long as the prior attributes
     975                 :             :          * had only =, >= (resp. =, <=) keys.  These rules are very similar to the
     976                 :             :          * rules that preprocessing used to determine which keys to mark required.
     977                 :             :          * We cannot always use every required key as a positioning key, though.
     978                 :             :          * Skip arrays necessitate independently applying our own rules here.
     979                 :             :          * Skip arrays are always generally considered = array keys, but we'll
     980                 :             :          * nevertheless treat them as inequalities at certain points of the scan.
     981                 :             :          * When that happens, it _might_ have implications for the number of
     982                 :             :          * required keys that we can safely use for initial positioning purposes.
     983                 :             :          *
     984                 :             :          * For example, a forward scan with a skip array on its leading attribute
     985                 :             :          * (with no low_compare/high_compare) will have at least two required scan
     986                 :             :          * keys, but we won't use any of them as boundary keys during the scan's
     987                 :             :          * initial call here.  Our positioning key during the first call here can
     988                 :             :          * be thought of as representing "> -infinity".  Similarly, if such a skip
     989                 :             :          * array's low_compare is "a > 'foo'", then we position using "a > 'foo'"
     990                 :             :          * during the scan's initial call here; a lower-order key such as "b = 42"
     991                 :             :          * can't be used until the "a" array advances beyond MINVAL/low_compare.
     992                 :             :          *
     993                 :             :          * On the other hand, if such a skip array's low_compare was "a >= 'foo'",
     994                 :             :          * then we _can_ use "a >= 'foo' AND b = 42" during the initial call here.
     995                 :             :          * A subsequent call here might have us use "a = 'fop' AND b = 42".  Note
     996                 :             :          * that we treat = and >= as equivalent when scanning forwards (just as we
     997                 :             :          * treat = and <= as equivalent when scanning backwards).  We effectively
     998                 :             :          * do the same thing (though with a distinct "a" element/value) each time.
     999                 :             :          *
    1000                 :             :          * All keys (with the exception of SK_SEARCHNULL keys and SK_BT_SKIP
    1001                 :             :          * array keys whose array is "null_elem=true") imply a NOT NULL qualifier.
    1002                 :             :          * If the index stores nulls at the end of the index we'll be starting
    1003                 :             :          * from, and we have no boundary key for the column (which means the key
    1004                 :             :          * we deduced NOT NULL from is an inequality key that constrains the other
    1005                 :             :          * end of the index), then we cons up an explicit SK_SEARCHNOTNULL key to
    1006                 :             :          * use as a boundary key.  If we didn't do this, we might find ourselves
    1007                 :             :          * traversing a lot of null entries at the start of the scan.
    1008                 :             :          *
    1009                 :             :          * In this loop, row-comparison keys are treated the same as keys on their
    1010                 :             :          * first (leftmost) columns.  We'll add all lower-order columns of the row
    1011                 :             :          * comparison that were marked required during preprocessing below.
    1012                 :             :          *
    1013                 :             :          * _bt_advance_array_keys needs to know exactly how we'll reposition the
    1014                 :             :          * scan (should it opt to schedule another primitive index scan).  It is
    1015                 :             :          * critical that primscans only be scheduled when they'll definitely make
    1016                 :             :          * some useful progress.  _bt_advance_array_keys does this by calling
    1017                 :             :          * _bt_checkkeys routines that report whether a tuple is past the end of
    1018                 :             :          * matches for the scan's keys (given the scan's current array elements).
    1019                 :             :          * If the page's final tuple is "after the end of matches" for a scan that
    1020                 :             :          * uses the *opposite* scan direction, then it must follow that it's also
    1021                 :             :          * "before the start of matches" for the actual current scan direction.
    1022                 :             :          * It is therefore essential that all of our initial positioning rules are
    1023                 :             :          * symmetric with _bt_checkkeys's corresponding continuescan=false rule.
    1024                 :             :          * If you update anything here, _bt_checkkeys/_bt_advance_array_keys might
    1025                 :             :          * need to be kept in sync.
    1026                 :             :          *----------
    1027                 :             :          */
    1028         [ +  + ]:     1054986 :         if (so->numberOfKeys > 0)
    1029                 :             :         {
    1030                 :     1054271 :                 AttrNumber      curattr;
    1031                 :     1054271 :                 ScanKey         bkey;
    1032                 :     1054271 :                 ScanKey         impliesNN;
    1033                 :     1054271 :                 ScanKey         cur;
    1034                 :             : 
    1035                 :             :                 /*
    1036                 :             :                  * bkey will be set to the key that preprocessing left behind as the
    1037                 :             :                  * boundary key for this attribute, in this scan direction (if any)
    1038                 :             :                  */
    1039                 :     1054271 :                 cur = so->keyData;
    1040                 :     1054271 :                 curattr = 1;
    1041                 :     1054271 :                 bkey = NULL;
    1042                 :             :                 /* Also remember any scankey that implies a NOT NULL constraint */
    1043                 :     1054271 :                 impliesNN = NULL;
    1044                 :             : 
    1045                 :             :                 /*
    1046                 :             :                  * Loop iterates from 0 to numberOfKeys inclusive; we use the last
    1047                 :             :                  * pass to handle after-last-key processing.  Actual exit from the
    1048                 :             :                  * loop is at one of the "break" statements below.
    1049                 :             :                  */
    1050                 :     3738210 :                 for (int i = 0;; cur++, i++)
    1051                 :             :                 {
    1052   [ +  +  +  + ]:     2683937 :                         if (i >= so->numberOfKeys || cur->sk_attno != curattr)
    1053                 :             :                         {
    1054                 :             :                                 /* Done looking for the curattr boundary key */
    1055   [ +  +  +  - ]:     1629377 :                                 Assert(bkey == NULL ||
    1056                 :             :                                            (bkey->sk_attno == curattr &&
    1057                 :             :                                                 (bkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD))));
    1058   [ +  +  +  - ]:     1629377 :                                 Assert(impliesNN == NULL ||
    1059                 :             :                                            (impliesNN->sk_attno == curattr &&
    1060                 :             :                                                 (impliesNN->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD))));
    1061                 :             : 
    1062                 :             :                                 /*
    1063                 :             :                                  * If this is a scan key for a skip array whose current
    1064                 :             :                                  * element is MINVAL, choose low_compare (when scanning
    1065                 :             :                                  * backwards it'll be MAXVAL, and we'll choose high_compare).
    1066                 :             :                                  *
    1067                 :             :                                  * Note: if the array's low_compare key makes 'bkey' NULL,
    1068                 :             :                                  * then we behave as if the array's first element is -inf,
    1069                 :             :                                  * except when !array->null_elem implies a usable NOT NULL
    1070                 :             :                                  * constraint.
    1071                 :             :                                  */
    1072   [ +  +  +  + ]:     1629377 :                                 if (bkey != NULL &&
    1073                 :     1623800 :                                         (bkey->sk_flags & (SK_BT_MINVAL | SK_BT_MAXVAL)))
    1074                 :             :                                 {
    1075                 :         265 :                                         int                     ikey = bkey - so->keyData;
    1076                 :         265 :                                         ScanKey         skipequalitykey = bkey;
    1077                 :         265 :                                         BTArrayKeyInfo *array = NULL;
    1078                 :             : 
    1079         [ +  - ]:         533 :                                         for (int arridx = 0; arridx < so->numArrayKeys; arridx++)
    1080                 :             :                                         {
    1081                 :         268 :                                                 array = &so->arrayKeys[arridx];
    1082         [ +  + ]:         268 :                                                 if (array->scan_key == ikey)
    1083                 :         265 :                                                         break;
    1084                 :           3 :                                         }
    1085                 :             : 
    1086         [ +  + ]:         265 :                                         if (ScanDirectionIsForward(dir))
    1087                 :             :                                         {
    1088         [ -  + ]:         262 :                                                 Assert(!(skipequalitykey->sk_flags & SK_BT_MAXVAL));
    1089                 :         262 :                                                 bkey = array->low_compare;
    1090                 :         262 :                                         }
    1091                 :             :                                         else
    1092                 :             :                                         {
    1093         [ +  - ]:           3 :                                                 Assert(!(skipequalitykey->sk_flags & SK_BT_MINVAL));
    1094                 :           3 :                                                 bkey = array->high_compare;
    1095                 :             :                                         }
    1096                 :             : 
    1097   [ +  +  -  + ]:         265 :                                         Assert(bkey == NULL ||
    1098                 :             :                                                    bkey->sk_attno == skipequalitykey->sk_attno);
    1099                 :             : 
    1100         [ +  + ]:         265 :                                         if (!array->null_elem)
    1101                 :          33 :                                                 impliesNN = skipequalitykey;
    1102                 :             :                                         else
    1103         [ +  - ]:         232 :                                                 Assert(bkey == NULL && impliesNN == NULL);
    1104                 :         265 :                                 }
    1105                 :             : 
    1106                 :             :                                 /*
    1107                 :             :                                  * If we didn't find a usable boundary key, see if we can
    1108                 :             :                                  * deduce a NOT NULL key
    1109                 :             :                                  */
    1110   [ +  +  +  +  :     1629377 :                                 if (bkey == NULL && impliesNN != NULL &&
                   +  + ]
    1111         [ +  + ]:        5589 :                                         ((impliesNN->sk_flags & SK_BT_NULLS_FIRST) ?
    1112                 :           4 :                                          ScanDirectionIsForward(dir) :
    1113                 :        5585 :                                          ScanDirectionIsBackward(dir)))
    1114                 :             :                                 {
    1115                 :             :                                         /* Final startKeys[] entry will be deduced NOT NULL key */
    1116                 :           5 :                                         bkey = &notnullkey;
    1117                 :          10 :                                         ScanKeyEntryInitialize(bkey,
    1118                 :           5 :                                                                                    (SK_SEARCHNOTNULL | SK_ISNULL |
    1119                 :           5 :                                                                                         (impliesNN->sk_flags &
    1120                 :             :                                                                                          (SK_BT_DESC | SK_BT_NULLS_FIRST))),
    1121                 :           5 :                                                                                    curattr,
    1122                 :           5 :                                                                                    ScanDirectionIsForward(dir) ?
    1123                 :             :                                                                                    BTGreaterStrategyNumber : BTLessStrategyNumber,
    1124                 :             :                                                                                    InvalidOid,
    1125                 :             :                                                                                    InvalidOid,
    1126                 :             :                                                                                    InvalidOid,
    1127                 :             :                                                                                    (Datum) 0);
    1128                 :           5 :                                 }
    1129                 :             : 
    1130                 :             :                                 /*
    1131                 :             :                                  * If preprocessing didn't leave a usable boundary key, quit;
    1132                 :             :                                  * else save the boundary key pointer in startKeys[]
    1133                 :             :                                  */
    1134         [ +  + ]:     1629379 :                                 if (bkey == NULL)
    1135                 :        5816 :                                         break;
    1136                 :     1623563 :                                 startKeys[keysz++] = bkey;
    1137                 :             : 
    1138                 :             :                                 /*
    1139                 :             :                                  * We can only consider adding more boundary keys when the one
    1140                 :             :                                  * that we just chose to add uses either the = or >= strategy
    1141                 :             :                                  * (during backwards scans we can only do so when the key that
    1142                 :             :                                  * we just added to startKeys[] uses the = or <= strategy)
    1143                 :             :                                  */
    1144                 :     1623563 :                                 strat_total = bkey->sk_strategy;
    1145   [ +  +  +  + ]:     1623563 :                                 if (strat_total == BTGreaterStrategyNumber ||
    1146                 :     1535072 :                                         strat_total == BTLessStrategyNumber)
    1147                 :       91889 :                                         break;
    1148                 :             : 
    1149                 :             :                                 /*
    1150                 :             :                                  * If the key that we just added to startKeys[] is a skip
    1151                 :             :                                  * array = key whose current element is marked NEXT or PRIOR,
    1152                 :             :                                  * make strat_total > or < (and stop adding boundary keys).
    1153                 :             :                                  * This can only happen with opclasses that lack skip support.
    1154                 :             :                                  */
    1155         [ +  + ]:     1531674 :                                 if (bkey->sk_flags & (SK_BT_NEXT | SK_BT_PRIOR))
    1156                 :             :                                 {
    1157         [ +  - ]:           2 :                                         Assert(bkey->sk_flags & SK_BT_SKIP);
    1158         [ +  - ]:           2 :                                         Assert(strat_total == BTEqualStrategyNumber);
    1159                 :             : 
    1160         [ +  + ]:           2 :                                         if (ScanDirectionIsForward(dir))
    1161                 :             :                                         {
    1162         [ +  - ]:           1 :                                                 Assert(!(bkey->sk_flags & SK_BT_PRIOR));
    1163                 :           1 :                                                 strat_total = BTGreaterStrategyNumber;
    1164                 :           1 :                                         }
    1165                 :             :                                         else
    1166                 :             :                                         {
    1167         [ +  - ]:           1 :                                                 Assert(!(bkey->sk_flags & SK_BT_NEXT));
    1168                 :           1 :                                                 strat_total = BTLessStrategyNumber;
    1169                 :             :                                         }
    1170                 :             : 
    1171                 :             :                                         /*
    1172                 :             :                                          * We're done.  We'll never find an exact = match for a
    1173                 :             :                                          * NEXT or PRIOR sentinel sk_argument value.  There's no
    1174                 :             :                                          * sense in trying to add more keys to startKeys[].
    1175                 :             :                                          */
    1176                 :           2 :                                         break;
    1177                 :             :                                 }
    1178                 :             : 
    1179                 :             :                                 /*
    1180                 :             :                                  * Done if that was the last scan key output by preprocessing.
    1181                 :             :                                  * Also done if we've now examined all keys marked required.
    1182                 :             :                                  */
    1183   [ +  +  +  + ]:     1531672 :                                 if (i >= so->numberOfKeys ||
    1184                 :      575107 :                                         !(cur->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)))
    1185                 :      956566 :                                         break;
    1186                 :             : 
    1187                 :             :                                 /*
    1188                 :             :                                  * Reset for next attr.
    1189                 :             :                                  */
    1190         [ +  - ]:      575106 :                                 Assert(cur->sk_attno == curattr + 1);
    1191                 :      575106 :                                 curattr = cur->sk_attno;
    1192                 :      575106 :                                 bkey = NULL;
    1193                 :      575106 :                                 impliesNN = NULL;
    1194                 :      575106 :                         }
    1195                 :             : 
    1196                 :             :                         /*
    1197                 :             :                          * If we've located the starting boundary key for curattr, we have
    1198                 :             :                          * no interest in curattr's other required key
    1199                 :             :                          */
    1200         [ +  + ]:     1629666 :                         if (bkey != NULL)
    1201                 :         283 :                                 continue;
    1202                 :             : 
    1203                 :             :                         /*
    1204                 :             :                          * Is this key the starting boundary key for curattr?
    1205                 :             :                          *
    1206                 :             :                          * If not, does it imply a NOT NULL constraint?  (Because
    1207                 :             :                          * SK_SEARCHNULL keys are always assigned BTEqualStrategyNumber,
    1208                 :             :                          * *any* inequality key works for that; we need not test.)
    1209                 :             :                          */
    1210   [ +  +  +  - ]:     1629383 :                         switch (cur->sk_strategy)
    1211                 :             :                         {
    1212                 :             :                                 case BTLessStrategyNumber:
    1213                 :             :                                 case BTLessEqualStrategyNumber:
    1214         [ +  + ]:        8974 :                                         if (ScanDirectionIsBackward(dir))
    1215                 :        3398 :                                                 bkey = cur;
    1216         [ -  + ]:        5576 :                                         else if (impliesNN == NULL)
    1217                 :        5576 :                                                 impliesNN = cur;
    1218                 :        8974 :                                         break;
    1219                 :             :                                 case BTEqualStrategyNumber:
    1220                 :     1531280 :                                         bkey = cur;
    1221                 :     1531280 :                                         break;
    1222                 :             :                                 case BTGreaterEqualStrategyNumber:
    1223                 :             :                                 case BTGreaterStrategyNumber:
    1224         [ +  + ]:       89129 :                                         if (ScanDirectionIsForward(dir))
    1225                 :       89122 :                                                 bkey = cur;
    1226         [ -  + ]:           7 :                                         else if (impliesNN == NULL)
    1227                 :           7 :                                                 impliesNN = cur;
    1228                 :       89129 :                                         break;
    1229                 :             :                         }
    1230                 :     1629383 :                 }
    1231                 :     1054273 :         }
    1232                 :             : 
    1233                 :             :         /*
    1234                 :             :          * If we found no usable boundary keys, we have to start from one end of
    1235                 :             :          * the tree.  Walk down that edge to the first or last key, and scan from
    1236                 :             :          * there.
    1237                 :             :          *
    1238                 :             :          * Note: calls _bt_readfirstpage for us, which releases the parallel scan.
    1239                 :             :          */
    1240         [ +  + ]:     1054988 :         if (keysz == 0)
    1241                 :        6449 :                 return _bt_endpoint(scan, dir);
    1242                 :             : 
    1243                 :             :         /*
    1244                 :             :          * We want to start the scan somewhere within the index.  Set up an
    1245                 :             :          * insertion scankey we can use to search for the boundary point we
    1246                 :             :          * identified above.  The insertion scankey is built using the keys
    1247                 :             :          * identified by startKeys[].  (Remaining insertion scankey fields are
    1248                 :             :          * initialized after initial-positioning scan keys are finalized.)
    1249                 :             :          */
    1250         [ +  - ]:     1048539 :         Assert(keysz <= INDEX_MAX_KEYS);
    1251         [ +  + ]:     2672102 :         for (int i = 0; i < keysz; i++)
    1252                 :             :         {
    1253                 :     1623563 :                 ScanKey         bkey = startKeys[i];
    1254                 :             : 
    1255         [ -  + ]:     1623563 :                 Assert(bkey->sk_attno == i + 1);
    1256                 :             : 
    1257         [ +  + ]:     1623563 :                 if (bkey->sk_flags & SK_ROW_HEADER)
    1258                 :             :                 {
    1259                 :             :                         /*
    1260                 :             :                          * Row comparison header: look to the first row member instead
    1261                 :             :                          */
    1262                 :           8 :                         ScanKey         subkey = (ScanKey) DatumGetPointer(bkey->sk_argument);
    1263                 :           8 :                         bool            loosen_strat = false,
    1264                 :           8 :                                                 tighten_strat = false;
    1265                 :             : 
    1266                 :             :                         /*
    1267                 :             :                          * Cannot be a NULL in the first row member: _bt_preprocess_keys
    1268                 :             :                          * would've marked the qual as unsatisfiable, preventing us from
    1269                 :             :                          * ever getting this far
    1270                 :             :                          */
    1271         [ -  + ]:           8 :                         Assert(subkey->sk_flags & SK_ROW_MEMBER);
    1272         [ +  - ]:           8 :                         Assert(subkey->sk_attno == bkey->sk_attno);
    1273         [ -  + ]:           8 :                         Assert(!(subkey->sk_flags & SK_ISNULL));
    1274                 :             : 
    1275                 :             :                         /*
    1276                 :             :                          * This is either a > or >= key (during backwards scans it is
    1277                 :             :                          * either < or <=) that was marked required during preprocessing.
    1278                 :             :                          * Later so->keyData[] keys can't have been marked required, so
    1279                 :             :                          * our row compare header key must be the final startKeys[] entry.
    1280                 :             :                          */
    1281         [ -  + ]:           8 :                         Assert(subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD));
    1282         [ -  + ]:           8 :                         Assert(subkey->sk_strategy == bkey->sk_strategy);
    1283         [ -  + ]:           8 :                         Assert(subkey->sk_strategy == strat_total);
    1284         [ -  + ]:           8 :                         Assert(i == keysz - 1);
    1285                 :             : 
    1286                 :             :                         /*
    1287                 :             :                          * The member scankeys are already in insertion format (ie, they
    1288                 :             :                          * have sk_func = 3-way-comparison function)
    1289                 :             :                          */
    1290                 :           8 :                         memcpy(inskey.scankeys + i, subkey, sizeof(ScanKeyData));
    1291                 :             : 
    1292                 :             :                         /*
    1293                 :             :                          * Now look to later row compare members.
    1294                 :             :                          *
    1295                 :             :                          * If there's an "index attribute gap" between two row compare
    1296                 :             :                          * members, the second member won't have been marked required, and
    1297                 :             :                          * so can't be used as a starting boundary key here.  The part of
    1298                 :             :                          * the row comparison that we do still use has to be treated as a
    1299                 :             :                          * ">=" or "<=" condition.  For example, a qual "(a, c) > (1, 42)"
    1300                 :             :                          * with an omitted intervening index attribute "b" will use an
    1301                 :             :                          * insertion scan key "a >= 1".  Even the first "a = 1" tuple on
    1302                 :             :                          * the leaf level might satisfy the row compare qual.
    1303                 :             :                          *
    1304                 :             :                          * We're able to use a _more_ restrictive strategy when we reach a
    1305                 :             :                          * NULL row compare member, since they're always unsatisfiable.
    1306                 :             :                          * For example, a qual "(a, b, c) >= (1, NULL, 77)" will use an
    1307                 :             :                          * insertion scan key "a > 1".  All tuples where "a = 1" cannot
    1308                 :             :                          * possibly satisfy the row compare qual, so this is safe.
    1309                 :             :                          */
    1310         [ -  + ]:           8 :                         Assert(!(subkey->sk_flags & SK_ROW_END));
    1311                 :           8 :                         for (;;)
    1312                 :             :                         {
    1313                 :           8 :                                 subkey++;
    1314         [ -  + ]:           8 :                                 Assert(subkey->sk_flags & SK_ROW_MEMBER);
    1315                 :             : 
    1316         [ +  + ]:           8 :                                 if (subkey->sk_flags & SK_ISNULL)
    1317                 :             :                                 {
    1318                 :             :                                         /*
    1319                 :             :                                          * NULL member key, can only use earlier keys.
    1320                 :             :                                          *
    1321                 :             :                                          * We deliberately avoid checking if this key is marked
    1322                 :             :                                          * required.  All earlier keys are required, and this key
    1323                 :             :                                          * is unsatisfiable either way, so we can't miss anything.
    1324                 :             :                                          */
    1325                 :           2 :                                         tighten_strat = true;
    1326                 :           2 :                                         break;
    1327                 :             :                                 }
    1328                 :             : 
    1329         [ +  + ]:           6 :                                 if (!(subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)))
    1330                 :             :                                 {
    1331                 :             :                                         /* nonrequired member key, can only use earlier keys */
    1332                 :           2 :                                         loosen_strat = true;
    1333                 :           2 :                                         break;
    1334                 :             :                                 }
    1335                 :             : 
    1336         [ -  + ]:           4 :                                 Assert(subkey->sk_attno == keysz + 1);
    1337         [ +  - ]:           4 :                                 Assert(subkey->sk_strategy == bkey->sk_strategy);
    1338         [ -  + ]:           4 :                                 Assert(keysz < INDEX_MAX_KEYS);
    1339                 :             : 
    1340                 :           4 :                                 memcpy(inskey.scankeys + keysz, subkey, sizeof(ScanKeyData));
    1341                 :           4 :                                 keysz++;
    1342                 :             : 
    1343         [ -  + ]:           4 :                                 if (subkey->sk_flags & SK_ROW_END)
    1344                 :           4 :                                         break;
    1345                 :             :                         }
    1346   [ +  +  -  + ]:           8 :                         Assert(!(loosen_strat && tighten_strat));
    1347         [ +  + ]:           8 :                         if (loosen_strat)
    1348                 :             :                         {
    1349                 :             :                                 /* Use less restrictive strategy (and fewer member keys) */
    1350      [ -  +  + ]:           2 :                                 switch (strat_total)
    1351                 :             :                                 {
    1352                 :             :                                         case BTLessStrategyNumber:
    1353                 :           1 :                                                 strat_total = BTLessEqualStrategyNumber;
    1354                 :           1 :                                                 break;
    1355                 :             :                                         case BTGreaterStrategyNumber:
    1356                 :           1 :                                                 strat_total = BTGreaterEqualStrategyNumber;
    1357                 :           1 :                                                 break;
    1358                 :             :                                 }
    1359                 :           2 :                         }
    1360         [ +  + ]:           8 :                         if (tighten_strat)
    1361                 :             :                         {
    1362                 :             :                                 /* Use more restrictive strategy (and fewer member keys) */
    1363      [ -  +  + ]:           2 :                                 switch (strat_total)
    1364                 :             :                                 {
    1365                 :             :                                         case BTLessEqualStrategyNumber:
    1366                 :           1 :                                                 strat_total = BTLessStrategyNumber;
    1367                 :           1 :                                                 break;
    1368                 :             :                                         case BTGreaterEqualStrategyNumber:
    1369                 :           1 :                                                 strat_total = BTGreaterStrategyNumber;
    1370                 :           1 :                                                 break;
    1371                 :             :                                 }
    1372                 :           2 :                         }
    1373                 :             : 
    1374                 :             :                         /* Done (row compare header key is always last startKeys[] key) */
    1375                 :             :                         break;
    1376                 :           8 :                 }
    1377                 :             : 
    1378                 :             :                 /*
    1379                 :             :                  * Ordinary comparison key/search-style key.
    1380                 :             :                  *
    1381                 :             :                  * Transform the search-style scan key to an insertion scan key by
    1382                 :             :                  * replacing the sk_func with the appropriate btree 3-way-comparison
    1383                 :             :                  * function.
    1384                 :             :                  *
    1385                 :             :                  * If scankey operator is not a cross-type comparison, we can use the
    1386                 :             :                  * cached comparison function; otherwise gotta look it up in the
    1387                 :             :                  * catalogs.  (That can't lead to infinite recursion, since no
    1388                 :             :                  * indexscan initiated by syscache lookup will use cross-data-type
    1389                 :             :                  * operators.)
    1390                 :             :                  *
    1391                 :             :                  * We support the convention that sk_subtype == InvalidOid means the
    1392                 :             :                  * opclass input type; this hack simplifies life for ScanKeyInit().
    1393                 :             :                  */
    1394   [ +  +  +  + ]:     1623555 :                 if (bkey->sk_subtype == rel->rd_opcintype[i] ||
    1395                 :     1444471 :                         bkey->sk_subtype == InvalidOid)
    1396                 :             :                 {
    1397                 :     1621778 :                         FmgrInfo   *procinfo;
    1398                 :             : 
    1399                 :     1621778 :                         procinfo = index_getprocinfo(rel, bkey->sk_attno, BTORDER_PROC);
    1400                 :     3243556 :                         ScanKeyEntryInitializeWithInfo(inskey.scankeys + i,
    1401                 :     1621778 :                                                                                    bkey->sk_flags,
    1402                 :     1621778 :                                                                                    bkey->sk_attno,
    1403                 :             :                                                                                    InvalidStrategy,
    1404                 :     1621778 :                                                                                    bkey->sk_subtype,
    1405                 :     1621778 :                                                                                    bkey->sk_collation,
    1406                 :     1621778 :                                                                                    procinfo,
    1407                 :     1621778 :                                                                                    bkey->sk_argument);
    1408                 :     1621778 :                 }
    1409                 :             :                 else
    1410                 :             :                 {
    1411                 :        1777 :                         RegProcedure cmp_proc;
    1412                 :             : 
    1413                 :        3554 :                         cmp_proc = get_opfamily_proc(rel->rd_opfamily[i],
    1414                 :        1777 :                                                                                  rel->rd_opcintype[i],
    1415                 :        1777 :                                                                                  bkey->sk_subtype, BTORDER_PROC);
    1416         [ +  - ]:        1777 :                         if (!RegProcedureIsValid(cmp_proc))
    1417   [ #  #  #  # ]:           0 :                                 elog(ERROR, "missing support function %d(%u,%u) for attribute %d of index \"%s\"",
    1418                 :             :                                          BTORDER_PROC, rel->rd_opcintype[i], bkey->sk_subtype,
    1419                 :             :                                          bkey->sk_attno, RelationGetRelationName(rel));
    1420                 :        3554 :                         ScanKeyEntryInitialize(inskey.scankeys + i,
    1421                 :        1777 :                                                                    bkey->sk_flags,
    1422                 :        1777 :                                                                    bkey->sk_attno,
    1423                 :             :                                                                    InvalidStrategy,
    1424                 :        1777 :                                                                    bkey->sk_subtype,
    1425                 :        1777 :                                                                    bkey->sk_collation,
    1426                 :        1777 :                                                                    cmp_proc,
    1427                 :        1777 :                                                                    bkey->sk_argument);
    1428                 :        1777 :                 }
    1429         [ +  + ]:     1623563 :         }
    1430                 :             : 
    1431                 :             :         /*----------
    1432                 :             :          * Examine the selected initial-positioning strategy to determine exactly
    1433                 :             :          * where we need to start the scan, and set flag variables to control the
    1434                 :             :          * initial descent by _bt_search (and our _bt_binsrch call for the leaf
    1435                 :             :          * page _bt_search returns).
    1436                 :             :          *----------
    1437                 :             :          */
    1438                 :     1048539 :         _bt_metaversion(rel, &inskey.heapkeyspace, &inskey.allequalimage);
    1439                 :     1048539 :         inskey.anynullkeys = false; /* unused */
    1440                 :     1048539 :         inskey.scantid = NULL;
    1441                 :     1048539 :         inskey.keysz = keysz;
    1442   [ +  +  +  +  :     1048539 :         switch (strat_total)
                   +  - ]
    1443                 :             :         {
    1444                 :             :                 case BTLessStrategyNumber:
    1445                 :             : 
    1446                 :        3399 :                         inskey.nextkey = false;
    1447                 :        3399 :                         inskey.backward = true;
    1448                 :        3399 :                         break;
    1449                 :             : 
    1450                 :             :                 case BTLessEqualStrategyNumber:
    1451                 :             : 
    1452                 :           3 :                         inskey.nextkey = true;
    1453                 :           3 :                         inskey.backward = true;
    1454                 :           3 :                         break;
    1455                 :             : 
    1456                 :             :                 case BTEqualStrategyNumber:
    1457                 :             : 
    1458                 :             :                         /*
    1459                 :             :                          * If a backward scan was specified, need to start with last equal
    1460                 :             :                          * item not first one.
    1461                 :             :                          */
    1462         [ +  + ]:      956011 :                         if (ScanDirectionIsBackward(dir))
    1463                 :             :                         {
    1464                 :             :                                 /*
    1465                 :             :                                  * This is the same as the <= strategy
    1466                 :             :                                  */
    1467                 :          25 :                                 inskey.nextkey = true;
    1468                 :          25 :                                 inskey.backward = true;
    1469                 :          25 :                         }
    1470                 :             :                         else
    1471                 :             :                         {
    1472                 :             :                                 /*
    1473                 :             :                                  * This is the same as the >= strategy
    1474                 :             :                                  */
    1475                 :      955986 :                                 inskey.nextkey = false;
    1476                 :      955986 :                                 inskey.backward = false;
    1477                 :             :                         }
    1478                 :      956011 :                         break;
    1479                 :             : 
    1480                 :             :                 case BTGreaterEqualStrategyNumber:
    1481                 :             : 
    1482                 :             :                         /*
    1483                 :             :                          * Find first item >= scankey
    1484                 :             :                          */
    1485                 :         634 :                         inskey.nextkey = false;
    1486                 :         634 :                         inskey.backward = false;
    1487                 :         634 :                         break;
    1488                 :             : 
    1489                 :             :                 case BTGreaterStrategyNumber:
    1490                 :             : 
    1491                 :             :                         /*
    1492                 :             :                          * Find first item > scankey
    1493                 :             :                          */
    1494                 :       88492 :                         inskey.nextkey = true;
    1495                 :       88492 :                         inskey.backward = false;
    1496                 :       88492 :                         break;
    1497                 :             : 
    1498                 :             :                 default:
    1499                 :             :                         /* can't get here, but keep compiler quiet */
    1500   [ #  #  #  # ]:           0 :                         elog(ERROR, "unrecognized strat_total: %d", (int) strat_total);
    1501                 :           0 :                         return false;
    1502                 :             :         }
    1503                 :             : 
    1504                 :             :         /*
    1505                 :             :          * Use the manufactured insertion scan key to descend the tree and
    1506                 :             :          * position ourselves on the target leaf page.
    1507                 :             :          */
    1508         [ +  - ]:     1048539 :         Assert(ScanDirectionIsBackward(dir) == inskey.backward);
    1509                 :     1048539 :         stack = _bt_search(rel, NULL, &inskey, &so->currPos.buf, BT_READ);
    1510                 :             : 
    1511                 :             :         /* don't need to keep the stack around... */
    1512                 :     1048539 :         _bt_freestack(stack);
    1513                 :             : 
    1514         [ +  + ]:     1048539 :         if (!BufferIsValid(so->currPos.buf))
    1515                 :             :         {
    1516         [ +  - ]:       27206 :                 Assert(!so->needPrimScan);
    1517                 :             : 
    1518                 :             :                 /*
    1519                 :             :                  * We only get here if the index is completely empty. Lock relation
    1520                 :             :                  * because nothing finer to lock exists.  Without a buffer lock, it's
    1521                 :             :                  * possible for another transaction to insert data between
    1522                 :             :                  * _bt_search() and PredicateLockRelation().  We have to try again
    1523                 :             :                  * after taking the relation-level predicate lock, to close a narrow
    1524                 :             :                  * window where we wouldn't scan concurrently inserted tuples, but the
    1525                 :             :                  * writer wouldn't see our predicate lock.
    1526                 :             :                  */
    1527         [ +  + ]:       27206 :                 if (IsolationIsSerializable())
    1528                 :             :                 {
    1529                 :           7 :                         PredicateLockRelation(rel, scan->xs_snapshot);
    1530                 :           7 :                         stack = _bt_search(rel, NULL, &inskey, &so->currPos.buf, BT_READ);
    1531                 :           7 :                         _bt_freestack(stack);
    1532                 :           7 :                 }
    1533                 :             : 
    1534         [ -  + ]:       27206 :                 if (!BufferIsValid(so->currPos.buf))
    1535                 :             :                 {
    1536                 :       27206 :                         _bt_parallel_done(scan);
    1537                 :       27206 :                         return false;
    1538                 :             :                 }
    1539                 :           0 :         }
    1540                 :             : 
    1541                 :             :         /* position to the precise item on the page */
    1542                 :     1021333 :         offnum = _bt_binsrch(rel, &inskey, so->currPos.buf);
    1543                 :             : 
    1544                 :             :         /*
    1545                 :             :          * Now load data from the first page of the scan (usually the page
    1546                 :             :          * currently in so->currPos.buf).
    1547                 :             :          *
    1548                 :             :          * If inskey.nextkey = false and inskey.backward = false, offnum is
    1549                 :             :          * positioned at the first non-pivot tuple >= inskey.scankeys.
    1550                 :             :          *
    1551                 :             :          * If inskey.nextkey = false and inskey.backward = true, offnum is
    1552                 :             :          * positioned at the last non-pivot tuple < inskey.scankeys.
    1553                 :             :          *
    1554                 :             :          * If inskey.nextkey = true and inskey.backward = false, offnum is
    1555                 :             :          * positioned at the first non-pivot tuple > inskey.scankeys.
    1556                 :             :          *
    1557                 :             :          * If inskey.nextkey = true and inskey.backward = true, offnum is
    1558                 :             :          * positioned at the last non-pivot tuple <= inskey.scankeys.
    1559                 :             :          *
    1560                 :             :          * It's possible that _bt_binsrch returned an offnum that is out of bounds
    1561                 :             :          * for the page.  For example, when inskey is both < the leaf page's high
    1562                 :             :          * key and > all of its non-pivot tuples, offnum will be "maxoff + 1".
    1563                 :             :          */
    1564         [ +  + ]:     1021333 :         if (!_bt_readfirstpage(scan, offnum, dir))
    1565                 :      287433 :                 return false;
    1566                 :             : 
    1567                 :      733900 :         _bt_returnitem(scan, so);
    1568                 :      733900 :         return true;
    1569                 :     1055063 : }
    1570                 :             : 
    1571                 :             : /*
    1572                 :             :  *      _bt_next() -- Get the next item in a scan.
    1573                 :             :  *
    1574                 :             :  *              On entry, so->currPos describes the current page, which may be pinned
    1575                 :             :  *              but is not locked, and so->currPos.itemIndex identifies which item was
    1576                 :             :  *              previously returned.
    1577                 :             :  *
    1578                 :             :  *              On success exit, so->currPos is updated as needed, and _bt_returnitem
    1579                 :             :  *              sets the next item to return to the scan.  so->currPos remains valid.
    1580                 :             :  *
    1581                 :             :  *              On failure exit (no more tuples), we invalidate so->currPos.  It'll
    1582                 :             :  *              still be possible for the scan to return tuples by changing direction,
    1583                 :             :  *              though we'll need to call _bt_first anew in that other direction.
    1584                 :             :  */
    1585                 :             : bool
    1586                 :     1701606 : _bt_next(IndexScanDesc scan, ScanDirection dir)
    1587                 :             : {
    1588                 :     1701606 :         BTScanOpaque so = (BTScanOpaque) scan->opaque;
    1589                 :             : 
    1590   [ -  +  #  #  :     1701606 :         Assert(BTScanPosIsValid(so->currPos));
                   +  - ]
    1591                 :             : 
    1592                 :             :         /*
    1593                 :             :          * Advance to next tuple on current page; or if there's no more, try to
    1594                 :             :          * step to the next page with data.
    1595                 :             :          */
    1596         [ +  + ]:     1701606 :         if (ScanDirectionIsForward(dir))
    1597                 :             :         {
    1598         [ +  + ]:     1699632 :                 if (++so->currPos.itemIndex > so->currPos.lastItem)
    1599                 :             :                 {
    1600         [ +  + ]:      202087 :                         if (!_bt_steppage(scan, dir))
    1601                 :      199164 :                                 return false;
    1602                 :        2923 :                 }
    1603                 :     1500468 :         }
    1604                 :             :         else
    1605                 :             :         {
    1606         [ +  + ]:        1974 :                 if (--so->currPos.itemIndex < so->currPos.firstItem)
    1607                 :             :                 {
    1608         [ +  + ]:          19 :                         if (!_bt_steppage(scan, dir))
    1609                 :          15 :                                 return false;
    1610                 :           4 :                 }
    1611                 :             :         }
    1612                 :             : 
    1613                 :     1502427 :         _bt_returnitem(scan, so);
    1614                 :     1502427 :         return true;
    1615                 :     1701606 : }
    1616                 :             : 
    1617                 :             : /*
    1618                 :             :  * Return the index item from so->currPos.items[so->currPos.itemIndex] to the
    1619                 :             :  * index scan by setting the relevant fields in caller's index scan descriptor
    1620                 :             :  */
    1621                 :             : static inline void
    1622                 :     2242318 : _bt_returnitem(IndexScanDesc scan, BTScanOpaque so)
    1623                 :             : {
    1624                 :     2242318 :         BTScanPosItem *currItem = &so->currPos.items[so->currPos.itemIndex];
    1625                 :             : 
    1626                 :             :         /* Most recent _bt_readpage must have succeeded */
    1627   [ -  +  #  #  :     2242318 :         Assert(BTScanPosIsValid(so->currPos));
                   +  - ]
    1628         [ +  - ]:     2242318 :         Assert(so->currPos.itemIndex >= so->currPos.firstItem);
    1629         [ +  - ]:     2242318 :         Assert(so->currPos.itemIndex <= so->currPos.lastItem);
    1630                 :             : 
    1631                 :             :         /* Return next item, per amgettuple contract */
    1632                 :     2242318 :         scan->xs_heaptid = currItem->heapTid;
    1633         [ +  + ]:     2242318 :         if (so->currTuples)
    1634                 :      674232 :                 scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset);
    1635                 :     2242318 : }
    1636                 :             : 
    1637                 :             : /*
    1638                 :             :  *      _bt_steppage() -- Step to next page containing valid data for scan
    1639                 :             :  *
    1640                 :             :  * Wrapper on _bt_readnextpage that performs final steps for the current page.
    1641                 :             :  *
    1642                 :             :  * On entry, so->currPos must be valid.  Its buffer will be pinned, though
    1643                 :             :  * never locked. (Actually, when so->dropPin there won't even be a pin held,
    1644                 :             :  * though so->currPos.currPage must still be set to a valid block number.)
    1645                 :             :  */
    1646                 :             : static bool
    1647                 :      489679 : _bt_steppage(IndexScanDesc scan, ScanDirection dir)
    1648                 :             : {
    1649                 :      489679 :         BTScanOpaque so = (BTScanOpaque) scan->opaque;
    1650                 :      489679 :         BlockNumber blkno,
    1651                 :             :                                 lastcurrblkno;
    1652                 :             : 
    1653   [ -  +  #  #  :      489679 :         Assert(BTScanPosIsValid(so->currPos));
                   +  - ]
    1654                 :             : 
    1655                 :             :         /* Before leaving current page, deal with any killed items */
    1656         [ +  + ]:      489679 :         if (so->numKilled > 0)
    1657                 :        5707 :                 _bt_killitems(scan);
    1658                 :             : 
    1659                 :             :         /*
    1660                 :             :          * Before we modify currPos, make a copy of the page data if there was a
    1661                 :             :          * mark position that needs it.
    1662                 :             :          */
    1663         [ +  + ]:      489679 :         if (so->markItemIndex >= 0)
    1664                 :             :         {
    1665                 :             :                 /* bump pin on current buffer for assignment to mark buffer */
    1666   [ -  +  #  #  :          58 :                 if (BTScanPosIsPinned(so->currPos))
                   +  + ]
    1667                 :          56 :                         IncrBufferRefCount(so->currPos.buf);
    1668                 :          58 :                 memcpy(&so->markPos, &so->currPos,
    1669                 :             :                            offsetof(BTScanPosData, items[1]) +
    1670                 :             :                            so->currPos.lastItem * sizeof(BTScanPosItem));
    1671         [ +  + ]:          58 :                 if (so->markTuples)
    1672                 :          56 :                         memcpy(so->markTuples, so->currTuples,
    1673                 :             :                                    so->currPos.nextTupleOffset);
    1674                 :          58 :                 so->markPos.itemIndex = so->markItemIndex;
    1675                 :          58 :                 so->markItemIndex = -1;
    1676                 :             : 
    1677                 :             :                 /*
    1678                 :             :                  * If we're just about to start the next primitive index scan
    1679                 :             :                  * (possible with a scan that has arrays keys, and needs to skip to
    1680                 :             :                  * continue in the current scan direction), moreLeft/moreRight only
    1681                 :             :                  * indicate the end of the current primitive index scan.  They must
    1682                 :             :                  * never be taken to indicate that the top-level index scan has ended
    1683                 :             :                  * (that would be wrong).
    1684                 :             :                  *
    1685                 :             :                  * We could handle this case by treating the current array keys as
    1686                 :             :                  * markPos state.  But depending on the current array state like this
    1687                 :             :                  * would add complexity.  Instead, we just unset markPos's copy of
    1688                 :             :                  * moreRight or moreLeft (whichever might be affected), while making
    1689                 :             :                  * btrestrpos reset the scan's arrays to their initial scan positions.
    1690                 :             :                  * In effect, btrestrpos leaves advancing the arrays up to the first
    1691                 :             :                  * _bt_readpage call (that takes place after it has restored markPos).
    1692                 :             :                  */
    1693         [ +  - ]:          58 :                 if (so->needPrimScan)
    1694                 :             :                 {
    1695         [ #  # ]:           0 :                         if (ScanDirectionIsForward(so->currPos.dir))
    1696                 :           0 :                                 so->markPos.moreRight = true;
    1697                 :             :                         else
    1698                 :           0 :                                 so->markPos.moreLeft = true;
    1699                 :           0 :                 }
    1700                 :             : 
    1701                 :             :                 /* mark/restore not supported by parallel scans */
    1702         [ +  - ]:          58 :                 Assert(!scan->parallel_scan);
    1703                 :          58 :         }
    1704                 :             : 
    1705   [ -  +  #  #  :      489679 :         BTScanPosUnpinIfPinned(so->currPos);
                   +  + ]
    1706                 :             : 
    1707                 :             :         /* Walk to the next page with data */
    1708         [ +  + ]:      489679 :         if (ScanDirectionIsForward(dir))
    1709                 :      489655 :                 blkno = so->currPos.nextPage;
    1710                 :             :         else
    1711                 :          24 :                 blkno = so->currPos.prevPage;
    1712                 :      489679 :         lastcurrblkno = so->currPos.currPage;
    1713                 :             : 
    1714                 :             :         /*
    1715                 :             :          * Cancel primitive index scans that were scheduled when the call to
    1716                 :             :          * _bt_readpage for currPos happened to use the opposite direction to the
    1717                 :             :          * one that we're stepping in now.  (It's okay to leave the scan's array
    1718                 :             :          * keys as-is, since the next _bt_readpage will advance them.)
    1719                 :             :          */
    1720         [ +  + ]:      489679 :         if (so->currPos.dir != dir)
    1721                 :           6 :                 so->needPrimScan = false;
    1722                 :             : 
    1723                 :      979358 :         return _bt_readnextpage(scan, blkno, lastcurrblkno, dir, false);
    1724                 :      489679 : }
    1725                 :             : 
    1726                 :             : /*
    1727                 :             :  *      _bt_readfirstpage() -- Read first page containing valid data for _bt_first
    1728                 :             :  *
    1729                 :             :  * _bt_first caller passes us an offnum returned by _bt_binsrch, which might
    1730                 :             :  * be an out of bounds offnum such as "maxoff + 1" in certain corner cases.
    1731                 :             :  * When we're passed an offnum past the end of the page, we might still manage
    1732                 :             :  * to stop the scan on this page by calling _bt_checkkeys against the high
    1733                 :             :  * key.  See _bt_readpage for full details.
    1734                 :             :  *
    1735                 :             :  * On entry, so->currPos must be pinned and locked (so offnum stays valid).
    1736                 :             :  * Parallel scan callers must have seized the scan before calling here.
    1737                 :             :  *
    1738                 :             :  * On exit, we'll have updated so->currPos and retained locks and pins
    1739                 :             :  * according to the same rules as those laid out for _bt_readnextpage exit.
    1740                 :             :  * Like _bt_readnextpage, our return value indicates if there are any matching
    1741                 :             :  * records in the given direction.
    1742                 :             :  *
    1743                 :             :  * We always release the scan for a parallel scan caller, regardless of
    1744                 :             :  * success or failure; we'll call _bt_parallel_release as soon as possible.
    1745                 :             :  */
    1746                 :             : static bool
    1747                 :     1027450 : _bt_readfirstpage(IndexScanDesc scan, OffsetNumber offnum, ScanDirection dir)
    1748                 :             : {
    1749                 :     1027450 :         BTScanOpaque so = (BTScanOpaque) scan->opaque;
    1750                 :             : 
    1751                 :     1027450 :         so->numKilled = 0;                   /* just paranoia */
    1752                 :     1027450 :         so->markItemIndex = -1;              /* ditto */
    1753                 :             : 
    1754                 :             :         /* Initialize so->currPos for the first page (page in so->currPos.buf) */
    1755         [ +  + ]:     1027450 :         if (so->needPrimScan)
    1756                 :             :         {
    1757         [ +  - ]:        2910 :                 Assert(so->numArrayKeys);
    1758                 :             : 
    1759                 :        2910 :                 so->currPos.moreLeft = true;
    1760                 :        2910 :                 so->currPos.moreRight = true;
    1761                 :        2910 :                 so->needPrimScan = false;
    1762                 :        2910 :         }
    1763         [ +  + ]:     1024540 :         else if (ScanDirectionIsForward(dir))
    1764                 :             :         {
    1765                 :     1021107 :                 so->currPos.moreLeft = false;
    1766                 :     1021107 :                 so->currPos.moreRight = true;
    1767                 :     1021107 :         }
    1768                 :             :         else
    1769                 :             :         {
    1770                 :        3433 :                 so->currPos.moreLeft = true;
    1771                 :        3433 :                 so->currPos.moreRight = false;
    1772                 :             :         }
    1773                 :             : 
    1774                 :             :         /*
    1775                 :             :          * Attempt to load matching tuples from the first page.
    1776                 :             :          *
    1777                 :             :          * Note that _bt_readpage will finish initializing the so->currPos fields.
    1778                 :             :          * _bt_readpage also releases parallel scan (even when it returns false).
    1779                 :             :          */
    1780         [ +  + ]:     1027450 :         if (_bt_readpage(scan, dir, offnum, true))
    1781                 :             :         {
    1782                 :      739877 :                 Relation        rel = scan->indexRelation;
    1783                 :             : 
    1784                 :             :                 /*
    1785                 :             :                  * _bt_readpage succeeded.  Drop the lock (and maybe the pin) on
    1786                 :             :                  * so->currPos.buf in preparation for btgettuple returning tuples.
    1787                 :             :                  */
    1788   [ -  +  #  #  :      739877 :                 Assert(BTScanPosIsPinned(so->currPos));
                   +  - ]
    1789                 :      739877 :                 _bt_drop_lock_and_maybe_pin(rel, so);
    1790                 :      739877 :                 return true;
    1791                 :      739877 :         }
    1792                 :             : 
    1793                 :             :         /* There's no actually-matching data on the page in so->currPos.buf */
    1794                 :      287573 :         _bt_unlockbuf(scan->indexRelation, so->currPos.buf);
    1795                 :             : 
    1796                 :             :         /* Call _bt_readnextpage using its _bt_steppage wrapper function */
    1797         [ +  + ]:      287573 :         if (!_bt_steppage(scan, dir))
    1798                 :      287559 :                 return false;
    1799                 :             : 
    1800                 :             :         /* _bt_readpage for a later page (now in so->currPos) succeeded */
    1801                 :          14 :         return true;
    1802                 :     1027450 : }
    1803                 :             : 
    1804                 :             : /*
    1805                 :             :  *      _bt_readnextpage() -- Read next page containing valid data for _bt_next
    1806                 :             :  *
    1807                 :             :  * Caller's blkno is the next interesting page's link, taken from either the
    1808                 :             :  * previously-saved right link or left link.  lastcurrblkno is the page that
    1809                 :             :  * was current at the point where the blkno link was saved, which we use to
    1810                 :             :  * reason about concurrent page splits/page deletions during backwards scans.
    1811                 :             :  * In the common case where seized=false, blkno is either so->currPos.nextPage
    1812                 :             :  * or so->currPos.prevPage, and lastcurrblkno is so->currPos.currPage.
    1813                 :             :  *
    1814                 :             :  * On entry, so->currPos shouldn't be locked by caller.  so->currPos.buf must
    1815                 :             :  * be InvalidBuffer/unpinned as needed by caller (note that lastcurrblkno
    1816                 :             :  * won't need to be read again in almost all cases).  Parallel scan callers
    1817                 :             :  * that seized the scan before calling here should pass seized=true; such a
    1818                 :             :  * caller's blkno and lastcurrblkno arguments come from the seized scan.
    1819                 :             :  * seized=false callers just pass us the blkno/lastcurrblkno taken from their
    1820                 :             :  * so->currPos, which (along with so->currPos itself) can be used to end the
    1821                 :             :  * scan.  A seized=false caller's blkno can never be assumed to be the page
    1822                 :             :  * that must be read next during a parallel scan, though.  We must figure that
    1823                 :             :  * part out for ourselves by seizing the scan (the correct page to read might
    1824                 :             :  * already be beyond the seized=false caller's blkno during a parallel scan,
    1825                 :             :  * unless blkno/so->currPos.nextPage/so->currPos.prevPage is already P_NONE,
    1826                 :             :  * or unless so->currPos.moreRight/so->currPos.moreLeft is already unset).
    1827                 :             :  *
    1828                 :             :  * On success exit, so->currPos is updated to contain data from the next
    1829                 :             :  * interesting page, and we return true.  We hold a pin on the buffer on
    1830                 :             :  * success exit (except during so->dropPin index scans, when we drop the pin
    1831                 :             :  * eagerly to avoid blocking VACUUM).
    1832                 :             :  *
    1833                 :             :  * If there are no more matching records in the given direction, we invalidate
    1834                 :             :  * so->currPos (while ensuring it retains no locks or pins), and return false.
    1835                 :             :  *
    1836                 :             :  * We always release the scan for a parallel scan caller, regardless of
    1837                 :             :  * success or failure; we'll call _bt_parallel_release as soon as possible.
    1838                 :             :  */
    1839                 :             : static bool
    1840                 :      489679 : _bt_readnextpage(IndexScanDesc scan, BlockNumber blkno,
    1841                 :             :                                  BlockNumber lastcurrblkno, ScanDirection dir, bool seized)
    1842                 :             : {
    1843                 :      489679 :         Relation        rel = scan->indexRelation;
    1844                 :      489679 :         BTScanOpaque so = (BTScanOpaque) scan->opaque;
    1845                 :             : 
    1846   [ -  +  #  # ]:      489679 :         Assert(so->currPos.currPage == lastcurrblkno || seized);
    1847   [ +  +  +  - ]:      489679 :         Assert(!(blkno == P_NONE && seized));
    1848   [ -  +  #  #  :      489679 :         Assert(!BTScanPosIsPinned(so->currPos));
                   +  - ]
    1849                 :             : 
    1850                 :             :         /*
    1851                 :             :          * Remember that the scan already read lastcurrblkno, a page to the left
    1852                 :             :          * of blkno (or remember reading a page to the right, for backwards scans)
    1853                 :             :          */
    1854         [ +  + ]:      489679 :         if (ScanDirectionIsForward(dir))
    1855                 :      489655 :                 so->currPos.moreLeft = true;
    1856                 :             :         else
    1857                 :          24 :                 so->currPos.moreRight = true;
    1858                 :             : 
    1859                 :      490057 :         for (;;)
    1860                 :             :         {
    1861                 :      490057 :                 Page            page;
    1862                 :      490057 :                 BTPageOpaque opaque;
    1863                 :             : 
    1864   [ +  +  +  + ]:      490057 :                 if (blkno == P_NONE ||
    1865         [ +  + ]:      206810 :                         (ScanDirectionIsForward(dir) ?
    1866                 :      206810 :                          !so->currPos.moreRight : !so->currPos.moreLeft))
    1867                 :             :                 {
    1868                 :             :                         /* most recent _bt_readpage call (for lastcurrblkno) ended scan */
    1869         [ +  - ]:      486738 :                         Assert(so->currPos.currPage == lastcurrblkno && !seized);
    1870                 :      486738 :                         BTScanPosInvalidate(so->currPos);
    1871                 :      486738 :                         _bt_parallel_done(scan);        /* iff !so->needPrimScan */
    1872                 :      486738 :                         return false;
    1873                 :             :                 }
    1874                 :             : 
    1875         [ -  + ]:        3319 :                 Assert(!so->needPrimScan);
    1876                 :             : 
    1877                 :             :                 /* parallel scan must never actually visit so->currPos blkno */
    1878   [ +  -  +  +  :        3319 :                 if (!seized && scan->parallel_scan != NULL &&
                   +  - ]
    1879                 :         202 :                         !_bt_parallel_seize(scan, &blkno, &lastcurrblkno, false))
    1880                 :             :                 {
    1881                 :             :                         /* whole scan is now done (or another primitive scan required) */
    1882                 :           0 :                         BTScanPosInvalidate(so->currPos);
    1883                 :           0 :                         return false;
    1884                 :             :                 }
    1885                 :             : 
    1886         [ +  + ]:        3319 :                 if (ScanDirectionIsForward(dir))
    1887                 :             :                 {
    1888                 :             :                         /* read blkno, but check for interrupts first */
    1889         [ +  - ]:        3310 :                         CHECK_FOR_INTERRUPTS();
    1890                 :        3310 :                         so->currPos.buf = _bt_getbuf(rel, blkno, BT_READ);
    1891                 :        3310 :                 }
    1892                 :             :                 else
    1893                 :             :                 {
    1894                 :             :                         /* read blkno, avoiding race (also checks for interrupts) */
    1895                 :          18 :                         so->currPos.buf = _bt_lock_and_validate_left(rel, &blkno,
    1896                 :           9 :                                                                                                                  lastcurrblkno);
    1897         [ +  - ]:           9 :                         if (so->currPos.buf == InvalidBuffer)
    1898                 :             :                         {
    1899                 :             :                                 /* must have been a concurrent deletion of leftmost page */
    1900                 :           0 :                                 BTScanPosInvalidate(so->currPos);
    1901                 :           0 :                                 _bt_parallel_done(scan);
    1902                 :           0 :                                 return false;
    1903                 :             :                         }
    1904                 :             :                 }
    1905                 :             : 
    1906                 :        3319 :                 page = BufferGetPage(so->currPos.buf);
    1907                 :        3319 :                 opaque = BTPageGetOpaque(page);
    1908                 :        3319 :                 lastcurrblkno = blkno;
    1909         [ +  - ]:        3319 :                 if (likely(!P_IGNORE(opaque)))
    1910                 :             :                 {
    1911                 :             :                         /* see if there are any matches on this page */
    1912         [ +  + ]:        3319 :                         if (ScanDirectionIsForward(dir))
    1913                 :             :                         {
    1914                 :             :                                 /* note that this will clear moreRight if we can stop */
    1915         [ +  + ]:        3310 :                                 if (_bt_readpage(scan, dir, P_FIRSTDATAKEY(opaque), seized))
    1916                 :        2936 :                                         break;
    1917                 :         374 :                                 blkno = so->currPos.nextPage;
    1918                 :         374 :                         }
    1919                 :             :                         else
    1920                 :             :                         {
    1921                 :             :                                 /* note that this will clear moreLeft if we can stop */
    1922         [ +  + ]:           9 :                                 if (_bt_readpage(scan, dir, PageGetMaxOffsetNumber(page), seized))
    1923                 :           5 :                                         break;
    1924                 :           4 :                                 blkno = so->currPos.prevPage;
    1925                 :             :                         }
    1926                 :         378 :                 }
    1927                 :             :                 else
    1928                 :             :                 {
    1929                 :             :                         /* _bt_readpage not called, so do all this for ourselves */
    1930         [ #  # ]:           0 :                         if (ScanDirectionIsForward(dir))
    1931                 :           0 :                                 blkno = opaque->btpo_next;
    1932                 :             :                         else
    1933                 :           0 :                                 blkno = opaque->btpo_prev;
    1934         [ #  # ]:           0 :                         if (scan->parallel_scan != NULL)
    1935                 :           0 :                                 _bt_parallel_release(scan, blkno, lastcurrblkno);
    1936                 :             :                 }
    1937                 :             : 
    1938                 :             :                 /* no matching tuples on this page */
    1939                 :         378 :                 _bt_relbuf(rel, so->currPos.buf);
    1940                 :         378 :                 seized = false;                 /* released by _bt_readpage (or by us) */
    1941      [ +  +  + ]:      490057 :         }
    1942                 :             : 
    1943                 :             :         /*
    1944                 :             :          * _bt_readpage succeeded.  Drop the lock (and maybe the pin) on
    1945                 :             :          * so->currPos.buf in preparation for btgettuple returning tuples.
    1946                 :             :          */
    1947         [ +  - ]:        2941 :         Assert(so->currPos.currPage == blkno);
    1948   [ -  +  #  #  :        2941 :         Assert(BTScanPosIsPinned(so->currPos));
                   +  - ]
    1949                 :        2941 :         _bt_drop_lock_and_maybe_pin(rel, so);
    1950                 :             : 
    1951                 :        2941 :         return true;
    1952                 :      489679 : }
    1953                 :             : 
    1954                 :             : /*
    1955                 :             :  * _bt_lock_and_validate_left() -- lock caller's left sibling blkno,
    1956                 :             :  * recovering from concurrent page splits/page deletions when necessary
    1957                 :             :  *
    1958                 :             :  * Called during backwards scans, to deal with their unique concurrency rules.
    1959                 :             :  *
    1960                 :             :  * blkno points to the block number of the page that we expect to move the
    1961                 :             :  * scan to.  We'll successfully move the scan there when we find that its
    1962                 :             :  * right sibling link still points to lastcurrblkno (the page we just read).
    1963                 :             :  * Otherwise, we have to figure out which page is the correct one for the scan
    1964                 :             :  * to now read the hard way, reasoning about concurrent splits and deletions.
    1965                 :             :  * See nbtree/README.
    1966                 :             :  *
    1967                 :             :  * On return, we have both a pin and a read lock on the returned page, whose
    1968                 :             :  * block number will be set in *blkno.  Returns InvalidBuffer if there is no
    1969                 :             :  * page to the left (no lock or pin is held in that case).
    1970                 :             :  *
    1971                 :             :  * It is possible for the returned leaf page to be half-dead; caller must
    1972                 :             :  * check that condition and step left again when required.
    1973                 :             :  */
    1974                 :             : static Buffer
    1975                 :           9 : _bt_lock_and_validate_left(Relation rel, BlockNumber *blkno,
    1976                 :             :                                                    BlockNumber lastcurrblkno)
    1977                 :             : {
    1978                 :           9 :         BlockNumber origblkno = *blkno; /* detects circular links */
    1979                 :             : 
    1980                 :           9 :         for (;;)
    1981                 :             :         {
    1982                 :           9 :                 Buffer          buf;
    1983                 :           9 :                 Page            page;
    1984                 :           9 :                 BTPageOpaque opaque;
    1985                 :           9 :                 int                     tries;
    1986                 :             : 
    1987                 :             :                 /* check for interrupts while we're not holding any buffer lock */
    1988         [ +  - ]:           9 :                 CHECK_FOR_INTERRUPTS();
    1989                 :           9 :                 buf = _bt_getbuf(rel, *blkno, BT_READ);
    1990                 :           9 :                 page = BufferGetPage(buf);
    1991                 :           9 :                 opaque = BTPageGetOpaque(page);
    1992                 :             : 
    1993                 :             :                 /*
    1994                 :             :                  * If this isn't the page we want, walk right till we find what we
    1995                 :             :                  * want --- but go no more than four hops (an arbitrary limit). If we
    1996                 :             :                  * don't find the correct page by then, the most likely bet is that
    1997                 :             :                  * lastcurrblkno got deleted and isn't in the sibling chain at all
    1998                 :             :                  * anymore, not that its left sibling got split more than four times.
    1999                 :             :                  *
    2000                 :             :                  * Note that it is correct to test P_ISDELETED not P_IGNORE here,
    2001                 :             :                  * because half-dead pages are still in the sibling chain.
    2002                 :             :                  */
    2003                 :           9 :                 tries = 0;
    2004                 :           9 :                 for (;;)
    2005                 :             :                 {
    2006   [ -  +  -  + ]:           9 :                         if (likely(!P_ISDELETED(opaque) &&
    2007                 :             :                                            opaque->btpo_next == lastcurrblkno))
    2008                 :             :                         {
    2009                 :             :                                 /* Found desired page, return it */
    2010                 :           9 :                                 return buf;
    2011                 :             :                         }
    2012   [ #  #  #  # ]:           0 :                         if (P_RIGHTMOST(opaque) || ++tries > 4)
    2013                 :           0 :                                 break;
    2014                 :             :                         /* step right */
    2015                 :           0 :                         *blkno = opaque->btpo_next;
    2016                 :           0 :                         buf = _bt_relandgetbuf(rel, buf, *blkno, BT_READ);
    2017                 :           0 :                         page = BufferGetPage(buf);
    2018                 :           0 :                         opaque = BTPageGetOpaque(page);
    2019                 :             :                 }
    2020                 :             : 
    2021                 :             :                 /*
    2022                 :             :                  * Return to the original page (usually the page most recently read by
    2023                 :             :                  * _bt_readpage, which is passed by caller as lastcurrblkno) to see
    2024                 :             :                  * what's up with its prev sibling link
    2025                 :             :                  */
    2026                 :           0 :                 buf = _bt_relandgetbuf(rel, buf, lastcurrblkno, BT_READ);
    2027                 :           0 :                 page = BufferGetPage(buf);
    2028                 :           0 :                 opaque = BTPageGetOpaque(page);
    2029         [ #  # ]:           0 :                 if (P_ISDELETED(opaque))
    2030                 :             :                 {
    2031                 :             :                         /*
    2032                 :             :                          * It was deleted.  Move right to first nondeleted page (there
    2033                 :             :                          * must be one); that is the page that has acquired the deleted
    2034                 :             :                          * one's keyspace, so stepping left from it will take us where we
    2035                 :             :                          * want to be.
    2036                 :             :                          */
    2037                 :           0 :                         for (;;)
    2038                 :             :                         {
    2039         [ #  # ]:           0 :                                 if (P_RIGHTMOST(opaque))
    2040   [ #  #  #  # ]:           0 :                                         elog(ERROR, "fell off the end of index \"%s\"",
    2041                 :             :                                                  RelationGetRelationName(rel));
    2042                 :           0 :                                 lastcurrblkno = opaque->btpo_next;
    2043                 :           0 :                                 buf = _bt_relandgetbuf(rel, buf, lastcurrblkno, BT_READ);
    2044                 :           0 :                                 page = BufferGetPage(buf);
    2045                 :           0 :                                 opaque = BTPageGetOpaque(page);
    2046         [ #  # ]:           0 :                                 if (!P_ISDELETED(opaque))
    2047                 :           0 :                                         break;
    2048                 :             :                         }
    2049                 :           0 :                 }
    2050                 :             :                 else
    2051                 :             :                 {
    2052                 :             :                         /*
    2053                 :             :                          * Original lastcurrblkno wasn't deleted; the explanation had
    2054                 :             :                          * better be that the page to the left got split or deleted.
    2055                 :             :                          * Without this check, we risk going into an infinite loop.
    2056                 :             :                          */
    2057         [ #  # ]:           0 :                         if (opaque->btpo_prev == origblkno)
    2058   [ #  #  #  # ]:           0 :                                 elog(ERROR, "could not find left sibling of block %u in index \"%s\"",
    2059                 :             :                                          lastcurrblkno, RelationGetRelationName(rel));
    2060                 :             :                         /* Okay to try again, since left sibling link changed */
    2061                 :             :                 }
    2062                 :             : 
    2063                 :             :                 /*
    2064                 :             :                  * Original lastcurrblkno from caller was concurrently deleted (could
    2065                 :             :                  * also have been a great many concurrent left sibling page splits).
    2066                 :             :                  * Found a non-deleted page that should now act as our lastcurrblkno.
    2067                 :             :                  */
    2068         [ #  # ]:           0 :                 if (P_LEFTMOST(opaque))
    2069                 :             :                 {
    2070                 :             :                         /* New lastcurrblkno has no left sibling (concurrently deleted) */
    2071                 :           0 :                         _bt_relbuf(rel, buf);
    2072                 :           0 :                         break;
    2073                 :             :                 }
    2074                 :             : 
    2075                 :             :                 /* Start from scratch with new lastcurrblkno's blkno/prev link */
    2076                 :           0 :                 *blkno = origblkno = opaque->btpo_prev;
    2077                 :           0 :                 _bt_relbuf(rel, buf);
    2078      [ +  -  - ]:           9 :         }
    2079                 :             : 
    2080                 :           0 :         return InvalidBuffer;
    2081                 :           9 : }
    2082                 :             : 
    2083                 :             : /*
    2084                 :             :  * _bt_get_endpoint() -- Find the first or last page on a given tree level
    2085                 :             :  *
    2086                 :             :  * If the index is empty, we will return InvalidBuffer; any other failure
    2087                 :             :  * condition causes ereport().  We will not return a dead page.
    2088                 :             :  *
    2089                 :             :  * The returned buffer is pinned and read-locked.
    2090                 :             :  */
    2091                 :             : Buffer
    2092                 :        6452 : _bt_get_endpoint(Relation rel, uint32 level, bool rightmost)
    2093                 :             : {
    2094                 :        6452 :         Buffer          buf;
    2095                 :        6452 :         Page            page;
    2096                 :        6452 :         BTPageOpaque opaque;
    2097                 :        6452 :         OffsetNumber offnum;
    2098                 :        6452 :         BlockNumber blkno;
    2099                 :        6452 :         IndexTuple      itup;
    2100                 :             : 
    2101                 :             :         /*
    2102                 :             :          * If we are looking for a leaf page, okay to descend from fast root;
    2103                 :             :          * otherwise better descend from true root.  (There is no point in being
    2104                 :             :          * smarter about intermediate levels.)
    2105                 :             :          */
    2106         [ +  + ]:        6452 :         if (level == 0)
    2107                 :        6449 :                 buf = _bt_getroot(rel, NULL, BT_READ);
    2108                 :             :         else
    2109                 :           3 :                 buf = _bt_gettrueroot(rel);
    2110                 :             : 
    2111         [ +  + ]:        6452 :         if (!BufferIsValid(buf))
    2112                 :         332 :                 return InvalidBuffer;
    2113                 :             : 
    2114                 :        6120 :         page = BufferGetPage(buf);
    2115                 :        6120 :         opaque = BTPageGetOpaque(page);
    2116                 :             : 
    2117                 :        8686 :         for (;;)
    2118                 :             :         {
    2119                 :             :                 /*
    2120                 :             :                  * If we landed on a deleted page, step right to find a live page
    2121                 :             :                  * (there must be one).  Also, if we want the rightmost page, step
    2122                 :             :                  * right if needed to get to it (this could happen if the page split
    2123                 :             :                  * since we obtained a pointer to it).
    2124                 :             :                  */
    2125   [ +  -  -  + ]:       17372 :                 while (P_IGNORE(opaque) ||
    2126         [ +  + ]:        8686 :                            (rightmost && !P_RIGHTMOST(opaque)))
    2127                 :             :                 {
    2128                 :           0 :                         blkno = opaque->btpo_next;
    2129         [ #  # ]:           0 :                         if (blkno == P_NONE)
    2130   [ #  #  #  # ]:           0 :                                 elog(ERROR, "fell off the end of index \"%s\"",
    2131                 :             :                                          RelationGetRelationName(rel));
    2132                 :           0 :                         buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ);
    2133                 :           0 :                         page = BufferGetPage(buf);
    2134                 :           0 :                         opaque = BTPageGetOpaque(page);
    2135                 :             :                 }
    2136                 :             : 
    2137                 :             :                 /* Done? */
    2138         [ +  + ]:        8686 :                 if (opaque->btpo_level == level)
    2139                 :        6120 :                         break;
    2140         [ +  - ]:        2566 :                 if (opaque->btpo_level < level)
    2141   [ #  #  #  # ]:           0 :                         ereport(ERROR,
    2142                 :             :                                         (errcode(ERRCODE_INDEX_CORRUPTED),
    2143                 :             :                                          errmsg_internal("btree level %u not found in index \"%s\"",
    2144                 :             :                                                                          level, RelationGetRelationName(rel))));
    2145                 :             : 
    2146                 :             :                 /* Descend to leftmost or rightmost child page */
    2147         [ +  + ]:        2566 :                 if (rightmost)
    2148                 :           1 :                         offnum = PageGetMaxOffsetNumber(page);
    2149                 :             :                 else
    2150                 :        2565 :                         offnum = P_FIRSTDATAKEY(opaque);
    2151                 :             : 
    2152         [ +  - ]:        2566 :                 if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
    2153   [ #  #  #  # ]:           0 :                         elog(PANIC, "offnum out of range");
    2154                 :             : 
    2155                 :        2566 :                 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
    2156                 :        2566 :                 blkno = BTreeTupleGetDownLink(itup);
    2157                 :             : 
    2158                 :        2566 :                 buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ);
    2159                 :        2566 :                 page = BufferGetPage(buf);
    2160                 :        2566 :                 opaque = BTPageGetOpaque(page);
    2161                 :             :         }
    2162                 :             : 
    2163                 :        6120 :         return buf;
    2164                 :        6452 : }
    2165                 :             : 
    2166                 :             : /*
    2167                 :             :  *      _bt_endpoint() -- Find the first or last page in the index, and scan
    2168                 :             :  * from there to the first key satisfying all the quals.
    2169                 :             :  *
    2170                 :             :  * This is used by _bt_first() to set up a scan when we've determined
    2171                 :             :  * that the scan must start at the beginning or end of the index (for
    2172                 :             :  * a forward or backward scan respectively).
    2173                 :             :  *
    2174                 :             :  * Parallel scan callers must have seized the scan before calling here.
    2175                 :             :  * Exit conditions are the same as for _bt_first().
    2176                 :             :  */
    2177                 :             : static bool
    2178                 :        6449 : _bt_endpoint(IndexScanDesc scan, ScanDirection dir)
    2179                 :             : {
    2180                 :        6449 :         Relation        rel = scan->indexRelation;
    2181                 :        6449 :         BTScanOpaque so = (BTScanOpaque) scan->opaque;
    2182                 :        6449 :         Page            page;
    2183                 :        6449 :         BTPageOpaque opaque;
    2184                 :        6449 :         OffsetNumber start;
    2185                 :             : 
    2186   [ +  -  +  -  :        6449 :         Assert(!BTScanPosIsValid(so->currPos));
                   +  - ]
    2187         [ +  - ]:        6449 :         Assert(!so->needPrimScan);
    2188                 :             : 
    2189                 :             :         /*
    2190                 :             :          * Scan down to the leftmost or rightmost leaf page.  This is a simplified
    2191                 :             :          * version of _bt_search().
    2192                 :             :          */
    2193                 :        6449 :         so->currPos.buf = _bt_get_endpoint(rel, 0, ScanDirectionIsBackward(dir));
    2194                 :             : 
    2195         [ +  + ]:        6449 :         if (!BufferIsValid(so->currPos.buf))
    2196                 :             :         {
    2197                 :             :                 /*
    2198                 :             :                  * Empty index. Lock the whole relation, as nothing finer to lock
    2199                 :             :                  * exists.
    2200                 :             :                  */
    2201                 :         332 :                 PredicateLockRelation(rel, scan->xs_snapshot);
    2202                 :         332 :                 _bt_parallel_done(scan);
    2203                 :         332 :                 return false;
    2204                 :             :         }
    2205                 :             : 
    2206                 :        6117 :         page = BufferGetPage(so->currPos.buf);
    2207                 :        6117 :         opaque = BTPageGetOpaque(page);
    2208         [ +  - ]:        6117 :         Assert(P_ISLEAF(opaque));
    2209                 :             : 
    2210         [ +  + ]:        6117 :         if (ScanDirectionIsForward(dir))
    2211                 :             :         {
    2212                 :             :                 /* There could be dead pages to the left, so not this: */
    2213                 :             :                 /* Assert(P_LEFTMOST(opaque)); */
    2214                 :             : 
    2215                 :        6109 :                 start = P_FIRSTDATAKEY(opaque);
    2216                 :        6109 :         }
    2217         [ +  - ]:           8 :         else if (ScanDirectionIsBackward(dir))
    2218                 :             :         {
    2219         [ +  - ]:           8 :                 Assert(P_RIGHTMOST(opaque));
    2220                 :             : 
    2221                 :           8 :                 start = PageGetMaxOffsetNumber(page);
    2222                 :           8 :         }
    2223                 :             :         else
    2224                 :             :         {
    2225   [ #  #  #  # ]:           0 :                 elog(ERROR, "invalid scan direction: %d", (int) dir);
    2226                 :           0 :                 start = 0;                              /* keep compiler quiet */
    2227                 :             :         }
    2228                 :             : 
    2229                 :             :         /*
    2230                 :             :          * Now load data from the first page of the scan.
    2231                 :             :          */
    2232         [ +  + ]:        6117 :         if (!_bt_readfirstpage(scan, start, dir))
    2233                 :         126 :                 return false;
    2234                 :             : 
    2235                 :        5991 :         _bt_returnitem(scan, so);
    2236                 :        5991 :         return true;
    2237                 :        6449 : }
        

Generated by: LCOV version 2.3.2-1