LCOV - code coverage report
Current view: top level - src/backend/access/nbtree - nbtsplitloc.c (source / functions) Coverage Total Hit
Test: Code coverage Lines: 92.2 % 359 331
Test Date: 2026-01-26 10:56:24 Functions: 100.0 % 13 13
Legend: Lines:     hit not hit
Branches: + taken - not taken # not executed
Branches: 75.9 % 220 167

             Branch data     Line data    Source code
       1                 :             : /*-------------------------------------------------------------------------
       2                 :             :  *
       3                 :             :  * nbtsplitloc.c
       4                 :             :  *        Choose split point code for Postgres btree implementation.
       5                 :             :  *
       6                 :             :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7                 :             :  * Portions Copyright (c) 1994, Regents of the University of California
       8                 :             :  *
       9                 :             :  *
      10                 :             :  * IDENTIFICATION
      11                 :             :  *        src/backend/access/nbtree/nbtsplitloc.c
      12                 :             :  *
      13                 :             :  *-------------------------------------------------------------------------
      14                 :             :  */
      15                 :             : #include "postgres.h"
      16                 :             : 
      17                 :             : #include "access/nbtree.h"
      18                 :             : #include "access/tableam.h"
      19                 :             : #include "common/int.h"
      20                 :             : 
      21                 :             : typedef enum
      22                 :             : {
      23                 :             :         /* strategy for searching through materialized list of split points */
      24                 :             :         SPLIT_DEFAULT,                          /* give some weight to truncation */
      25                 :             :         SPLIT_MANY_DUPLICATES,          /* find minimally distinguishing point */
      26                 :             :         SPLIT_SINGLE_VALUE,                     /* leave left page almost full */
      27                 :             : } FindSplitStrat;
      28                 :             : 
      29                 :             : typedef struct
      30                 :             : {
      31                 :             :         /* details of free space left by split */
      32                 :             :         int16           curdelta;               /* current leftfree/rightfree delta */
      33                 :             :         int16           leftfree;               /* space left on left page post-split */
      34                 :             :         int16           rightfree;              /* space left on right page post-split */
      35                 :             : 
      36                 :             :         /* split point identifying fields (returned by _bt_findsplitloc) */
      37                 :             :         OffsetNumber firstrightoff; /* first origpage item on rightpage */
      38                 :             :         bool            newitemonleft;  /* new item goes on left, or right? */
      39                 :             : } SplitPoint;
      40                 :             : 
      41                 :             : typedef struct
      42                 :             : {
      43                 :             :         /* context data for _bt_recsplitloc */
      44                 :             :         Relation        rel;                    /* index relation */
      45                 :             :         Page            origpage;               /* page undergoing split */
      46                 :             :         IndexTuple      newitem;                /* new item (cause of page split) */
      47                 :             :         Size            newitemsz;              /* size of newitem (includes line pointer) */
      48                 :             :         bool            is_leaf;                /* T if splitting a leaf page */
      49                 :             :         bool            is_rightmost;   /* T if splitting rightmost page on level */
      50                 :             :         OffsetNumber newitemoff;        /* where the new item is to be inserted */
      51                 :             :         int                     leftspace;              /* space available for items on left page */
      52                 :             :         int                     rightspace;             /* space available for items on right page */
      53                 :             :         int                     olddataitemstotal;      /* space taken by old items */
      54                 :             :         Size            minfirstrightsz;        /* smallest firstright size */
      55                 :             : 
      56                 :             :         /* candidate split point data */
      57                 :             :         int                     maxsplits;              /* maximum number of splits */
      58                 :             :         int                     nsplits;                /* current number of splits */
      59                 :             :         SplitPoint *splits;                     /* all candidate split points for page */
      60                 :             :         int                     interval;               /* current range of acceptable split points */
      61                 :             : } FindSplitData;
      62                 :             : 
      63                 :             : static void _bt_recsplitloc(FindSplitData *state,
      64                 :             :                                                         OffsetNumber firstrightoff, bool newitemonleft,
      65                 :             :                                                         int olddataitemstoleft,
      66                 :             :                                                         Size firstrightofforigpagetuplesz);
      67                 :             : static void _bt_deltasortsplits(FindSplitData *state, double fillfactormult,
      68                 :             :                                                                 bool usemult);
      69                 :             : static int      _bt_splitcmp(const void *arg1, const void *arg2);
      70                 :             : static bool _bt_afternewitemoff(FindSplitData *state, OffsetNumber maxoff,
      71                 :             :                                                                 int leaffillfactor, bool *usemult);
      72                 :             : static bool _bt_adjacenthtid(const ItemPointerData *lowhtid, const ItemPointerData *highhtid);
      73                 :             : static OffsetNumber _bt_bestsplitloc(FindSplitData *state, int perfectpenalty,
      74                 :             :                                                                          bool *newitemonleft, FindSplitStrat strategy);
      75                 :             : static int      _bt_defaultinterval(FindSplitData *state);
      76                 :             : static int      _bt_strategy(FindSplitData *state, SplitPoint *leftpage,
      77                 :             :                                                  SplitPoint *rightpage, FindSplitStrat *strategy);
      78                 :             : static void _bt_interval_edges(FindSplitData *state,
      79                 :             :                                                            SplitPoint **leftinterval, SplitPoint **rightinterval);
      80                 :             : static inline int _bt_split_penalty(FindSplitData *state, SplitPoint *split);
      81                 :             : static inline IndexTuple _bt_split_lastleft(FindSplitData *state,
      82                 :             :                                                                                         SplitPoint *split);
      83                 :             : static inline IndexTuple _bt_split_firstright(FindSplitData *state,
      84                 :             :                                                                                           SplitPoint *split);
      85                 :             : 
      86                 :             : 
      87                 :             : /*
      88                 :             :  *      _bt_findsplitloc() -- find an appropriate place to split a page.
      89                 :             :  *
      90                 :             :  * The main goal here is to equalize the free space that will be on each
      91                 :             :  * split page, *after accounting for the inserted tuple*.  (If we fail to
      92                 :             :  * account for it, we might find ourselves with too little room on the page
      93                 :             :  * that it needs to go into!)
      94                 :             :  *
      95                 :             :  * If the page is the rightmost page on its level, we instead try to arrange
      96                 :             :  * to leave the left split page fillfactor% full.  In this way, when we are
      97                 :             :  * inserting successively increasing keys (consider sequences, timestamps,
      98                 :             :  * etc) we will end up with a tree whose pages are about fillfactor% full,
      99                 :             :  * instead of the 50% full result that we'd get without this special case.
     100                 :             :  * This is the same as nbtsort.c produces for a newly-created tree.  Note
     101                 :             :  * that leaf and nonleaf pages use different fillfactors.  Note also that
     102                 :             :  * there are a number of further special cases where fillfactor is not
     103                 :             :  * applied in the standard way.
     104                 :             :  *
     105                 :             :  * We are passed the intended insert position of the new tuple, expressed as
     106                 :             :  * the offsetnumber of the tuple it must go in front of (this could be
     107                 :             :  * maxoff+1 if the tuple is to go at the end).  The new tuple itself is also
     108                 :             :  * passed, since it's needed to give some weight to how effective suffix
     109                 :             :  * truncation will be.  The implementation picks the split point that
     110                 :             :  * maximizes the effectiveness of suffix truncation from a small list of
     111                 :             :  * alternative candidate split points that leave each side of the split with
     112                 :             :  * about the same share of free space.  Suffix truncation is secondary to
     113                 :             :  * equalizing free space, except in cases with large numbers of duplicates.
     114                 :             :  * Note that it is always assumed that caller goes on to perform truncation,
     115                 :             :  * even with pg_upgrade'd indexes where that isn't actually the case
     116                 :             :  * (!heapkeyspace indexes).  See nbtree/README for more information about
     117                 :             :  * suffix truncation.
     118                 :             :  *
     119                 :             :  * We return the index of the first existing tuple that should go on the
     120                 :             :  * righthand page (which is called firstrightoff), plus a boolean
     121                 :             :  * indicating whether the new tuple goes on the left or right page.  You
     122                 :             :  * can think of the returned state as a point _between_ two adjacent data
     123                 :             :  * items (lastleft and firstright data items) on an imaginary version of
     124                 :             :  * origpage that already includes newitem.  The bool is necessary to
     125                 :             :  * disambiguate the case where firstrightoff == newitemoff (i.e. it is
     126                 :             :  * sometimes needed to determine if the firstright tuple for the split is
     127                 :             :  * newitem rather than the tuple from origpage at offset firstrightoff).
     128                 :             :  */
     129                 :             : OffsetNumber
     130                 :        2309 : _bt_findsplitloc(Relation rel,
     131                 :             :                                  Page origpage,
     132                 :             :                                  OffsetNumber newitemoff,
     133                 :             :                                  Size newitemsz,
     134                 :             :                                  IndexTuple newitem,
     135                 :             :                                  bool *newitemonleft)
     136                 :             : {
     137                 :        2309 :         BTPageOpaque opaque;
     138                 :        2309 :         int                     leftspace,
     139                 :             :                                 rightspace,
     140                 :             :                                 olddataitemstotal,
     141                 :             :                                 olddataitemstoleft,
     142                 :             :                                 perfectpenalty,
     143                 :             :                                 leaffillfactor;
     144                 :        2309 :         FindSplitData state;
     145                 :        2309 :         FindSplitStrat strategy;
     146                 :        2309 :         ItemId          itemid;
     147                 :        2309 :         OffsetNumber offnum,
     148                 :             :                                 maxoff,
     149                 :             :                                 firstrightoff;
     150                 :        2309 :         double          fillfactormult;
     151                 :        2309 :         bool            usemult;
     152                 :        2309 :         SplitPoint      leftpage,
     153                 :             :                                 rightpage;
     154                 :             : 
     155                 :        2309 :         opaque = BTPageGetOpaque(origpage);
     156                 :        2309 :         maxoff = PageGetMaxOffsetNumber(origpage);
     157                 :             : 
     158                 :             :         /* Total free space available on a btree page, after fixed overhead */
     159                 :        2309 :         leftspace = rightspace =
     160                 :        2309 :                 PageGetPageSize(origpage) - SizeOfPageHeaderData -
     161                 :             :                 MAXALIGN(sizeof(BTPageOpaqueData));
     162                 :             : 
     163                 :             :         /* The right page will have the same high key as the old page */
     164         [ +  + ]:        2309 :         if (!P_RIGHTMOST(opaque))
     165                 :             :         {
     166                 :         613 :                 itemid = PageGetItemId(origpage, P_HIKEY);
     167                 :         613 :                 rightspace -= (int) (MAXALIGN(ItemIdGetLength(itemid)) +
     168                 :             :                                                          sizeof(ItemIdData));
     169                 :         613 :         }
     170                 :             : 
     171                 :             :         /* Count up total space in data items before actually scanning 'em */
     172                 :        2309 :         olddataitemstotal = rightspace - (int) PageGetExactFreeSpace(origpage);
     173   [ +  -  +  + ]:        2309 :         leaffillfactor = BTGetFillFactor(rel);
     174                 :             : 
     175                 :             :         /* Passed-in newitemsz is MAXALIGNED but does not include line pointer */
     176                 :        2309 :         newitemsz += sizeof(ItemIdData);
     177                 :        2309 :         state.rel = rel;
     178                 :        2309 :         state.origpage = origpage;
     179                 :        2309 :         state.newitem = newitem;
     180                 :        2309 :         state.newitemsz = newitemsz;
     181                 :        2309 :         state.is_leaf = P_ISLEAF(opaque);
     182                 :        2309 :         state.is_rightmost = P_RIGHTMOST(opaque);
     183                 :        2309 :         state.leftspace = leftspace;
     184                 :        2309 :         state.rightspace = rightspace;
     185                 :        2309 :         state.olddataitemstotal = olddataitemstotal;
     186                 :        2309 :         state.minfirstrightsz = SIZE_MAX;
     187                 :        2309 :         state.newitemoff = newitemoff;
     188                 :             : 
     189                 :             :         /* newitem cannot be a posting list item */
     190         [ +  - ]:        2309 :         Assert(!BTreeTupleIsPosting(newitem));
     191                 :             : 
     192                 :             :         /*
     193                 :             :          * nsplits should never exceed maxoff because there will be at most as
     194                 :             :          * many candidate split points as there are points _between_ tuples, once
     195                 :             :          * you imagine that the new item is already on the original page (the
     196                 :             :          * final number of splits may be slightly lower because not all points
     197                 :             :          * between tuples will be legal).
     198                 :             :          */
     199                 :        2309 :         state.maxsplits = maxoff;
     200                 :        2309 :         state.splits = palloc_array(SplitPoint, state.maxsplits);
     201                 :        2309 :         state.nsplits = 0;
     202                 :             : 
     203                 :             :         /*
     204                 :             :          * Scan through the data items and calculate space usage for a split at
     205                 :             :          * each possible position
     206                 :             :          */
     207                 :        2309 :         olddataitemstoleft = 0;
     208                 :             : 
     209         [ +  + ]:      746411 :         for (offnum = P_FIRSTDATAKEY(opaque);
     210                 :      746411 :                  offnum <= maxoff;
     211                 :      744102 :                  offnum = OffsetNumberNext(offnum))
     212                 :             :         {
     213                 :      744102 :                 Size            itemsz;
     214                 :             : 
     215                 :      744102 :                 itemid = PageGetItemId(origpage, offnum);
     216                 :      744102 :                 itemsz = MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData);
     217                 :             : 
     218                 :             :                 /*
     219                 :             :                  * When item offset number is not newitemoff, neither side of the
     220                 :             :                  * split can be newitem.  Record a split after the previous data item
     221                 :             :                  * from original page, but before the current data item from original
     222                 :             :                  * page. (_bt_recsplitloc() will reject the split when there are no
     223                 :             :                  * previous items, which we rely on.)
     224                 :             :                  */
     225         [ +  + ]:      744102 :                 if (offnum < newitemoff)
     226                 :      712532 :                         _bt_recsplitloc(&state, offnum, false, olddataitemstoleft, itemsz);
     227         [ +  + ]:       31570 :                 else if (offnum > newitemoff)
     228                 :       30736 :                         _bt_recsplitloc(&state, offnum, true, olddataitemstoleft, itemsz);
     229                 :             :                 else
     230                 :             :                 {
     231                 :             :                         /*
     232                 :             :                          * Record a split after all "offnum < newitemoff" original page
     233                 :             :                          * data items, but before newitem
     234                 :             :                          */
     235                 :         834 :                         _bt_recsplitloc(&state, offnum, false, olddataitemstoleft, itemsz);
     236                 :             : 
     237                 :             :                         /*
     238                 :             :                          * Record a split after newitem, but before data item from
     239                 :             :                          * original page at offset newitemoff/current offset
     240                 :             :                          */
     241                 :         834 :                         _bt_recsplitloc(&state, offnum, true, olddataitemstoleft, itemsz);
     242                 :             :                 }
     243                 :             : 
     244                 :      744102 :                 olddataitemstoleft += itemsz;
     245                 :      744102 :         }
     246                 :             : 
     247                 :             :         /*
     248                 :             :          * Record a split after all original page data items, but before newitem.
     249                 :             :          * (Though only when it's possible that newitem will end up alone on new
     250                 :             :          * right page.)
     251                 :             :          */
     252         [ +  - ]:        2309 :         Assert(olddataitemstoleft == olddataitemstotal);
     253         [ +  + ]:        2309 :         if (newitemoff > maxoff)
     254                 :        1475 :                 _bt_recsplitloc(&state, newitemoff, false, olddataitemstotal, 0);
     255                 :             : 
     256                 :             :         /*
     257                 :             :          * I believe it is not possible to fail to find a feasible split, but just
     258                 :             :          * in case ...
     259                 :             :          */
     260         [ +  - ]:        2309 :         if (state.nsplits == 0)
     261   [ #  #  #  # ]:           0 :                 elog(ERROR, "could not find a feasible split point for index \"%s\"",
     262                 :             :                          RelationGetRelationName(rel));
     263                 :             : 
     264                 :             :         /*
     265                 :             :          * Start search for a split point among list of legal split points.  Give
     266                 :             :          * primary consideration to equalizing available free space in each half
     267                 :             :          * of the split initially (start with default strategy), while applying
     268                 :             :          * rightmost and split-after-new-item optimizations where appropriate.
     269                 :             :          * Either of the two other fallback strategies may be required for cases
     270                 :             :          * with a large number of duplicates around the original/space-optimal
     271                 :             :          * split point.
     272                 :             :          *
     273                 :             :          * Default strategy gives some weight to suffix truncation in deciding a
     274                 :             :          * split point on leaf pages.  It attempts to select a split point where a
     275                 :             :          * distinguishing attribute appears earlier in the new high key for the
     276                 :             :          * left side of the split, in order to maximize the number of trailing
     277                 :             :          * attributes that can be truncated away.  Only candidate split points
     278                 :             :          * that imply an acceptable balance of free space on each side are
     279                 :             :          * considered.  See _bt_defaultinterval().
     280                 :             :          */
     281         [ +  + ]:        2309 :         if (!state.is_leaf)
     282                 :             :         {
     283                 :             :                 /* fillfactormult only used on rightmost page */
     284                 :          51 :                 usemult = state.is_rightmost;
     285                 :          51 :                 fillfactormult = BTREE_NONLEAF_FILLFACTOR / 100.0;
     286                 :          51 :         }
     287         [ +  + ]:        2258 :         else if (state.is_rightmost)
     288                 :             :         {
     289                 :             :                 /* Rightmost leaf page --  fillfactormult always used */
     290                 :        1670 :                 usemult = true;
     291                 :        1670 :                 fillfactormult = leaffillfactor / 100.0;
     292                 :        1670 :         }
     293         [ +  + ]:         588 :         else if (_bt_afternewitemoff(&state, maxoff, leaffillfactor, &usemult))
     294                 :             :         {
     295                 :             :                 /*
     296                 :             :                  * New item inserted at rightmost point among a localized grouping on
     297                 :             :                  * a leaf page -- apply "split after new item" optimization, either by
     298                 :             :                  * applying leaf fillfactor multiplier, or by choosing the exact split
     299                 :             :                  * point that leaves newitem as lastleft. (usemult is set for us.)
     300                 :             :                  */
     301         [ +  - ]:           6 :                 if (usemult)
     302                 :             :                 {
     303                 :             :                         /* fillfactormult should be set based on leaf fillfactor */
     304                 :           6 :                         fillfactormult = leaffillfactor / 100.0;
     305                 :           6 :                 }
     306                 :             :                 else
     307                 :             :                 {
     308                 :             :                         /* find precise split point after newitemoff */
     309   [ #  #  #  # ]:           0 :                         for (int i = 0; i < state.nsplits; i++)
     310                 :             :                         {
     311                 :           0 :                                 SplitPoint *split = state.splits + i;
     312                 :             : 
     313   [ #  #  #  # ]:           0 :                                 if (split->newitemonleft &&
     314                 :           0 :                                         newitemoff == split->firstrightoff)
     315                 :             :                                 {
     316                 :           0 :                                         pfree(state.splits);
     317                 :           0 :                                         *newitemonleft = true;
     318                 :           0 :                                         return newitemoff;
     319                 :             :                                 }
     320         [ #  # ]:           0 :                         }
     321                 :             : 
     322                 :             :                         /*
     323                 :             :                          * Cannot legally split after newitemoff; proceed with split
     324                 :             :                          * without using fillfactor multiplier.  This is defensive, and
     325                 :             :                          * should never be needed in practice.
     326                 :             :                          */
     327                 :           0 :                         fillfactormult = 0.50;
     328                 :             :                 }
     329                 :           6 :         }
     330                 :             :         else
     331                 :             :         {
     332                 :             :                 /* Other leaf page.  50:50 page split. */
     333                 :         582 :                 usemult = false;
     334                 :             :                 /* fillfactormult not used, but be tidy */
     335                 :         582 :                 fillfactormult = 0.50;
     336                 :             :         }
     337                 :             : 
     338                 :             :         /*
     339                 :             :          * Save leftmost and rightmost splits for page before original ordinal
     340                 :             :          * sort order is lost by delta/fillfactormult sort
     341                 :             :          */
     342                 :        2309 :         leftpage = state.splits[0];
     343                 :        2309 :         rightpage = state.splits[state.nsplits - 1];
     344                 :             : 
     345                 :             :         /* Give split points a fillfactormult-wise delta, and sort on deltas */
     346                 :        2309 :         _bt_deltasortsplits(&state, fillfactormult, usemult);
     347                 :             : 
     348                 :             :         /* Determine split interval for default strategy */
     349                 :        2309 :         state.interval = _bt_defaultinterval(&state);
     350                 :             : 
     351                 :             :         /*
     352                 :             :          * Determine if default strategy/split interval will produce a
     353                 :             :          * sufficiently distinguishing split, or if we should change strategies.
     354                 :             :          * Alternative strategies change the range of split points that are
     355                 :             :          * considered acceptable (split interval), and possibly change
     356                 :             :          * fillfactormult, in order to deal with pages with a large number of
     357                 :             :          * duplicates gracefully.
     358                 :             :          *
     359                 :             :          * Pass low and high splits for the entire page (actually, they're for an
     360                 :             :          * imaginary version of the page that includes newitem).  These are used
     361                 :             :          * when the initial split interval encloses split points that are full of
     362                 :             :          * duplicates, and we need to consider if it's even possible to avoid
     363                 :             :          * appending a heap TID.
     364                 :             :          */
     365                 :        2309 :         perfectpenalty = _bt_strategy(&state, &leftpage, &rightpage, &strategy);
     366                 :             : 
     367         [ +  + ]:        2309 :         if (strategy == SPLIT_DEFAULT)
     368                 :             :         {
     369                 :             :                 /*
     370                 :             :                  * Default strategy worked out (always works out with internal page).
     371                 :             :                  * Original split interval still stands.
     372                 :             :                  */
     373                 :        2260 :         }
     374                 :             : 
     375                 :             :         /*
     376                 :             :          * Many duplicates strategy is used when a heap TID would otherwise be
     377                 :             :          * appended, but the page isn't completely full of logical duplicates.
     378                 :             :          *
     379                 :             :          * The split interval is widened to include all legal candidate split
     380                 :             :          * points.  There might be a few as two distinct values in the whole-page
     381                 :             :          * split interval, though it's also possible that most of the values on
     382                 :             :          * the page are unique.  The final split point will either be to the
     383                 :             :          * immediate left or to the immediate right of the group of duplicate
     384                 :             :          * tuples that enclose the first/delta-optimal split point (perfect
     385                 :             :          * penalty was set so that the lowest delta split point that avoids
     386                 :             :          * appending a heap TID will be chosen).  Maximizing the number of
     387                 :             :          * attributes that can be truncated away is not a goal of the many
     388                 :             :          * duplicates strategy.
     389                 :             :          *
     390                 :             :          * Single value strategy is used when it is impossible to avoid appending
     391                 :             :          * a heap TID.  It arranges to leave the left page very full.  This
     392                 :             :          * maximizes space utilization in cases where tuples with the same
     393                 :             :          * attribute values span many pages.  Newly inserted duplicates will tend
     394                 :             :          * to have higher heap TID values, so we'll end up splitting to the right
     395                 :             :          * consistently.  (Single value strategy is harmless though not
     396                 :             :          * particularly useful with !heapkeyspace indexes.)
     397                 :             :          */
     398         [ +  + ]:          49 :         else if (strategy == SPLIT_MANY_DUPLICATES)
     399                 :             :         {
     400         [ +  - ]:           7 :                 Assert(state.is_leaf);
     401                 :             :                 /* Shouldn't try to truncate away extra user attributes */
     402         [ +  - ]:           7 :                 Assert(perfectpenalty ==
     403                 :             :                            IndexRelationGetNumberOfKeyAttributes(state.rel));
     404                 :             :                 /* No need to resort splits -- no change in fillfactormult/deltas */
     405                 :           7 :                 state.interval = state.nsplits;
     406                 :           7 :         }
     407         [ -  + ]:          42 :         else if (strategy == SPLIT_SINGLE_VALUE)
     408                 :             :         {
     409         [ +  - ]:          42 :                 Assert(state.is_leaf);
     410                 :             :                 /* Split near the end of the page */
     411                 :          42 :                 usemult = true;
     412                 :          42 :                 fillfactormult = BTREE_SINGLEVAL_FILLFACTOR / 100.0;
     413                 :             :                 /* Resort split points with new delta */
     414                 :          42 :                 _bt_deltasortsplits(&state, fillfactormult, usemult);
     415                 :             :                 /* Appending a heap TID is unavoidable, so interval of 1 is fine */
     416                 :          42 :                 state.interval = 1;
     417                 :          42 :         }
     418                 :             : 
     419                 :             :         /*
     420                 :             :          * Search among acceptable split points (using final split interval) for
     421                 :             :          * the entry that has the lowest penalty, and is therefore expected to
     422                 :             :          * maximize fan-out.  Sets *newitemonleft for us.
     423                 :             :          */
     424                 :        4618 :         firstrightoff = _bt_bestsplitloc(&state, perfectpenalty, newitemonleft,
     425                 :        2309 :                                                                          strategy);
     426                 :        2309 :         pfree(state.splits);
     427                 :             : 
     428                 :        2309 :         return firstrightoff;
     429                 :        2309 : }
     430                 :             : 
     431                 :             : /*
     432                 :             :  * Subroutine to record a particular point between two tuples (possibly the
     433                 :             :  * new item) on page (ie, combination of firstrightoff and newitemonleft
     434                 :             :  * settings) in *state for later analysis.  This is also a convenient point to
     435                 :             :  * check if the split is legal (if it isn't, it won't be recorded).
     436                 :             :  *
     437                 :             :  * firstrightoff is the offset of the first item on the original page that
     438                 :             :  * goes to the right page, and firstrightofforigpagetuplesz is the size of
     439                 :             :  * that tuple.  firstrightoff can be > max offset, which means that all the
     440                 :             :  * old items go to the left page and only the new item goes to the right page.
     441                 :             :  * We don't actually use firstrightofforigpagetuplesz in that case (actually,
     442                 :             :  * we don't use it for _any_ split where the firstright tuple happens to be
     443                 :             :  * newitem).
     444                 :             :  *
     445                 :             :  * olddataitemstoleft is the total size of all old items to the left of the
     446                 :             :  * split point that is recorded here when legal.  Should not include
     447                 :             :  * newitemsz, since that is handled here.
     448                 :             :  */
     449                 :             : static void
     450                 :      746411 : _bt_recsplitloc(FindSplitData *state,
     451                 :             :                                 OffsetNumber firstrightoff,
     452                 :             :                                 bool newitemonleft,
     453                 :             :                                 int olddataitemstoleft,
     454                 :             :                                 Size firstrightofforigpagetuplesz)
     455                 :             : {
     456                 :      746411 :         int16           leftfree,
     457                 :             :                                 rightfree;
     458                 :      746411 :         Size            firstrightsz;
     459                 :      746411 :         Size            postingsz = 0;
     460                 :      746411 :         bool            newitemisfirstright;
     461                 :             : 
     462                 :             :         /* Is the new item going to be split point's firstright tuple? */
     463         [ +  + ]:      749554 :         newitemisfirstright = (firstrightoff == state->newitemoff &&
     464                 :        3143 :                                                    !newitemonleft);
     465                 :             : 
     466         [ +  + ]:      746411 :         if (newitemisfirstright)
     467                 :        2309 :                 firstrightsz = state->newitemsz;
     468                 :             :         else
     469                 :             :         {
     470                 :      744102 :                 firstrightsz = firstrightofforigpagetuplesz;
     471                 :             : 
     472                 :             :                 /*
     473                 :             :                  * Calculate suffix truncation space saving when firstright tuple is a
     474                 :             :                  * posting list tuple, though only when the tuple is over 64 bytes
     475                 :             :                  * including line pointer overhead (arbitrary).  This avoids accessing
     476                 :             :                  * the tuple in cases where its posting list must be very small (if
     477                 :             :                  * tuple has one at all).
     478                 :             :                  *
     479                 :             :                  * Note: We don't do this in the case where firstright tuple is
     480                 :             :                  * newitem, since newitem cannot have a posting list.
     481                 :             :                  */
     482   [ +  +  +  + ]:      744102 :                 if (state->is_leaf && firstrightsz > 64)
     483                 :             :                 {
     484                 :        4574 :                         ItemId          itemid;
     485                 :        4574 :                         IndexTuple      newhighkey;
     486                 :             : 
     487                 :        4574 :                         itemid = PageGetItemId(state->origpage, firstrightoff);
     488                 :        4574 :                         newhighkey = (IndexTuple) PageGetItem(state->origpage, itemid);
     489                 :             : 
     490         [ +  + ]:        4574 :                         if (BTreeTupleIsPosting(newhighkey))
     491                 :        5262 :                                 postingsz = IndexTupleSize(newhighkey) -
     492                 :        2631 :                                         BTreeTupleGetPostingOffset(newhighkey);
     493                 :        4574 :                 }
     494                 :             :         }
     495                 :             : 
     496                 :             :         /* Account for all the old tuples */
     497                 :      746411 :         leftfree = state->leftspace - olddataitemstoleft;
     498                 :     1492822 :         rightfree = state->rightspace -
     499                 :      746411 :                 (state->olddataitemstotal - olddataitemstoleft);
     500                 :             : 
     501                 :             :         /*
     502                 :             :          * The first item on the right page becomes the high key of the left page;
     503                 :             :          * therefore it counts against left space as well as right space (we
     504                 :             :          * cannot assume that suffix truncation will make it any smaller).  When
     505                 :             :          * index has included attributes, then those attributes of left page high
     506                 :             :          * key will be truncated leaving that page with slightly more free space.
     507                 :             :          * However, that shouldn't affect our ability to find valid split
     508                 :             :          * location, since we err in the direction of being pessimistic about free
     509                 :             :          * space on the left half.  Besides, even when suffix truncation of
     510                 :             :          * non-TID attributes occurs, the new high key often won't even be a
     511                 :             :          * single MAXALIGN() quantum smaller than the firstright tuple it's based
     512                 :             :          * on.
     513                 :             :          *
     514                 :             :          * If we are on the leaf level, assume that suffix truncation cannot avoid
     515                 :             :          * adding a heap TID to the left half's new high key when splitting at the
     516                 :             :          * leaf level.  In practice the new high key will often be smaller and
     517                 :             :          * will rarely be larger, but conservatively assume the worst case.  We do
     518                 :             :          * go to the trouble of subtracting away posting list overhead, though
     519                 :             :          * only when it looks like it will make an appreciable difference.
     520                 :             :          * (Posting lists are the only case where truncation will typically make
     521                 :             :          * the final high key far smaller than firstright, so being a bit more
     522                 :             :          * precise there noticeably improves the balance of free space.)
     523                 :             :          */
     524         [ +  + ]:      746411 :         if (state->is_leaf)
     525                 :     1492410 :                 leftfree -= (int16) (firstrightsz +
     526                 :      746205 :                                                          MAXALIGN(sizeof(ItemPointerData)) -
     527                 :      746205 :                                                          postingsz);
     528                 :             :         else
     529                 :         206 :                 leftfree -= (int16) firstrightsz;
     530                 :             : 
     531                 :             :         /* account for the new item */
     532         [ +  + ]:      746411 :         if (newitemonleft)
     533                 :       31570 :                 leftfree -= (int16) state->newitemsz;
     534                 :             :         else
     535                 :      714841 :                 rightfree -= (int16) state->newitemsz;
     536                 :             : 
     537                 :             :         /*
     538                 :             :          * If we are not on the leaf level, we will be able to discard the key
     539                 :             :          * data from the first item that winds up on the right page.
     540                 :             :          */
     541         [ +  + ]:      746411 :         if (!state->is_leaf)
     542                 :         206 :                 rightfree += (int16) firstrightsz -
     543                 :             :                         (int16) (MAXALIGN(sizeof(IndexTupleData)) + sizeof(ItemIdData));
     544                 :             : 
     545                 :             :         /* Record split if legal */
     546   [ +  +  +  + ]:      746411 :         if (leftfree >= 0 && rightfree >= 0)
     547                 :             :         {
     548         [ +  - ]:      742033 :                 Assert(state->nsplits < state->maxsplits);
     549                 :             : 
     550                 :             :                 /* Determine smallest firstright tuple size among legal splits */
     551         [ +  + ]:      742033 :                 state->minfirstrightsz = Min(state->minfirstrightsz, firstrightsz);
     552                 :             : 
     553                 :      742033 :                 state->splits[state->nsplits].curdelta = 0;
     554                 :      742033 :                 state->splits[state->nsplits].leftfree = leftfree;
     555                 :      742033 :                 state->splits[state->nsplits].rightfree = rightfree;
     556                 :      742033 :                 state->splits[state->nsplits].firstrightoff = firstrightoff;
     557                 :      742033 :                 state->splits[state->nsplits].newitemonleft = newitemonleft;
     558                 :      742033 :                 state->nsplits++;
     559                 :      742033 :         }
     560                 :      746411 : }
     561                 :             : 
     562                 :             : /*
     563                 :             :  * Subroutine to assign space deltas to materialized array of candidate split
     564                 :             :  * points based on current fillfactor, and to sort array using that fillfactor
     565                 :             :  */
     566                 :             : static void
     567                 :        2351 : _bt_deltasortsplits(FindSplitData *state, double fillfactormult,
     568                 :             :                                         bool usemult)
     569                 :             : {
     570         [ +  + ]:      745962 :         for (int i = 0; i < state->nsplits; i++)
     571                 :             :         {
     572                 :      743611 :                 SplitPoint *split = state->splits + i;
     573                 :      743611 :                 int16           delta;
     574                 :             : 
     575         [ +  + ]:      743611 :                 if (usemult)
     576                 :     1228698 :                         delta = fillfactormult * split->leftfree -
     577                 :      614349 :                                 (1.0 - fillfactormult) * split->rightfree;
     578                 :             :                 else
     579                 :      129262 :                         delta = split->leftfree - split->rightfree;
     580                 :             : 
     581         [ +  + ]:      743611 :                 if (delta < 0)
     582                 :      127219 :                         delta = -delta;
     583                 :             : 
     584                 :             :                 /* Save delta */
     585                 :      743611 :                 split->curdelta = delta;
     586                 :      743611 :         }
     587                 :             : 
     588                 :        2351 :         qsort(state->splits, state->nsplits, sizeof(SplitPoint), _bt_splitcmp);
     589                 :        2351 : }
     590                 :             : 
     591                 :             : /*
     592                 :             :  * qsort-style comparator used by _bt_deltasortsplits()
     593                 :             :  */
     594                 :             : static int
     595                 :     8196351 : _bt_splitcmp(const void *arg1, const void *arg2)
     596                 :             : {
     597                 :     8196351 :         const SplitPoint *split1 = arg1;
     598                 :     8196351 :         const SplitPoint *split2 = arg2;
     599                 :             : 
     600                 :    16392702 :         return pg_cmp_s16(split1->curdelta, split2->curdelta);
     601                 :     8196351 : }
     602                 :             : 
     603                 :             : /*
     604                 :             :  * Subroutine to determine whether or not a non-rightmost leaf page should be
     605                 :             :  * split immediately after the would-be original page offset for the
     606                 :             :  * new/incoming tuple (or should have leaf fillfactor applied when new item is
     607                 :             :  * to the right on original page).  This is appropriate when there is a
     608                 :             :  * pattern of localized monotonically increasing insertions into a composite
     609                 :             :  * index, where leading attribute values form local groupings, and we
     610                 :             :  * anticipate further insertions of the same/current grouping (new item's
     611                 :             :  * grouping) in the near future.  This can be thought of as a variation on
     612                 :             :  * applying leaf fillfactor during rightmost leaf page splits, since cases
     613                 :             :  * that benefit will converge on packing leaf pages leaffillfactor% full over
     614                 :             :  * time.
     615                 :             :  *
     616                 :             :  * We may leave extra free space remaining on the rightmost page of a "most
     617                 :             :  * significant column" grouping of tuples if that grouping never ends up
     618                 :             :  * having future insertions that use the free space.  That effect is
     619                 :             :  * self-limiting; a future grouping that becomes the "nearest on the right"
     620                 :             :  * grouping of the affected grouping usually puts the extra free space to good
     621                 :             :  * use.
     622                 :             :  *
     623                 :             :  * Caller uses optimization when routine returns true, though the exact action
     624                 :             :  * taken by caller varies.  Caller uses original leaf page fillfactor in
     625                 :             :  * standard way rather than using the new item offset directly when *usemult
     626                 :             :  * was also set to true here.  Otherwise, caller applies optimization by
     627                 :             :  * locating the legal split point that makes the new tuple the lastleft tuple
     628                 :             :  * for the split.
     629                 :             :  */
     630                 :             : static bool
     631                 :         588 : _bt_afternewitemoff(FindSplitData *state, OffsetNumber maxoff,
     632                 :             :                                         int leaffillfactor, bool *usemult)
     633                 :             : {
     634                 :         588 :         int16           nkeyatts;
     635                 :         588 :         ItemId          itemid;
     636                 :         588 :         IndexTuple      tup;
     637                 :         588 :         int                     keepnatts;
     638                 :             : 
     639         [ +  - ]:         588 :         Assert(state->is_leaf && !state->is_rightmost);
     640                 :             : 
     641                 :         588 :         nkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
     642                 :             : 
     643                 :             :         /* Single key indexes not considered here */
     644         [ +  + ]:         588 :         if (nkeyatts == 1)
     645                 :          39 :                 return false;
     646                 :             : 
     647                 :             :         /* Ascending insertion pattern never inferred when new item is first */
     648         [ -  + ]:         549 :         if (state->newitemoff == P_FIRSTKEY)
     649                 :           0 :                 return false;
     650                 :             : 
     651                 :             :         /*
     652                 :             :          * Only apply optimization on pages with equisized tuples, since ordinal
     653                 :             :          * keys are likely to be fixed-width.  Testing if the new tuple is
     654                 :             :          * variable width directly might also work, but that fails to apply the
     655                 :             :          * optimization to indexes with a numeric_ops attribute.
     656                 :             :          *
     657                 :             :          * Conclude that page has equisized tuples when the new item is the same
     658                 :             :          * width as the smallest item observed during pass over page, and other
     659                 :             :          * non-pivot tuples must be the same width as well.  (Note that the
     660                 :             :          * possibly-truncated existing high key isn't counted in
     661                 :             :          * olddataitemstotal, and must be subtracted from maxoff.)
     662                 :             :          */
     663         [ +  + ]:         549 :         if (state->newitemsz != state->minfirstrightsz)
     664                 :         119 :                 return false;
     665         [ +  + ]:         430 :         if (state->newitemsz * (maxoff - 1) != state->olddataitemstotal)
     666                 :         416 :                 return false;
     667                 :             : 
     668                 :             :         /*
     669                 :             :          * Avoid applying optimization when tuples are wider than a tuple
     670                 :             :          * consisting of two non-NULL int8/int64 attributes (or four non-NULL
     671                 :             :          * int4/int32 attributes)
     672                 :             :          */
     673         [ -  + ]:          14 :         if (state->newitemsz >
     674                 :             :                 MAXALIGN(sizeof(IndexTupleData) + sizeof(int64) * 2) +
     675                 :             :                 sizeof(ItemIdData))
     676                 :           0 :                 return false;
     677                 :             : 
     678                 :             :         /*
     679                 :             :          * At least the first attribute's value must be equal to the corresponding
     680                 :             :          * value in previous tuple to apply optimization.  New item cannot be a
     681                 :             :          * duplicate, either.
     682                 :             :          *
     683                 :             :          * Handle case where new item is to the right of all items on the existing
     684                 :             :          * page.  This is suggestive of monotonically increasing insertions in
     685                 :             :          * itself, so the "heap TID adjacency" test is not applied here.
     686                 :             :          */
     687         [ +  + ]:          14 :         if (state->newitemoff > maxoff)
     688                 :             :         {
     689                 :           6 :                 itemid = PageGetItemId(state->origpage, maxoff);
     690                 :           6 :                 tup = (IndexTuple) PageGetItem(state->origpage, itemid);
     691                 :           6 :                 keepnatts = _bt_keep_natts_fast(state->rel, tup, state->newitem);
     692                 :             : 
     693   [ +  -  -  + ]:           6 :                 if (keepnatts > 1 && keepnatts <= nkeyatts)
     694                 :             :                 {
     695                 :           6 :                         *usemult = true;
     696                 :           6 :                         return true;
     697                 :             :                 }
     698                 :             : 
     699                 :           0 :                 return false;
     700                 :             :         }
     701                 :             : 
     702                 :             :         /*
     703                 :             :          * "Low cardinality leading column, high cardinality suffix column"
     704                 :             :          * indexes with a random insertion pattern (e.g., an index with a boolean
     705                 :             :          * column, such as an index on '(book_is_in_print, book_isbn)') present us
     706                 :             :          * with a risk of consistently misapplying the optimization.  We're
     707                 :             :          * willing to accept very occasional misapplication of the optimization,
     708                 :             :          * provided the cases where we get it wrong are rare and self-limiting.
     709                 :             :          *
     710                 :             :          * Heap TID adjacency strongly suggests that the item just to the left was
     711                 :             :          * inserted very recently, which limits overapplication of the
     712                 :             :          * optimization.  Besides, all inappropriate cases triggered here will
     713                 :             :          * still split in the middle of the page on average.
     714                 :             :          */
     715                 :           8 :         itemid = PageGetItemId(state->origpage, OffsetNumberPrev(state->newitemoff));
     716                 :           8 :         tup = (IndexTuple) PageGetItem(state->origpage, itemid);
     717                 :             :         /* Do cheaper test first */
     718   [ +  -  +  + ]:           8 :         if (BTreeTupleIsPosting(tup) ||
     719                 :           8 :                 !_bt_adjacenthtid(&tup->t_tid, &state->newitem->t_tid))
     720                 :           7 :                 return false;
     721                 :             :         /* Check same conditions as rightmost item case, too */
     722                 :           1 :         keepnatts = _bt_keep_natts_fast(state->rel, tup, state->newitem);
     723                 :             : 
     724   [ -  +  #  # ]:           1 :         if (keepnatts > 1 && keepnatts <= nkeyatts)
     725                 :             :         {
     726                 :           0 :                 double          interp = (double) state->newitemoff / ((double) maxoff + 1);
     727                 :           0 :                 double          leaffillfactormult = (double) leaffillfactor / 100.0;
     728                 :             : 
     729                 :             :                 /*
     730                 :             :                  * Don't allow caller to split after a new item when it will result in
     731                 :             :                  * a split point to the right of the point that a leaf fillfactor
     732                 :             :                  * split would use -- have caller apply leaf fillfactor instead
     733                 :             :                  */
     734                 :           0 :                 *usemult = interp > leaffillfactormult;
     735                 :             : 
     736                 :           0 :                 return true;
     737                 :           0 :         }
     738                 :             : 
     739                 :           1 :         return false;
     740                 :         588 : }
     741                 :             : 
     742                 :             : /*
     743                 :             :  * Subroutine for determining if two heap TIDS are "adjacent".
     744                 :             :  *
     745                 :             :  * Adjacent means that the high TID is very likely to have been inserted into
     746                 :             :  * heap relation immediately after the low TID, probably during the current
     747                 :             :  * transaction.
     748                 :             :  */
     749                 :             : static bool
     750                 :           8 : _bt_adjacenthtid(const ItemPointerData *lowhtid, const ItemPointerData *highhtid)
     751                 :             : {
     752                 :           8 :         BlockNumber lowblk,
     753                 :             :                                 highblk;
     754                 :             : 
     755                 :           8 :         lowblk = ItemPointerGetBlockNumber(lowhtid);
     756                 :           8 :         highblk = ItemPointerGetBlockNumber(highhtid);
     757                 :             : 
     758                 :             :         /* Make optimistic assumption of adjacency when heap blocks match */
     759         [ +  + ]:           8 :         if (lowblk == highblk)
     760                 :           1 :                 return true;
     761                 :             : 
     762                 :             :         /* When heap block one up, second offset should be FirstOffsetNumber */
     763   [ +  +  +  - ]:           7 :         if (lowblk + 1 == highblk &&
     764                 :           3 :                 ItemPointerGetOffsetNumber(highhtid) == FirstOffsetNumber)
     765                 :           0 :                 return true;
     766                 :             : 
     767                 :           7 :         return false;
     768                 :           8 : }
     769                 :             : 
     770                 :             : /*
     771                 :             :  * Subroutine to find the "best" split point among candidate split points.
     772                 :             :  * The best split point is the split point with the lowest penalty among split
     773                 :             :  * points that fall within current/final split interval.  Penalty is an
     774                 :             :  * abstract score, with a definition that varies depending on whether we're
     775                 :             :  * splitting a leaf page or an internal page.  See _bt_split_penalty() for
     776                 :             :  * details.
     777                 :             :  *
     778                 :             :  * "perfectpenalty" is assumed to be the lowest possible penalty among
     779                 :             :  * candidate split points.  This allows us to return early without wasting
     780                 :             :  * cycles on calculating the first differing attribute for all candidate
     781                 :             :  * splits when that clearly cannot improve our choice (or when we only want a
     782                 :             :  * minimally distinguishing split point, and don't want to make the split any
     783                 :             :  * more unbalanced than is necessary).
     784                 :             :  *
     785                 :             :  * We return the index of the first existing tuple that should go on the right
     786                 :             :  * page, plus a boolean indicating if new item is on left of split point.
     787                 :             :  */
     788                 :             : static OffsetNumber
     789                 :        2309 : _bt_bestsplitloc(FindSplitData *state, int perfectpenalty,
     790                 :             :                                  bool *newitemonleft, FindSplitStrat strategy)
     791                 :             : {
     792                 :        2309 :         int                     bestpenalty,
     793                 :             :                                 lowsplit;
     794         [ +  + ]:        2309 :         int                     highsplit = Min(state->interval, state->nsplits);
     795                 :        2309 :         SplitPoint *final;
     796                 :             : 
     797                 :        2309 :         bestpenalty = INT_MAX;
     798                 :        2309 :         lowsplit = 0;
     799         [ +  + ]:        6442 :         for (int i = lowsplit; i < highsplit; i++)
     800                 :             :         {
     801                 :        4133 :                 int                     penalty;
     802                 :             : 
     803                 :        4133 :                 penalty = _bt_split_penalty(state, state->splits + i);
     804                 :             : 
     805         [ +  + ]:        4133 :                 if (penalty < bestpenalty)
     806                 :             :                 {
     807                 :        2734 :                         bestpenalty = penalty;
     808                 :        2734 :                         lowsplit = i;
     809                 :        2734 :                 }
     810                 :             : 
     811         [ +  + ]:        4133 :                 if (penalty <= perfectpenalty)
     812                 :        2300 :                         break;
     813         [ +  + ]:        4133 :         }
     814                 :             : 
     815                 :        2309 :         final = &state->splits[lowsplit];
     816                 :             : 
     817                 :             :         /*
     818                 :             :          * There is a risk that the "many duplicates" strategy will repeatedly do
     819                 :             :          * the wrong thing when there are monotonically decreasing insertions to
     820                 :             :          * the right of a large group of duplicates.   Repeated splits could leave
     821                 :             :          * a succession of right half pages with free space that can never be
     822                 :             :          * used.  This must be avoided.
     823                 :             :          *
     824                 :             :          * Consider the example of the leftmost page in a single integer attribute
     825                 :             :          * NULLS FIRST index which is almost filled with NULLs.  Monotonically
     826                 :             :          * decreasing integer insertions might cause the same leftmost page to
     827                 :             :          * split repeatedly at the same point.  Each split derives its new high
     828                 :             :          * key from the lowest current value to the immediate right of the large
     829                 :             :          * group of NULLs, which will always be higher than all future integer
     830                 :             :          * insertions, directing all future integer insertions to the same
     831                 :             :          * leftmost page.
     832                 :             :          */
     833   [ +  +  +  + ]:        2309 :         if (strategy == SPLIT_MANY_DUPLICATES && !state->is_rightmost &&
     834   [ +  +  -  +  :           5 :                 !final->newitemonleft && final->firstrightoff >= state->newitemoff &&
                   #  # ]
     835                 :           0 :                 final->firstrightoff < state->newitemoff + 9)
     836                 :             :         {
     837                 :             :                 /*
     838                 :             :                  * Avoid the problem by performing a 50:50 split when the new item is
     839                 :             :                  * just to the right of the would-be "many duplicates" split point.
     840                 :             :                  * (Note that the test used for an insert that is "just to the right"
     841                 :             :                  * of the split point is conservative.)
     842                 :             :                  */
     843                 :           0 :                 final = &state->splits[0];
     844                 :           0 :         }
     845                 :             : 
     846                 :        2309 :         *newitemonleft = final->newitemonleft;
     847                 :        4618 :         return final->firstrightoff;
     848                 :        2309 : }
     849                 :             : 
     850                 :             : #define LEAF_SPLIT_DISTANCE                     0.050
     851                 :             : #define INTERNAL_SPLIT_DISTANCE         0.075
     852                 :             : 
     853                 :             : /*
     854                 :             :  * Return a split interval to use for the default strategy.  This is a limit
     855                 :             :  * on the number of candidate split points to give further consideration to.
     856                 :             :  * Only a fraction of all candidate splits points (those located at the start
     857                 :             :  * of the now-sorted splits array) fall within the split interval.  Split
     858                 :             :  * interval is applied within _bt_bestsplitloc().
     859                 :             :  *
     860                 :             :  * Split interval represents an acceptable range of split points -- those that
     861                 :             :  * have leftfree and rightfree values that are acceptably balanced.  The final
     862                 :             :  * split point chosen is the split point with the lowest "penalty" among split
     863                 :             :  * points in this split interval (unless we change our entire strategy, in
     864                 :             :  * which case the interval also changes -- see _bt_strategy()).
     865                 :             :  *
     866                 :             :  * The "Prefix B-Trees" paper calls split interval sigma l for leaf splits,
     867                 :             :  * and sigma b for internal ("branch") splits.  It's hard to provide a
     868                 :             :  * theoretical justification for the size of the split interval, though it's
     869                 :             :  * clear that a small split interval can make tuples on level L+1 much smaller
     870                 :             :  * on average, without noticeably affecting space utilization on level L.
     871                 :             :  * (Note that the way that we calculate split interval might need to change if
     872                 :             :  * suffix truncation is taught to truncate tuples "within" the last
     873                 :             :  * attribute/datum for data types like text, which is more or less how it is
     874                 :             :  * assumed to work in the paper.)
     875                 :             :  */
     876                 :             : static int
     877                 :        2309 : _bt_defaultinterval(FindSplitData *state)
     878                 :             : {
     879                 :        2309 :         SplitPoint *spaceoptimal;
     880                 :        2309 :         int16           tolerance,
     881                 :             :                                 lowleftfree,
     882                 :             :                                 lowrightfree,
     883                 :             :                                 highleftfree,
     884                 :             :                                 highrightfree;
     885                 :             : 
     886                 :             :         /*
     887                 :             :          * Determine leftfree and rightfree values that are higher and lower than
     888                 :             :          * we're willing to tolerate.  Note that the final split interval will be
     889                 :             :          * about 10% of nsplits in the common case where all non-pivot tuples
     890                 :             :          * (data items) from a leaf page are uniformly sized.  We're a bit more
     891                 :             :          * aggressive when splitting internal pages.
     892                 :             :          */
     893         [ +  + ]:        2309 :         if (state->is_leaf)
     894                 :        2258 :                 tolerance = state->olddataitemstotal * LEAF_SPLIT_DISTANCE;
     895                 :             :         else
     896                 :          51 :                 tolerance = state->olddataitemstotal * INTERNAL_SPLIT_DISTANCE;
     897                 :             : 
     898                 :             :         /* First candidate split point is the most evenly balanced */
     899                 :        2309 :         spaceoptimal = state->splits;
     900                 :        2309 :         lowleftfree = spaceoptimal->leftfree - tolerance;
     901                 :        2309 :         lowrightfree = spaceoptimal->rightfree - tolerance;
     902                 :        2309 :         highleftfree = spaceoptimal->leftfree + tolerance;
     903                 :        2309 :         highrightfree = spaceoptimal->rightfree + tolerance;
     904                 :             : 
     905                 :             :         /*
     906                 :             :          * Iterate through split points, starting from the split immediately after
     907                 :             :          * 'spaceoptimal'.  Find the first split point that divides free space so
     908                 :             :          * unevenly that including it in the split interval would be unacceptable.
     909                 :             :          */
     910   [ +  -  +  - ]:       77050 :         for (int i = 1; i < state->nsplits; i++)
     911                 :             :         {
     912                 :       74741 :                 SplitPoint *split = state->splits + i;
     913                 :             : 
     914                 :             :                 /* Cannot use curdelta here, since its value is often weighted */
     915   [ +  +  +  + ]:       74741 :                 if (split->leftfree < lowleftfree || split->rightfree < lowrightfree ||
     916   [ +  +  +  + ]:       72522 :                         split->leftfree > highleftfree || split->rightfree > highrightfree)
     917                 :        2309 :                         return i;
     918         [ +  + ]:       74741 :         }
     919                 :             : 
     920                 :           0 :         return state->nsplits;
     921                 :        2309 : }
     922                 :             : 
     923                 :             : /*
     924                 :             :  * Subroutine to decide whether split should use default strategy/initial
     925                 :             :  * split interval, or whether it should finish splitting the page using
     926                 :             :  * alternative strategies (this is only possible with leaf pages).
     927                 :             :  *
     928                 :             :  * Caller uses alternative strategy (or sticks with default strategy) based
     929                 :             :  * on how *strategy is set here.  Return value is "perfect penalty", which is
     930                 :             :  * passed to _bt_bestsplitloc() as a final constraint on how far caller is
     931                 :             :  * willing to go to avoid appending a heap TID when using the many duplicates
     932                 :             :  * strategy (it also saves _bt_bestsplitloc() useless cycles).
     933                 :             :  */
     934                 :             : static int
     935                 :        2309 : _bt_strategy(FindSplitData *state, SplitPoint *leftpage,
     936                 :             :                          SplitPoint *rightpage, FindSplitStrat *strategy)
     937                 :             : {
     938                 :        2309 :         IndexTuple      leftmost,
     939                 :             :                                 rightmost;
     940                 :        2309 :         SplitPoint *leftinterval,
     941                 :             :                            *rightinterval;
     942                 :        2309 :         int                     perfectpenalty;
     943                 :        2309 :         int                     indnkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
     944                 :             : 
     945                 :             :         /* Assume that alternative strategy won't be used for now */
     946                 :        2309 :         *strategy = SPLIT_DEFAULT;
     947                 :             : 
     948                 :             :         /*
     949                 :             :          * Use smallest observed firstright item size for entire page (actually,
     950                 :             :          * entire imaginary version of page that includes newitem) as perfect
     951                 :             :          * penalty on internal pages.  This can save cycles in the common case
     952                 :             :          * where most or all splits (not just splits within interval) have
     953                 :             :          * firstright tuples that are the same size.
     954                 :             :          */
     955         [ +  + ]:        2309 :         if (!state->is_leaf)
     956                 :          51 :                 return state->minfirstrightsz;
     957                 :             : 
     958                 :             :         /*
     959                 :             :          * Use leftmost and rightmost tuples from leftmost and rightmost splits in
     960                 :             :          * current split interval
     961                 :             :          */
     962                 :        2258 :         _bt_interval_edges(state, &leftinterval, &rightinterval);
     963                 :        2258 :         leftmost = _bt_split_lastleft(state, leftinterval);
     964                 :        2258 :         rightmost = _bt_split_firstright(state, rightinterval);
     965                 :             : 
     966                 :             :         /*
     967                 :             :          * If initial split interval can produce a split point that will at least
     968                 :             :          * avoid appending a heap TID in new high key, we're done.  Finish split
     969                 :             :          * with default strategy and initial split interval.
     970                 :             :          */
     971                 :        2258 :         perfectpenalty = _bt_keep_natts_fast(state->rel, leftmost, rightmost);
     972         [ +  + ]:        2258 :         if (perfectpenalty <= indnkeyatts)
     973                 :        2190 :                 return perfectpenalty;
     974                 :             : 
     975                 :             :         /*
     976                 :             :          * Work out how caller should finish split when even their "perfect"
     977                 :             :          * penalty for initial/default split interval indicates that the interval
     978                 :             :          * does not contain even a single split that avoids appending a heap TID.
     979                 :             :          *
     980                 :             :          * Use the leftmost split's lastleft tuple and the rightmost split's
     981                 :             :          * firstright tuple to assess every possible split.
     982                 :             :          */
     983                 :          68 :         leftmost = _bt_split_lastleft(state, leftpage);
     984                 :          68 :         rightmost = _bt_split_firstright(state, rightpage);
     985                 :             : 
     986                 :             :         /*
     987                 :             :          * If page (including new item) has many duplicates but is not entirely
     988                 :             :          * full of duplicates, a many duplicates strategy split will be performed.
     989                 :             :          * If page is entirely full of duplicates, a single value strategy split
     990                 :             :          * will be performed.
     991                 :             :          */
     992                 :          68 :         perfectpenalty = _bt_keep_natts_fast(state->rel, leftmost, rightmost);
     993         [ +  + ]:          68 :         if (perfectpenalty <= indnkeyatts)
     994                 :             :         {
     995                 :           7 :                 *strategy = SPLIT_MANY_DUPLICATES;
     996                 :             : 
     997                 :             :                 /*
     998                 :             :                  * Many duplicates strategy should split at either side the group of
     999                 :             :                  * duplicates that enclose the delta-optimal split point.  Return
    1000                 :             :                  * indnkeyatts rather than the true perfect penalty to make that
    1001                 :             :                  * happen.  (If perfectpenalty was returned here then low cardinality
    1002                 :             :                  * composite indexes could have continual unbalanced splits.)
    1003                 :             :                  *
    1004                 :             :                  * Note that caller won't go through with a many duplicates split in
    1005                 :             :                  * rare cases where it looks like there are ever-decreasing insertions
    1006                 :             :                  * to the immediate right of the split point.  This must happen just
    1007                 :             :                  * before a final decision is made, within _bt_bestsplitloc().
    1008                 :             :                  */
    1009                 :           7 :                 return indnkeyatts;
    1010                 :             :         }
    1011                 :             : 
    1012                 :             :         /*
    1013                 :             :          * Single value strategy is only appropriate with ever-increasing heap
    1014                 :             :          * TIDs; otherwise, original default strategy split should proceed to
    1015                 :             :          * avoid pathological performance.  Use page high key to infer if this is
    1016                 :             :          * the rightmost page among pages that store the same duplicate value.
    1017                 :             :          * This should not prevent insertions of heap TIDs that are slightly out
    1018                 :             :          * of order from using single value strategy, since that's expected with
    1019                 :             :          * concurrent inserters of the same duplicate value.
    1020                 :             :          */
    1021         [ +  + ]:          61 :         else if (state->is_rightmost)
    1022                 :          33 :                 *strategy = SPLIT_SINGLE_VALUE;
    1023                 :             :         else
    1024                 :             :         {
    1025                 :          28 :                 ItemId          itemid;
    1026                 :          28 :                 IndexTuple      hikey;
    1027                 :             : 
    1028                 :          28 :                 itemid = PageGetItemId(state->origpage, P_HIKEY);
    1029                 :          28 :                 hikey = (IndexTuple) PageGetItem(state->origpage, itemid);
    1030                 :          56 :                 perfectpenalty = _bt_keep_natts_fast(state->rel, hikey,
    1031                 :          28 :                                                                                          state->newitem);
    1032         [ +  + ]:          28 :                 if (perfectpenalty <= indnkeyatts)
    1033                 :           9 :                         *strategy = SPLIT_SINGLE_VALUE;
    1034                 :             :                 else
    1035                 :             :                 {
    1036                 :             :                         /*
    1037                 :             :                          * Have caller finish split using default strategy, since page
    1038                 :             :                          * does not appear to be the rightmost page for duplicates of the
    1039                 :             :                          * value the page is filled with
    1040                 :             :                          */
    1041                 :             :                 }
    1042                 :          28 :         }
    1043                 :             : 
    1044                 :          61 :         return perfectpenalty;
    1045                 :        2309 : }
    1046                 :             : 
    1047                 :             : /*
    1048                 :             :  * Subroutine to locate leftmost and rightmost splits for current/default
    1049                 :             :  * split interval.  Note that it will be the same split iff there is only one
    1050                 :             :  * split in interval.
    1051                 :             :  */
    1052                 :             : static void
    1053                 :        2258 : _bt_interval_edges(FindSplitData *state, SplitPoint **leftinterval,
    1054                 :             :                                    SplitPoint **rightinterval)
    1055                 :             : {
    1056         [ +  - ]:        2258 :         int                     highsplit = Min(state->interval, state->nsplits);
    1057                 :        2258 :         SplitPoint *deltaoptimal;
    1058                 :             : 
    1059                 :        2258 :         deltaoptimal = state->splits;
    1060                 :        2258 :         *leftinterval = NULL;
    1061                 :        2258 :         *rightinterval = NULL;
    1062                 :             : 
    1063                 :             :         /*
    1064                 :             :          * Delta is an absolute distance to optimal split point, so both the
    1065                 :             :          * leftmost and rightmost split point will usually be at the end of the
    1066                 :             :          * array
    1067                 :             :          */
    1068   [ +  -  +  - ]:        6781 :         for (int i = highsplit - 1; i >= 0; i--)
    1069                 :             :         {
    1070                 :        4523 :                 SplitPoint *distant = state->splits + i;
    1071                 :             : 
    1072         [ +  + ]:        4523 :                 if (distant->firstrightoff < deltaoptimal->firstrightoff)
    1073                 :             :                 {
    1074         [ +  + ]:        2215 :                         if (*leftinterval == NULL)
    1075                 :        2188 :                                 *leftinterval = distant;
    1076                 :        2215 :                 }
    1077         [ +  + ]:        2308 :                 else if (distant->firstrightoff > deltaoptimal->firstrightoff)
    1078                 :             :                 {
    1079         [ +  + ]:        2236 :                         if (*rightinterval == NULL)
    1080                 :        2198 :                                 *rightinterval = distant;
    1081                 :        2236 :                 }
    1082   [ +  +  +  - ]:          72 :                 else if (!distant->newitemonleft && deltaoptimal->newitemonleft)
    1083                 :             :                 {
    1084                 :             :                         /*
    1085                 :             :                          * "incoming tuple will become firstright" (distant) is to the
    1086                 :             :                          * left of "incoming tuple will become lastleft" (delta-optimal)
    1087                 :             :                          */
    1088         [ #  # ]:           0 :                         Assert(distant->firstrightoff == state->newitemoff);
    1089         [ #  # ]:           0 :                         if (*leftinterval == NULL)
    1090                 :           0 :                                 *leftinterval = distant;
    1091                 :           0 :                 }
    1092   [ +  +  +  + ]:          72 :                 else if (distant->newitemonleft && !deltaoptimal->newitemonleft)
    1093                 :             :                 {
    1094                 :             :                         /*
    1095                 :             :                          * "incoming tuple will become lastleft" (distant) is to the right
    1096                 :             :                          * of "incoming tuple will become firstright" (delta-optimal)
    1097                 :             :                          */
    1098         [ -  + ]:           2 :                         Assert(distant->firstrightoff == state->newitemoff);
    1099         [ -  + ]:           2 :                         if (*rightinterval == NULL)
    1100                 :           2 :                                 *rightinterval = distant;
    1101                 :           2 :                 }
    1102                 :             :                 else
    1103                 :             :                 {
    1104                 :             :                         /* There was only one or two splits in initial split interval */
    1105         [ -  + ]:          70 :                         Assert(distant == deltaoptimal);
    1106         [ -  + ]:          70 :                         if (*leftinterval == NULL)
    1107                 :          70 :                                 *leftinterval = distant;
    1108         [ +  + ]:          70 :                         if (*rightinterval == NULL)
    1109                 :          58 :                                 *rightinterval = distant;
    1110                 :             :                 }
    1111                 :             : 
    1112   [ +  +  +  + ]:        4523 :                 if (*leftinterval && *rightinterval)
    1113                 :        2258 :                         return;
    1114         [ +  + ]:        4523 :         }
    1115                 :             : 
    1116                 :           0 :         Assert(false);
    1117         [ -  + ]:        2258 : }
    1118                 :             : 
    1119                 :             : /*
    1120                 :             :  * Subroutine to find penalty for caller's candidate split point.
    1121                 :             :  *
    1122                 :             :  * On leaf pages, penalty is the attribute number that distinguishes each side
    1123                 :             :  * of a split.  It's the last attribute that needs to be included in new high
    1124                 :             :  * key for left page.  It can be greater than the number of key attributes in
    1125                 :             :  * cases where a heap TID will need to be appended during truncation.
    1126                 :             :  *
    1127                 :             :  * On internal pages, penalty is simply the size of the firstright tuple for
    1128                 :             :  * the split (including line pointer overhead).  This tuple will become the
    1129                 :             :  * new high key for the left page.
    1130                 :             :  */
    1131                 :             : static inline int
    1132                 :        4133 : _bt_split_penalty(FindSplitData *state, SplitPoint *split)
    1133                 :             : {
    1134                 :        4133 :         IndexTuple      lastleft;
    1135                 :        4133 :         IndexTuple      firstright;
    1136                 :             : 
    1137         [ +  + ]:        4133 :         if (!state->is_leaf)
    1138                 :             :         {
    1139                 :          51 :                 ItemId          itemid;
    1140                 :             : 
    1141   [ +  -  +  + ]:          51 :                 if (!split->newitemonleft &&
    1142                 :          51 :                         split->firstrightoff == state->newitemoff)
    1143                 :           6 :                         return state->newitemsz;
    1144                 :             : 
    1145                 :          45 :                 itemid = PageGetItemId(state->origpage, split->firstrightoff);
    1146                 :             : 
    1147                 :          45 :                 return MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData);
    1148                 :          51 :         }
    1149                 :             : 
    1150                 :        4082 :         lastleft = _bt_split_lastleft(state, split);
    1151                 :        4082 :         firstright = _bt_split_firstright(state, split);
    1152                 :             : 
    1153                 :        4082 :         return _bt_keep_natts_fast(state->rel, lastleft, firstright);
    1154                 :        4133 : }
    1155                 :             : 
    1156                 :             : /*
    1157                 :             :  * Subroutine to get a lastleft IndexTuple for a split point
    1158                 :             :  */
    1159                 :             : static inline IndexTuple
    1160                 :        6408 : _bt_split_lastleft(FindSplitData *state, SplitPoint *split)
    1161                 :             : {
    1162                 :        6408 :         ItemId          itemid;
    1163                 :             : 
    1164   [ +  +  +  + ]:        6408 :         if (split->newitemonleft && split->firstrightoff == state->newitemoff)
    1165                 :          56 :                 return state->newitem;
    1166                 :             : 
    1167                 :       12704 :         itemid = PageGetItemId(state->origpage,
    1168                 :        6352 :                                                    OffsetNumberPrev(split->firstrightoff));
    1169                 :        6352 :         return (IndexTuple) PageGetItem(state->origpage, itemid);
    1170                 :        6408 : }
    1171                 :             : 
    1172                 :             : /*
    1173                 :             :  * Subroutine to get a firstright IndexTuple for a split point
    1174                 :             :  */
    1175                 :             : static inline IndexTuple
    1176                 :        6408 : _bt_split_firstright(FindSplitData *state, SplitPoint *split)
    1177                 :             : {
    1178                 :        6408 :         ItemId          itemid;
    1179                 :             : 
    1180   [ +  +  +  + ]:        6408 :         if (!split->newitemonleft && split->firstrightoff == state->newitemoff)
    1181                 :          19 :                 return state->newitem;
    1182                 :             : 
    1183                 :        6389 :         itemid = PageGetItemId(state->origpage, split->firstrightoff);
    1184                 :        6389 :         return (IndexTuple) PageGetItem(state->origpage, itemid);
    1185                 :        6408 : }
        

Generated by: LCOV version 2.3.2-1