LCOV - code coverage report
Current view: top level - src/backend/access/gist - gistvacuum.c (source / functions) Coverage Total Hit
Test: Code coverage Lines: 58.0 % 264 153
Test Date: 2026-01-26 10:56:24 Functions: 83.3 % 6 5
Legend: Lines:     hit not hit
Branches: + taken - not taken # not executed
Branches: 39.2 % 130 51

             Branch data     Line data    Source code
       1                 :             : /*-------------------------------------------------------------------------
       2                 :             :  *
       3                 :             :  * gistvacuum.c
       4                 :             :  *        vacuuming routines for the postgres GiST index access method.
       5                 :             :  *
       6                 :             :  *
       7                 :             :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       8                 :             :  * Portions Copyright (c) 1994, Regents of the University of California
       9                 :             :  *
      10                 :             :  * IDENTIFICATION
      11                 :             :  *        src/backend/access/gist/gistvacuum.c
      12                 :             :  *
      13                 :             :  *-------------------------------------------------------------------------
      14                 :             :  */
      15                 :             : #include "postgres.h"
      16                 :             : 
      17                 :             : #include "access/genam.h"
      18                 :             : #include "access/gist_private.h"
      19                 :             : #include "access/transam.h"
      20                 :             : #include "commands/vacuum.h"
      21                 :             : #include "lib/integerset.h"
      22                 :             : #include "miscadmin.h"
      23                 :             : #include "storage/indexfsm.h"
      24                 :             : #include "storage/lmgr.h"
      25                 :             : #include "storage/read_stream.h"
      26                 :             : #include "utils/memutils.h"
      27                 :             : 
      28                 :             : /* Working state needed by gistbulkdelete */
      29                 :             : typedef struct
      30                 :             : {
      31                 :             :         IndexVacuumInfo *info;
      32                 :             :         IndexBulkDeleteResult *stats;
      33                 :             :         IndexBulkDeleteCallback callback;
      34                 :             :         void       *callback_state;
      35                 :             :         GistNSN         startNSN;
      36                 :             : 
      37                 :             :         /*
      38                 :             :          * These are used to memorize all internal and empty leaf pages.  They are
      39                 :             :          * used for deleting all the empty pages.
      40                 :             :          */
      41                 :             :         IntegerSet *internal_page_set;
      42                 :             :         IntegerSet *empty_leaf_set;
      43                 :             :         MemoryContext page_set_context;
      44                 :             : } GistVacState;
      45                 :             : 
      46                 :             : static void gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
      47                 :             :                                                    IndexBulkDeleteCallback callback, void *callback_state);
      48                 :             : static void gistvacuumpage(GistVacState *vstate, Buffer buffer);
      49                 :             : static void gistvacuum_delete_empty_pages(IndexVacuumInfo *info,
      50                 :             :                                                                                   GistVacState *vstate);
      51                 :             : static bool gistdeletepage(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
      52                 :             :                                                    Buffer parentBuffer, OffsetNumber downlink,
      53                 :             :                                                    Buffer leafBuffer);
      54                 :             : 
      55                 :             : /*
      56                 :             :  * VACUUM bulkdelete stage: remove index entries.
      57                 :             :  */
      58                 :             : IndexBulkDeleteResult *
      59                 :           6 : gistbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
      60                 :             :                            IndexBulkDeleteCallback callback, void *callback_state)
      61                 :             : {
      62                 :             :         /* allocate stats if first time through, else re-use existing struct */
      63         [ -  + ]:           6 :         if (stats == NULL)
      64                 :           6 :                 stats = palloc0_object(IndexBulkDeleteResult);
      65                 :             : 
      66                 :           6 :         gistvacuumscan(info, stats, callback, callback_state);
      67                 :             : 
      68                 :           6 :         return stats;
      69                 :             : }
      70                 :             : 
      71                 :             : /*
      72                 :             :  * VACUUM cleanup stage: delete empty pages, and update index statistics.
      73                 :             :  */
      74                 :             : IndexBulkDeleteResult *
      75                 :          12 : gistvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
      76                 :             : {
      77                 :             :         /* No-op in ANALYZE ONLY mode */
      78         [ +  + ]:          12 :         if (info->analyze_only)
      79                 :           2 :                 return stats;
      80                 :             : 
      81                 :             :         /*
      82                 :             :          * If gistbulkdelete was called, we need not do anything, just return the
      83                 :             :          * stats from the latest gistbulkdelete call.  If it wasn't called, we
      84                 :             :          * still need to do a pass over the index, to obtain index statistics.
      85                 :             :          */
      86         [ +  + ]:          10 :         if (stats == NULL)
      87                 :             :         {
      88                 :           7 :                 stats = palloc0_object(IndexBulkDeleteResult);
      89                 :           7 :                 gistvacuumscan(info, stats, NULL, NULL);
      90                 :           7 :         }
      91                 :             : 
      92                 :             :         /*
      93                 :             :          * It's quite possible for us to be fooled by concurrent page splits into
      94                 :             :          * double-counting some index tuples, so disbelieve any total that exceeds
      95                 :             :          * the underlying heap's count ... if we know that accurately.  Otherwise
      96                 :             :          * this might just make matters worse.
      97                 :             :          */
      98         [ -  + ]:          10 :         if (!info->estimated_count)
      99                 :             :         {
     100         [ +  - ]:          10 :                 if (stats->num_index_tuples > info->num_heap_tuples)
     101                 :           0 :                         stats->num_index_tuples = info->num_heap_tuples;
     102                 :          10 :         }
     103                 :             : 
     104                 :          10 :         return stats;
     105                 :          12 : }
     106                 :             : 
     107                 :             : /*
     108                 :             :  * gistvacuumscan --- scan the index for VACUUMing purposes
     109                 :             :  *
     110                 :             :  * This scans the index for leaf tuples that are deletable according to the
     111                 :             :  * vacuum callback, and updates the stats.  Both btbulkdelete and
     112                 :             :  * btvacuumcleanup invoke this (the latter only if no btbulkdelete call
     113                 :             :  * occurred).
     114                 :             :  *
     115                 :             :  * This also makes note of any empty leaf pages, as well as all internal
     116                 :             :  * pages while looping over all index pages.  After scanning all the pages, we
     117                 :             :  * remove the empty pages so that they can be reused.  Any deleted pages are
     118                 :             :  * added directly to the free space map.  (They should've been added there
     119                 :             :  * when they were originally deleted, already, but it's possible that the FSM
     120                 :             :  * was lost at a crash, for example.)
     121                 :             :  *
     122                 :             :  * The caller is responsible for initially allocating/zeroing a stats struct.
     123                 :             :  */
     124                 :             : static void
     125                 :          13 : gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
     126                 :             :                            IndexBulkDeleteCallback callback, void *callback_state)
     127                 :             : {
     128                 :          13 :         Relation        rel = info->index;
     129                 :          13 :         GistVacState vstate;
     130                 :          13 :         BlockNumber num_pages;
     131                 :          13 :         bool            needLock;
     132                 :          13 :         MemoryContext oldctx;
     133                 :          13 :         BlockRangeReadStreamPrivate p;
     134                 :          13 :         ReadStream *stream = NULL;
     135                 :             : 
     136                 :             :         /*
     137                 :             :          * Reset fields that track information about the entire index now.  This
     138                 :             :          * avoids double-counting in the case where a single VACUUM command
     139                 :             :          * requires multiple scans of the index.
     140                 :             :          *
     141                 :             :          * Avoid resetting the tuples_removed and pages_newly_deleted fields here,
     142                 :             :          * since they track information about the VACUUM command, and so must last
     143                 :             :          * across each call to gistvacuumscan().
     144                 :             :          *
     145                 :             :          * (Note that pages_free is treated as state about the whole index, not
     146                 :             :          * the current VACUUM.  This is appropriate because RecordFreeIndexPage()
     147                 :             :          * calls are idempotent, and get repeated for the same deleted pages in
     148                 :             :          * some scenarios.  The point for us is to track the number of recyclable
     149                 :             :          * pages in the index at the end of the VACUUM command.)
     150                 :             :          */
     151                 :          13 :         stats->num_pages = 0;
     152                 :          13 :         stats->estimated_count = false;
     153                 :          13 :         stats->num_index_tuples = 0;
     154                 :          13 :         stats->pages_deleted = 0;
     155                 :          13 :         stats->pages_free = 0;
     156                 :             : 
     157                 :             :         /*
     158                 :             :          * Create the integer sets to remember all the internal and the empty leaf
     159                 :             :          * pages in page_set_context.  Internally, the integer set will remember
     160                 :             :          * this context so that the subsequent allocations for these integer sets
     161                 :             :          * will be done from the same context.
     162                 :             :          *
     163                 :             :          * XXX the allocation sizes used below pre-date generation context's block
     164                 :             :          * growing code.  These values should likely be benchmarked and set to
     165                 :             :          * more suitable values.
     166                 :             :          */
     167                 :          13 :         vstate.page_set_context = GenerationContextCreate(CurrentMemoryContext,
     168                 :             :                                                                                                           "GiST VACUUM page set context",
     169                 :             :                                                                                                           16 * 1024,
     170                 :             :                                                                                                           16 * 1024,
     171                 :             :                                                                                                           16 * 1024);
     172                 :          13 :         oldctx = MemoryContextSwitchTo(vstate.page_set_context);
     173                 :          13 :         vstate.internal_page_set = intset_create();
     174                 :          13 :         vstate.empty_leaf_set = intset_create();
     175                 :          13 :         MemoryContextSwitchTo(oldctx);
     176                 :             : 
     177                 :             :         /* Set up info to pass down to gistvacuumpage */
     178                 :          13 :         vstate.info = info;
     179                 :          13 :         vstate.stats = stats;
     180                 :          13 :         vstate.callback = callback;
     181                 :          13 :         vstate.callback_state = callback_state;
     182   [ +  -  +  -  :          13 :         if (RelationNeedsWAL(rel))
             +  -  -  + ]
     183                 :          13 :                 vstate.startNSN = GetInsertRecPtr();
     184                 :             :         else
     185                 :           0 :                 vstate.startNSN = gistGetFakeLSN(rel);
     186                 :             : 
     187                 :             :         /*
     188                 :             :          * The outer loop iterates over all index pages, in physical order (we
     189                 :             :          * hope the kernel will cooperate in providing read-ahead for speed).  It
     190                 :             :          * is critical that we visit all leaf pages, including ones added after we
     191                 :             :          * start the scan, else we might fail to delete some deletable tuples.
     192                 :             :          * Hence, we must repeatedly check the relation length.  We must acquire
     193                 :             :          * the relation-extension lock while doing so to avoid a race condition:
     194                 :             :          * if someone else is extending the relation, there is a window where
     195                 :             :          * bufmgr/smgr have created a new all-zero page but it hasn't yet been
     196                 :             :          * write-locked by gistNewBuffer().  If we manage to scan such a page
     197                 :             :          * here, we'll improperly assume it can be recycled.  Taking the lock
     198                 :             :          * synchronizes things enough to prevent a problem: either num_pages won't
     199                 :             :          * include the new page, or gistNewBuffer already has write lock on the
     200                 :             :          * buffer and it will be fully initialized before we can examine it.  (See
     201                 :             :          * also vacuumlazy.c, which has the same issue.)  Also, we need not worry
     202                 :             :          * if a page is added immediately after we look; the page splitting code
     203                 :             :          * already has write-lock on the left page before it adds a right page, so
     204                 :             :          * we must already have processed any tuples due to be moved into such a
     205                 :             :          * page.
     206                 :             :          *
     207                 :             :          * We can skip locking for new or temp relations, however, since no one
     208                 :             :          * else could be accessing them.
     209                 :             :          */
     210         [ -  + ]:          13 :         needLock = !RELATION_IS_LOCAL(rel);
     211                 :             : 
     212                 :          13 :         p.current_blocknum = GIST_ROOT_BLKNO;
     213                 :             : 
     214                 :             :         /*
     215                 :             :          * It is safe to use batchmode as block_range_read_stream_cb takes no
     216                 :             :          * locks.
     217                 :             :          */
     218                 :          13 :         stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE |
     219                 :             :                                                                                 READ_STREAM_FULL |
     220                 :             :                                                                                 READ_STREAM_USE_BATCHING,
     221                 :          13 :                                                                                 info->strategy,
     222                 :          13 :                                                                                 rel,
     223                 :             :                                                                                 MAIN_FORKNUM,
     224                 :             :                                                                                 block_range_read_stream_cb,
     225                 :             :                                                                                 &p,
     226                 :             :                                                                                 0);
     227                 :          26 :         for (;;)
     228                 :             :         {
     229                 :             :                 /* Get the current relation length */
     230         [ -  + ]:          26 :                 if (needLock)
     231                 :          26 :                         LockRelationForExtension(rel, ExclusiveLock);
     232                 :          26 :                 num_pages = RelationGetNumberOfBlocks(rel);
     233         [ -  + ]:          26 :                 if (needLock)
     234                 :          26 :                         UnlockRelationForExtension(rel, ExclusiveLock);
     235                 :             : 
     236                 :             :                 /* Quit if we've scanned the whole relation */
     237         [ +  + ]:          26 :                 if (p.current_blocknum >= num_pages)
     238                 :          13 :                         break;
     239                 :             : 
     240                 :          13 :                 p.last_exclusive = num_pages;
     241                 :             : 
     242                 :             :                 /* Iterate over pages, then loop back to recheck relation length */
     243                 :         337 :                 while (true)
     244                 :             :                 {
     245                 :         337 :                         Buffer          buf;
     246                 :             : 
     247                 :             :                         /* call vacuum_delay_point while not holding any buffer lock */
     248                 :         337 :                         vacuum_delay_point(false);
     249                 :             : 
     250                 :         337 :                         buf = read_stream_next_buffer(stream, NULL);
     251                 :             : 
     252         [ +  + ]:         337 :                         if (!BufferIsValid(buf))
     253                 :          13 :                                 break;
     254                 :             : 
     255                 :         324 :                         gistvacuumpage(&vstate, buf);
     256      [ -  +  + ]:         337 :                 }
     257                 :             : 
     258                 :             :                 /*
     259                 :             :                  * We have to reset the read stream to use it again. After returning
     260                 :             :                  * InvalidBuffer, the read stream API won't invoke our callback again
     261                 :             :                  * until the stream has been reset.
     262                 :             :                  */
     263                 :          13 :                 read_stream_reset(stream);
     264                 :             :         }
     265                 :             : 
     266                 :          13 :         read_stream_end(stream);
     267                 :             : 
     268                 :             :         /*
     269                 :             :          * If we found any recyclable pages (and recorded them in the FSM), then
     270                 :             :          * forcibly update the upper-level FSM pages to ensure that searchers can
     271                 :             :          * find them.  It's possible that the pages were also found during
     272                 :             :          * previous scans and so this is a waste of time, but it's cheap enough
     273                 :             :          * relative to scanning the index that it shouldn't matter much, and
     274                 :             :          * making sure that free pages are available sooner not later seems
     275                 :             :          * worthwhile.
     276                 :             :          *
     277                 :             :          * Note that if no recyclable pages exist, we don't bother vacuuming the
     278                 :             :          * FSM at all.
     279                 :             :          */
     280         [ +  - ]:          13 :         if (stats->pages_free > 0)
     281                 :           0 :                 IndexFreeSpaceMapVacuum(rel);
     282                 :             : 
     283                 :             :         /* update statistics */
     284                 :          13 :         stats->num_pages = num_pages;
     285                 :             : 
     286                 :             :         /*
     287                 :             :          * If we saw any empty pages, try to unlink them from the tree so that
     288                 :             :          * they can be reused.
     289                 :             :          */
     290                 :          13 :         gistvacuum_delete_empty_pages(info, &vstate);
     291                 :             : 
     292                 :             :         /* we don't need the internal and empty page sets anymore */
     293                 :          13 :         MemoryContextDelete(vstate.page_set_context);
     294                 :          13 :         vstate.page_set_context = NULL;
     295                 :          13 :         vstate.internal_page_set = NULL;
     296                 :          13 :         vstate.empty_leaf_set = NULL;
     297                 :          13 : }
     298                 :             : 
     299                 :             : /*
     300                 :             :  * gistvacuumpage --- VACUUM one page
     301                 :             :  *
     302                 :             :  * This processes a single page for gistbulkdelete(). `buffer` contains the
     303                 :             :  * page to process. In some cases we must go back and reexamine
     304                 :             :  * previously-scanned pages; this routine recurses when necessary to handle
     305                 :             :  * that case.
     306                 :             :  */
     307                 :             : static void
     308                 :         324 : gistvacuumpage(GistVacState *vstate, Buffer buffer)
     309                 :             : {
     310                 :         324 :         IndexVacuumInfo *info = vstate->info;
     311                 :         324 :         IndexBulkDeleteCallback callback = vstate->callback;
     312                 :         324 :         void       *callback_state = vstate->callback_state;
     313                 :         324 :         Relation        rel = info->index;
     314                 :         324 :         BlockNumber orig_blkno = BufferGetBlockNumber(buffer);
     315                 :         324 :         Page            page;
     316                 :         324 :         BlockNumber recurse_to;
     317                 :             : 
     318                 :             :         /*
     319                 :             :          * orig_blkno is the highest block number reached by the outer
     320                 :             :          * gistvacuumscan() loop. This will be the same as blkno unless we are
     321                 :             :          * recursing to reexamine a previous page.
     322                 :             :          */
     323                 :         324 :         BlockNumber blkno = orig_blkno;
     324                 :             : 
     325                 :             : restart:
     326                 :         324 :         recurse_to = InvalidBlockNumber;
     327                 :             : 
     328                 :             :         /*
     329                 :             :          * We are not going to stay here for a long time, aggressively grab an
     330                 :             :          * exclusive lock.
     331                 :             :          */
     332                 :         324 :         LockBuffer(buffer, GIST_EXCLUSIVE);
     333                 :         324 :         page = BufferGetPage(buffer);
     334                 :             : 
     335         [ -  + ]:         324 :         if (gistPageRecyclable(page))
     336                 :             :         {
     337                 :             :                 /* Okay to recycle this page */
     338                 :           0 :                 RecordFreeIndexPage(rel, blkno);
     339                 :           0 :                 vstate->stats->pages_deleted++;
     340                 :           0 :                 vstate->stats->pages_free++;
     341                 :           0 :         }
     342         [ -  + ]:         324 :         else if (GistPageIsDeleted(page))
     343                 :             :         {
     344                 :             :                 /* Already deleted, but can't recycle yet */
     345                 :           0 :                 vstate->stats->pages_deleted++;
     346                 :           0 :         }
     347         [ +  + ]:         324 :         else if (GistPageIsLeaf(page))
     348                 :             :         {
     349                 :         316 :                 OffsetNumber todelete[MaxOffsetNumber];
     350                 :         316 :                 int                     ntodelete = 0;
     351                 :         316 :                 int                     nremain;
     352                 :         316 :                 GISTPageOpaque opaque = GistPageGetOpaque(page);
     353                 :         316 :                 OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
     354                 :             : 
     355                 :             :                 /*
     356                 :             :                  * Check whether we need to recurse back to earlier pages.  What we
     357                 :             :                  * are concerned about is a page split that happened since we started
     358                 :             :                  * the vacuum scan.  If the split moved some tuples to a lower page
     359                 :             :                  * then we might have missed 'em.  If so, set up for tail recursion.
     360                 :             :                  *
     361                 :             :                  * This is similar to the checks we do during searches, when following
     362                 :             :                  * a downlink, but we don't need to jump to higher-numbered pages,
     363                 :             :                  * because we will process them later, anyway.
     364                 :             :                  */
     365         [ +  - ]:         316 :                 if ((GistFollowRight(page) ||
     366                 :         316 :                          vstate->startNSN < GistPageGetNSN(page)) &&
     367   [ -  +  #  # ]:         316 :                         (opaque->rightlink != InvalidBlockNumber) &&
     368                 :           0 :                         (opaque->rightlink < orig_blkno))
     369                 :             :                 {
     370                 :           0 :                         recurse_to = opaque->rightlink;
     371                 :           0 :                 }
     372                 :             : 
     373                 :             :                 /*
     374                 :             :                  * Scan over all items to see which ones need to be deleted according
     375                 :             :                  * to the callback function.
     376                 :             :                  */
     377         [ +  + ]:         316 :                 if (callback)
     378                 :             :                 {
     379                 :          21 :                         OffsetNumber off;
     380                 :             : 
     381         [ +  + ]:        2039 :                         for (off = FirstOffsetNumber;
     382                 :        2039 :                                  off <= maxoff;
     383                 :        2018 :                                  off = OffsetNumberNext(off))
     384                 :             :                         {
     385                 :        2018 :                                 ItemId          iid = PageGetItemId(page, off);
     386                 :        2018 :                                 IndexTuple      idxtuple = (IndexTuple) PageGetItem(page, iid);
     387                 :             : 
     388         [ +  + ]:        2018 :                                 if (callback(&(idxtuple->t_tid), callback_state))
     389                 :        1002 :                                         todelete[ntodelete++] = off;
     390                 :        2018 :                         }
     391                 :          21 :                 }
     392                 :             : 
     393                 :             :                 /*
     394                 :             :                  * Apply any needed deletes.  We issue just one WAL record per page,
     395                 :             :                  * so as to minimize WAL traffic.
     396                 :             :                  */
     397         [ +  + ]:         316 :                 if (ntodelete > 0)
     398                 :             :                 {
     399                 :          18 :                         START_CRIT_SECTION();
     400                 :             : 
     401                 :          18 :                         MarkBufferDirty(buffer);
     402                 :             : 
     403                 :          18 :                         PageIndexMultiDelete(page, todelete, ntodelete);
     404                 :          18 :                         GistMarkTuplesDeleted(page);
     405                 :             : 
     406   [ +  -  +  -  :          18 :                         if (RelationNeedsWAL(rel))
             +  -  -  + ]
     407                 :             :                         {
     408                 :          18 :                                 XLogRecPtr      recptr;
     409                 :             : 
     410                 :          36 :                                 recptr = gistXLogUpdate(buffer,
     411                 :          18 :                                                                                 todelete, ntodelete,
     412                 :             :                                                                                 NULL, 0, InvalidBuffer);
     413                 :          18 :                                 PageSetLSN(page, recptr);
     414                 :          18 :                         }
     415                 :             :                         else
     416                 :           0 :                                 PageSetLSN(page, gistGetFakeLSN(rel));
     417                 :             : 
     418         [ -  + ]:          18 :                         END_CRIT_SECTION();
     419                 :             : 
     420                 :          18 :                         vstate->stats->tuples_removed += ntodelete;
     421                 :             :                         /* must recompute maxoff */
     422                 :          18 :                         maxoff = PageGetMaxOffsetNumber(page);
     423                 :          18 :                 }
     424                 :             : 
     425                 :         316 :                 nremain = maxoff - FirstOffsetNumber + 1;
     426         [ +  + ]:         316 :                 if (nremain == 0)
     427                 :             :                 {
     428                 :             :                         /*
     429                 :             :                          * The page is now completely empty.  Remember its block number,
     430                 :             :                          * so that we will try to delete the page in the second stage.
     431                 :             :                          *
     432                 :             :                          * Skip this when recursing, because IntegerSet requires that the
     433                 :             :                          * values are added in ascending order.  The next VACUUM will pick
     434                 :             :                          * it up.
     435                 :             :                          */
     436         [ -  + ]:           2 :                         if (blkno == orig_blkno)
     437                 :           2 :                                 intset_add_member(vstate->empty_leaf_set, blkno);
     438                 :           2 :                 }
     439                 :             :                 else
     440                 :         314 :                         vstate->stats->num_index_tuples += nremain;
     441                 :         316 :         }
     442                 :             :         else
     443                 :             :         {
     444                 :             :                 /*
     445                 :             :                  * On an internal page, check for "invalid tuples", left behind by an
     446                 :             :                  * incomplete page split on PostgreSQL 9.0 or below.  These are not
     447                 :             :                  * created by newer PostgreSQL versions, but unfortunately, there is
     448                 :             :                  * no version number anywhere in a GiST index, so we don't know
     449                 :             :                  * whether this index might still contain invalid tuples or not.
     450                 :             :                  */
     451                 :           8 :                 OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
     452                 :           8 :                 OffsetNumber off;
     453                 :             : 
     454         [ +  + ]:         319 :                 for (off = FirstOffsetNumber;
     455                 :         319 :                          off <= maxoff;
     456                 :         311 :                          off = OffsetNumberNext(off))
     457                 :             :                 {
     458                 :         311 :                         ItemId          iid = PageGetItemId(page, off);
     459                 :         311 :                         IndexTuple      idxtuple = (IndexTuple) PageGetItem(page, iid);
     460                 :             : 
     461         [ +  - ]:         311 :                         if (GistTupleIsInvalid(idxtuple))
     462   [ #  #  #  # ]:           0 :                                 ereport(LOG,
     463                 :             :                                                 (errmsg("index \"%s\" contains an inner tuple marked as invalid",
     464                 :             :                                                                 RelationGetRelationName(rel)),
     465                 :             :                                                  errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."),
     466                 :             :                                                  errhint("Please REINDEX it.")));
     467                 :         311 :                 }
     468                 :             : 
     469                 :             :                 /*
     470                 :             :                  * Remember the block number of this page, so that we can revisit it
     471                 :             :                  * later in gistvacuum_delete_empty_pages(), when we search for
     472                 :             :                  * parents of empty leaf pages.
     473                 :             :                  */
     474         [ -  + ]:           8 :                 if (blkno == orig_blkno)
     475                 :           8 :                         intset_add_member(vstate->internal_page_set, blkno);
     476                 :           8 :         }
     477                 :             : 
     478                 :         324 :         UnlockReleaseBuffer(buffer);
     479                 :             : 
     480                 :             :         /*
     481                 :             :          * This is really tail recursion, but if the compiler is too stupid to
     482                 :             :          * optimize it as such, we'd eat an uncomfortably large amount of stack
     483                 :             :          * space per recursion level (due to the deletable[] array).  A failure is
     484                 :             :          * improbable since the number of levels isn't likely to be large ... but
     485                 :             :          * just in case, let's hand-optimize into a loop.
     486                 :             :          */
     487         [ -  + ]:         324 :         if (recurse_to != InvalidBlockNumber)
     488                 :             :         {
     489                 :           0 :                 blkno = recurse_to;
     490                 :             : 
     491                 :             :                 /* check for vacuum delay while not holding any buffer lock */
     492                 :           0 :                 vacuum_delay_point(false);
     493                 :             : 
     494                 :           0 :                 buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
     495                 :           0 :                                                                         info->strategy);
     496                 :           0 :                 goto restart;
     497                 :             :         }
     498                 :         324 : }
     499                 :             : 
     500                 :             : /*
     501                 :             :  * Scan all internal pages, and try to delete their empty child pages.
     502                 :             :  */
     503                 :             : static void
     504                 :          13 : gistvacuum_delete_empty_pages(IndexVacuumInfo *info, GistVacState *vstate)
     505                 :             : {
     506                 :          13 :         Relation        rel = info->index;
     507                 :          13 :         BlockNumber empty_pages_remaining;
     508                 :          13 :         uint64          blkno;
     509                 :             : 
     510                 :             :         /*
     511                 :             :          * Rescan all inner pages to find those that have empty child pages.
     512                 :             :          */
     513                 :          13 :         empty_pages_remaining = intset_num_entries(vstate->empty_leaf_set);
     514                 :          13 :         intset_begin_iterate(vstate->internal_page_set);
     515   [ +  +  -  + ]:          15 :         while (empty_pages_remaining > 0 &&
     516                 :           2 :                    intset_iterate_next(vstate->internal_page_set, &blkno))
     517                 :             :         {
     518                 :           0 :                 Buffer          buffer;
     519                 :           0 :                 Page            page;
     520                 :           0 :                 OffsetNumber off,
     521                 :             :                                         maxoff;
     522                 :           0 :                 OffsetNumber todelete[MaxOffsetNumber];
     523                 :           0 :                 BlockNumber leafs_to_delete[MaxOffsetNumber];
     524                 :           0 :                 int                     ntodelete;
     525                 :           0 :                 int                     deleted;
     526                 :             : 
     527                 :           0 :                 buffer = ReadBufferExtended(rel, MAIN_FORKNUM, (BlockNumber) blkno,
     528                 :           0 :                                                                         RBM_NORMAL, info->strategy);
     529                 :             : 
     530                 :           0 :                 LockBuffer(buffer, GIST_SHARE);
     531                 :           0 :                 page = BufferGetPage(buffer);
     532                 :             : 
     533         [ #  # ]:           0 :                 if (PageIsNew(page) || GistPageIsDeleted(page) || GistPageIsLeaf(page))
     534                 :             :                 {
     535                 :             :                         /*
     536                 :             :                          * This page was an internal page earlier, but now it's something
     537                 :             :                          * else. Shouldn't happen...
     538                 :             :                          */
     539                 :           0 :                         Assert(false);
     540                 :           0 :                         UnlockReleaseBuffer(buffer);
     541                 :           0 :                         continue;
     542                 :             :                 }
     543                 :             : 
     544                 :             :                 /*
     545                 :             :                  * Scan all the downlinks, and see if any of them point to empty leaf
     546                 :             :                  * pages.
     547                 :             :                  */
     548                 :           0 :                 maxoff = PageGetMaxOffsetNumber(page);
     549                 :           0 :                 ntodelete = 0;
     550         [ #  # ]:           0 :                 for (off = FirstOffsetNumber;
     551         [ #  # ]:           0 :                          off <= maxoff && ntodelete < maxoff - 1;
     552                 :           0 :                          off = OffsetNumberNext(off))
     553                 :             :                 {
     554                 :           0 :                         ItemId          iid = PageGetItemId(page, off);
     555                 :           0 :                         IndexTuple      idxtuple = (IndexTuple) PageGetItem(page, iid);
     556                 :           0 :                         BlockNumber leafblk;
     557                 :             : 
     558                 :           0 :                         leafblk = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
     559         [ #  # ]:           0 :                         if (intset_is_member(vstate->empty_leaf_set, leafblk))
     560                 :             :                         {
     561                 :           0 :                                 leafs_to_delete[ntodelete] = leafblk;
     562                 :           0 :                                 todelete[ntodelete++] = off;
     563                 :           0 :                         }
     564                 :           0 :                 }
     565                 :             : 
     566                 :             :                 /*
     567                 :             :                  * In order to avoid deadlock, child page must be locked before
     568                 :             :                  * parent, so we must release the lock on the parent, lock the child,
     569                 :             :                  * and then re-acquire the lock the parent.  (And we wouldn't want to
     570                 :             :                  * do I/O, while holding a lock, anyway.)
     571                 :             :                  *
     572                 :             :                  * At the instant that we're not holding a lock on the parent, the
     573                 :             :                  * downlink might get moved by a concurrent insert, so we must
     574                 :             :                  * re-check that it still points to the same child page after we have
     575                 :             :                  * acquired both locks.  Also, another backend might have inserted a
     576                 :             :                  * tuple to the page, so that it is no longer empty.  gistdeletepage()
     577                 :             :                  * re-checks all these conditions.
     578                 :             :                  */
     579                 :           0 :                 LockBuffer(buffer, GIST_UNLOCK);
     580                 :             : 
     581                 :           0 :                 deleted = 0;
     582         [ #  # ]:           0 :                 for (int i = 0; i < ntodelete; i++)
     583                 :             :                 {
     584                 :           0 :                         Buffer          leafbuf;
     585                 :             : 
     586                 :             :                         /*
     587                 :             :                          * Don't remove the last downlink from the parent.  That would
     588                 :             :                          * confuse the insertion code.
     589                 :             :                          */
     590         [ #  # ]:           0 :                         if (PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
     591                 :           0 :                                 break;
     592                 :             : 
     593                 :           0 :                         leafbuf = ReadBufferExtended(rel, MAIN_FORKNUM, leafs_to_delete[i],
     594                 :           0 :                                                                                  RBM_NORMAL, info->strategy);
     595                 :           0 :                         LockBuffer(leafbuf, GIST_EXCLUSIVE);
     596                 :           0 :                         gistcheckpage(rel, leafbuf);
     597                 :             : 
     598                 :           0 :                         LockBuffer(buffer, GIST_EXCLUSIVE);
     599   [ #  #  #  # ]:           0 :                         if (gistdeletepage(info, vstate->stats,
     600                 :           0 :                                                            buffer, todelete[i] - deleted,
     601                 :           0 :                                                            leafbuf))
     602                 :           0 :                                 deleted++;
     603                 :           0 :                         LockBuffer(buffer, GIST_UNLOCK);
     604                 :             : 
     605                 :           0 :                         UnlockReleaseBuffer(leafbuf);
     606         [ #  # ]:           0 :                 }
     607                 :             : 
     608                 :           0 :                 ReleaseBuffer(buffer);
     609                 :             : 
     610                 :             :                 /*
     611                 :             :                  * We can stop the scan as soon as we have seen the downlinks, even if
     612                 :             :                  * we were not able to remove them all.
     613                 :             :                  */
     614                 :           0 :                 empty_pages_remaining -= ntodelete;
     615      [ #  #  # ]:           0 :         }
     616                 :          13 : }
     617                 :             : 
     618                 :             : /*
     619                 :             :  * gistdeletepage takes a leaf page, and its parent, and tries to delete the
     620                 :             :  * leaf.  Both pages must be locked.
     621                 :             :  *
     622                 :             :  * Even if the page was empty when we first saw it, a concurrent inserter might
     623                 :             :  * have added a tuple to it since.  Similarly, the downlink might have moved.
     624                 :             :  * We re-check all the conditions, to make sure the page is still deletable,
     625                 :             :  * before modifying anything.
     626                 :             :  *
     627                 :             :  * Returns true, if the page was deleted, and false if a concurrent update
     628                 :             :  * prevented it.
     629                 :             :  */
     630                 :             : static bool
     631                 :           0 : gistdeletepage(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
     632                 :             :                            Buffer parentBuffer, OffsetNumber downlink,
     633                 :             :                            Buffer leafBuffer)
     634                 :             : {
     635                 :           0 :         Page            parentPage = BufferGetPage(parentBuffer);
     636                 :           0 :         Page            leafPage = BufferGetPage(leafBuffer);
     637                 :           0 :         ItemId          iid;
     638                 :           0 :         IndexTuple      idxtuple;
     639                 :           0 :         XLogRecPtr      recptr;
     640                 :           0 :         FullTransactionId txid;
     641                 :             : 
     642                 :             :         /*
     643                 :             :          * Check that the leaf is still empty and deletable.
     644                 :             :          */
     645         [ #  # ]:           0 :         if (!GistPageIsLeaf(leafPage))
     646                 :             :         {
     647                 :             :                 /* a leaf page should never become a non-leaf page */
     648                 :           0 :                 Assert(false);
     649                 :           0 :                 return false;
     650                 :             :         }
     651                 :             : 
     652         [ #  # ]:           0 :         if (GistFollowRight(leafPage))
     653                 :           0 :                 return false;                   /* don't mess with a concurrent page split */
     654                 :             : 
     655         [ #  # ]:           0 :         if (PageGetMaxOffsetNumber(leafPage) != InvalidOffsetNumber)
     656                 :           0 :                 return false;                   /* not empty anymore */
     657                 :             : 
     658                 :             :         /*
     659                 :             :          * Ok, the leaf is deletable.  Is the downlink in the parent page still
     660                 :             :          * valid?  It might have been moved by a concurrent insert.  We could try
     661                 :             :          * to re-find it by scanning the page again, possibly moving right if the
     662                 :             :          * was split.  But for now, let's keep it simple and just give up.  The
     663                 :             :          * next VACUUM will pick it up.
     664                 :             :          */
     665         [ #  # ]:           0 :         if (PageIsNew(parentPage) || GistPageIsDeleted(parentPage) ||
     666                 :           0 :                 GistPageIsLeaf(parentPage))
     667                 :             :         {
     668                 :             :                 /* shouldn't happen, internal pages are never deleted */
     669                 :           0 :                 Assert(false);
     670                 :           0 :                 return false;
     671                 :             :         }
     672                 :             : 
     673                 :           0 :         if (PageGetMaxOffsetNumber(parentPage) < downlink
     674   [ #  #  #  # ]:           0 :                 || PageGetMaxOffsetNumber(parentPage) <= FirstOffsetNumber)
     675                 :           0 :                 return false;
     676                 :             : 
     677                 :           0 :         iid = PageGetItemId(parentPage, downlink);
     678                 :           0 :         idxtuple = (IndexTuple) PageGetItem(parentPage, iid);
     679   [ #  #  #  # ]:           0 :         if (BufferGetBlockNumber(leafBuffer) !=
     680                 :           0 :                 ItemPointerGetBlockNumber(&(idxtuple->t_tid)))
     681                 :           0 :                 return false;
     682                 :             : 
     683                 :             :         /*
     684                 :             :          * All good, proceed with the deletion.
     685                 :             :          *
     686                 :             :          * The page cannot be immediately recycled, because in-progress scans that
     687                 :             :          * saw the downlink might still visit it.  Mark the page with the current
     688                 :             :          * next-XID counter, so that we know when it can be recycled.  Once that
     689                 :             :          * XID becomes older than GlobalXmin, we know that all scans that are
     690                 :             :          * currently in progress must have ended.  (That's much more conservative
     691                 :             :          * than needed, but let's keep it safe and simple.)
     692                 :             :          */
     693                 :           0 :         txid = ReadNextFullTransactionId();
     694                 :             : 
     695                 :           0 :         START_CRIT_SECTION();
     696                 :             : 
     697                 :             :         /* mark the page as deleted */
     698                 :           0 :         MarkBufferDirty(leafBuffer);
     699                 :           0 :         GistPageSetDeleted(leafPage, txid);
     700                 :           0 :         stats->pages_newly_deleted++;
     701                 :           0 :         stats->pages_deleted++;
     702                 :             : 
     703                 :             :         /* remove the downlink from the parent */
     704                 :           0 :         MarkBufferDirty(parentBuffer);
     705                 :           0 :         PageIndexTupleDelete(parentPage, downlink);
     706                 :             : 
     707   [ #  #  #  #  :           0 :         if (RelationNeedsWAL(info->index))
             #  #  #  # ]
     708                 :           0 :                 recptr = gistXLogPageDelete(leafBuffer, txid, parentBuffer, downlink);
     709                 :             :         else
     710                 :           0 :                 recptr = gistGetFakeLSN(info->index);
     711                 :           0 :         PageSetLSN(parentPage, recptr);
     712                 :           0 :         PageSetLSN(leafPage, recptr);
     713                 :             : 
     714         [ #  # ]:           0 :         END_CRIT_SECTION();
     715                 :             : 
     716                 :           0 :         return true;
     717                 :           0 : }
        

Generated by: LCOV version 2.3.2-1