LCOV - code coverage report
Current view: top level - src/backend/access/hash - hashovfl.c (source / functions) Coverage Total Hit
Test: Code coverage Lines: 91.2 % 512 467
Test Date: 2026-01-26 10:56:24 Functions: 100.0 % 7 7
Legend: Lines:     hit not hit
Branches: + taken - not taken # not executed
Branches: 64.5 % 251 162

             Branch data     Line data    Source code
       1                 :             : /*-------------------------------------------------------------------------
       2                 :             :  *
       3                 :             :  * hashovfl.c
       4                 :             :  *        Overflow page management code for the Postgres hash access method
       5                 :             :  *
       6                 :             :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7                 :             :  * Portions Copyright (c) 1994, Regents of the University of California
       8                 :             :  *
       9                 :             :  *
      10                 :             :  * IDENTIFICATION
      11                 :             :  *        src/backend/access/hash/hashovfl.c
      12                 :             :  *
      13                 :             :  * NOTES
      14                 :             :  *        Overflow pages look like ordinary relation pages.
      15                 :             :  *
      16                 :             :  *-------------------------------------------------------------------------
      17                 :             :  */
      18                 :             : #include "postgres.h"
      19                 :             : 
      20                 :             : #include "access/hash.h"
      21                 :             : #include "access/hash_xlog.h"
      22                 :             : #include "access/xloginsert.h"
      23                 :             : #include "miscadmin.h"
      24                 :             : #include "utils/rel.h"
      25                 :             : 
      26                 :             : 
      27                 :             : static uint32 _hash_firstfreebit(uint32 map);
      28                 :             : 
      29                 :             : 
      30                 :             : /*
      31                 :             :  * Convert overflow page bit number (its index in the free-page bitmaps)
      32                 :             :  * to block number within the index.
      33                 :             :  */
      34                 :             : static BlockNumber
      35                 :          67 : bitno_to_blkno(HashMetaPage metap, uint32 ovflbitnum)
      36                 :             : {
      37                 :          67 :         uint32          splitnum = metap->hashm_ovflpoint;
      38                 :          67 :         uint32          i;
      39                 :             : 
      40                 :             :         /* Convert zero-based bitnumber to 1-based page number */
      41                 :          67 :         ovflbitnum += 1;
      42                 :             : 
      43                 :             :         /* Determine the split number for this page (must be >= 1) */
      44         [ +  + ]:         432 :         for (i = 1;
      45         [ +  + ]:         365 :                  i < splitnum && ovflbitnum > metap->hashm_spares[i];
      46                 :         298 :                  i++)
      47                 :             :                  /* loop */ ;
      48                 :             : 
      49                 :             :         /*
      50                 :             :          * Convert to absolute page number by adding the number of bucket pages
      51                 :             :          * that exist before this split point.
      52                 :             :          */
      53                 :         134 :         return (BlockNumber) (_hash_get_totalbuckets(i) + ovflbitnum);
      54                 :          67 : }
      55                 :             : 
      56                 :             : /*
      57                 :             :  * _hash_ovflblkno_to_bitno
      58                 :             :  *
      59                 :             :  * Convert overflow page block number to bit number for free-page bitmap.
      60                 :             :  */
      61                 :             : uint32
      62                 :          31 : _hash_ovflblkno_to_bitno(HashMetaPage metap, BlockNumber ovflblkno)
      63                 :             : {
      64                 :          31 :         uint32          splitnum = metap->hashm_ovflpoint;
      65                 :          31 :         uint32          i;
      66                 :          31 :         uint32          bitnum;
      67                 :             : 
      68                 :             :         /* Determine the split number containing this page */
      69         [ +  - ]:         116 :         for (i = 1; i <= splitnum; i++)
      70                 :             :         {
      71         [ -  + ]:         116 :                 if (ovflblkno <= (BlockNumber) _hash_get_totalbuckets(i))
      72                 :           0 :                         break;                          /* oops */
      73                 :         116 :                 bitnum = ovflblkno - _hash_get_totalbuckets(i);
      74                 :             : 
      75                 :             :                 /*
      76                 :             :                  * bitnum has to be greater than number of overflow page added in
      77                 :             :                  * previous split point. The overflow page at this splitnum (i) if any
      78                 :             :                  * should start from (_hash_get_totalbuckets(i) +
      79                 :             :                  * metap->hashm_spares[i - 1] + 1).
      80                 :             :                  */
      81   [ +  -  +  + ]:         116 :                 if (bitnum > metap->hashm_spares[i - 1] &&
      82                 :         116 :                         bitnum <= metap->hashm_spares[i])
      83                 :          31 :                         return bitnum - 1;      /* -1 to convert 1-based to 0-based */
      84                 :          85 :         }
      85                 :             : 
      86   [ #  #  #  # ]:           0 :         ereport(ERROR,
      87                 :             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
      88                 :             :                          errmsg("invalid overflow block number %u", ovflblkno)));
      89                 :           0 :         return 0;                                       /* keep compiler quiet */
      90                 :          31 : }
      91                 :             : 
      92                 :             : /*
      93                 :             :  *      _hash_addovflpage
      94                 :             :  *
      95                 :             :  *      Add an overflow page to the bucket whose last page is pointed to by 'buf'.
      96                 :             :  *
      97                 :             :  *      On entry, the caller must hold a pin but no lock on 'buf'.  The pin is
      98                 :             :  *      dropped before exiting (we assume the caller is not interested in 'buf'
      99                 :             :  *      anymore) if not asked to retain.  The pin will be retained only for the
     100                 :             :  *      primary bucket.  The returned overflow page will be pinned and
     101                 :             :  *      write-locked; it is guaranteed to be empty.
     102                 :             :  *
     103                 :             :  *      The caller must hold a pin, but no lock, on the metapage buffer.
     104                 :             :  *      That buffer is returned in the same state.
     105                 :             :  *
     106                 :             :  * NB: since this could be executed concurrently by multiple processes,
     107                 :             :  * one should not assume that the returned overflow page will be the
     108                 :             :  * immediate successor of the originally passed 'buf'.  Additional overflow
     109                 :             :  * pages might have been added to the bucket chain in between.
     110                 :             :  */
     111                 :             : Buffer
     112                 :          67 : _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf, bool retain_pin)
     113                 :             : {
     114                 :          67 :         Buffer          ovflbuf;
     115                 :          67 :         Page            page;
     116                 :          67 :         Page            ovflpage;
     117                 :          67 :         HashPageOpaque pageopaque;
     118                 :          67 :         HashPageOpaque ovflopaque;
     119                 :          67 :         HashMetaPage metap;
     120                 :          67 :         Buffer          mapbuf = InvalidBuffer;
     121                 :          67 :         Buffer          newmapbuf = InvalidBuffer;
     122                 :          67 :         BlockNumber blkno;
     123                 :          67 :         uint32          orig_firstfree;
     124                 :          67 :         uint32          splitnum;
     125                 :          67 :         uint32     *freep = NULL;
     126                 :          67 :         uint32          max_ovflpg;
     127                 :          67 :         uint32          bit;
     128                 :          67 :         uint32          bitmap_page_bit;
     129                 :          67 :         uint32          first_page;
     130                 :          67 :         uint32          last_bit;
     131                 :          67 :         uint32          last_page;
     132                 :          67 :         uint32          i,
     133                 :             :                                 j;
     134                 :          67 :         bool            page_found = false;
     135                 :             : 
     136                 :             :         /*
     137                 :             :          * Write-lock the tail page.  Here, we need to maintain locking order such
     138                 :             :          * that, first acquire the lock on tail page of bucket, then on meta page
     139                 :             :          * to find and lock the bitmap page and if it is found, then lock on meta
     140                 :             :          * page is released, then finally acquire the lock on new overflow buffer.
     141                 :             :          * We need this locking order to avoid deadlock with backends that are
     142                 :             :          * doing inserts.
     143                 :             :          *
     144                 :             :          * Note: We could have avoided locking many buffers here if we made two
     145                 :             :          * WAL records for acquiring an overflow page (one to allocate an overflow
     146                 :             :          * page and another to add it to overflow bucket chain).  However, doing
     147                 :             :          * so can leak an overflow page, if the system crashes after allocation.
     148                 :             :          * Needless to say, it is better to have a single record from a
     149                 :             :          * performance point of view as well.
     150                 :             :          */
     151                 :          67 :         LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
     152                 :             : 
     153                 :             :         /* probably redundant... */
     154                 :          67 :         _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
     155                 :             : 
     156                 :             :         /* loop to find current tail page, in case someone else inserted too */
     157                 :          67 :         for (;;)
     158                 :             :         {
     159                 :          67 :                 BlockNumber nextblkno;
     160                 :             : 
     161                 :          67 :                 page = BufferGetPage(buf);
     162                 :          67 :                 pageopaque = HashPageGetOpaque(page);
     163                 :          67 :                 nextblkno = pageopaque->hasho_nextblkno;
     164                 :             : 
     165         [ -  + ]:          67 :                 if (!BlockNumberIsValid(nextblkno))
     166                 :          67 :                         break;
     167                 :             : 
     168                 :             :                 /* we assume we do not need to write the unmodified page */
     169         [ #  # ]:           0 :                 if (retain_pin)
     170                 :             :                 {
     171                 :             :                         /* pin will be retained only for the primary bucket page */
     172         [ #  # ]:           0 :                         Assert((pageopaque->hasho_flag & LH_PAGE_TYPE) == LH_BUCKET_PAGE);
     173                 :           0 :                         LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     174                 :           0 :                 }
     175                 :             :                 else
     176                 :           0 :                         _hash_relbuf(rel, buf);
     177                 :             : 
     178                 :           0 :                 retain_pin = false;
     179                 :             : 
     180                 :           0 :                 buf = _hash_getbuf(rel, nextblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
     181      [ -  -  + ]:          67 :         }
     182                 :             : 
     183                 :             :         /* Get exclusive lock on the meta page */
     184                 :          67 :         LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
     185                 :             : 
     186                 :          67 :         _hash_checkpage(rel, metabuf, LH_META_PAGE);
     187                 :          67 :         metap = HashPageGetMeta(BufferGetPage(metabuf));
     188                 :             : 
     189                 :             :         /* start search at hashm_firstfree */
     190                 :          67 :         orig_firstfree = metap->hashm_firstfree;
     191                 :          67 :         first_page = orig_firstfree >> BMPG_SHIFT(metap);
     192                 :          67 :         bit = orig_firstfree & BMPG_MASK(metap);
     193                 :          67 :         i = first_page;
     194                 :          67 :         j = bit / BITS_PER_MAP;
     195                 :          67 :         bit &= ~(BITS_PER_MAP - 1);
     196                 :             : 
     197                 :             :         /* outer loop iterates once per bitmap page */
     198                 :         122 :         for (;;)
     199                 :             :         {
     200                 :         122 :                 BlockNumber mapblkno;
     201                 :         122 :                 Page            mappage;
     202                 :         122 :                 uint32          last_inpage;
     203                 :             : 
     204                 :             :                 /* want to end search with the last existing overflow page */
     205                 :         122 :                 splitnum = metap->hashm_ovflpoint;
     206                 :         122 :                 max_ovflpg = metap->hashm_spares[splitnum] - 1;
     207                 :         122 :                 last_page = max_ovflpg >> BMPG_SHIFT(metap);
     208                 :         122 :                 last_bit = max_ovflpg & BMPG_MASK(metap);
     209                 :             : 
     210         [ +  + ]:         122 :                 if (i > last_page)
     211                 :          55 :                         break;
     212                 :             : 
     213         [ +  - ]:          67 :                 Assert(i < metap->hashm_nmaps);
     214                 :          67 :                 mapblkno = metap->hashm_mapp[i];
     215                 :             : 
     216         [ +  - ]:          67 :                 if (i == last_page)
     217                 :          67 :                         last_inpage = last_bit;
     218                 :             :                 else
     219                 :           0 :                         last_inpage = BMPGSZ_BIT(metap) - 1;
     220                 :             : 
     221                 :             :                 /* Release exclusive lock on metapage while reading bitmap page */
     222                 :          67 :                 LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
     223                 :             : 
     224                 :          67 :                 mapbuf = _hash_getbuf(rel, mapblkno, HASH_WRITE, LH_BITMAP_PAGE);
     225                 :          67 :                 mappage = BufferGetPage(mapbuf);
     226                 :          67 :                 freep = HashPageGetBitmap(mappage);
     227                 :             : 
     228         [ +  + ]:         122 :                 for (; bit <= last_inpage; j++, bit += BITS_PER_MAP)
     229                 :             :                 {
     230         [ +  + ]:          67 :                         if (freep[j] != ALL_SET)
     231                 :             :                         {
     232                 :          12 :                                 page_found = true;
     233                 :             : 
     234                 :             :                                 /* Reacquire exclusive lock on the meta page */
     235                 :          12 :                                 LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
     236                 :             : 
     237                 :             :                                 /* convert bit to bit number within page */
     238                 :          12 :                                 bit += _hash_firstfreebit(freep[j]);
     239                 :          12 :                                 bitmap_page_bit = bit;
     240                 :             : 
     241                 :             :                                 /* convert bit to absolute bit number */
     242                 :          12 :                                 bit += (i << BMPG_SHIFT(metap));
     243                 :             :                                 /* Calculate address of the recycled overflow page */
     244                 :          12 :                                 blkno = bitno_to_blkno(metap, bit);
     245                 :             : 
     246                 :             :                                 /* Fetch and init the recycled page */
     247                 :          12 :                                 ovflbuf = _hash_getinitbuf(rel, blkno);
     248                 :             : 
     249                 :          12 :                                 goto found;
     250                 :             :                         }
     251                 :          55 :                 }
     252                 :             : 
     253                 :             :                 /* No free space here, try to advance to next map page */
     254                 :          55 :                 _hash_relbuf(rel, mapbuf);
     255                 :          55 :                 mapbuf = InvalidBuffer;
     256                 :          55 :                 i++;
     257                 :          55 :                 j = 0;                                  /* scan from start of next map page */
     258                 :          55 :                 bit = 0;
     259                 :             : 
     260                 :             :                 /* Reacquire exclusive lock on the meta page */
     261                 :          55 :                 LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
     262   [ -  +  +  + ]:         122 :         }
     263                 :             : 
     264                 :             :         /*
     265                 :             :          * No free pages --- have to extend the relation to add an overflow page.
     266                 :             :          * First, check to see if we have to add a new bitmap page too.
     267                 :             :          */
     268         [ -  + ]:          55 :         if (last_bit == (uint32) (BMPGSZ_BIT(metap) - 1))
     269                 :             :         {
     270                 :             :                 /*
     271                 :             :                  * We create the new bitmap page with all pages marked "in use".
     272                 :             :                  * Actually two pages in the new bitmap's range will exist
     273                 :             :                  * immediately: the bitmap page itself, and the following page which
     274                 :             :                  * is the one we return to the caller.  Both of these are correctly
     275                 :             :                  * marked "in use".  Subsequent pages do not exist yet, but it is
     276                 :             :                  * convenient to pre-mark them as "in use" too.
     277                 :             :                  */
     278                 :           0 :                 bit = metap->hashm_spares[splitnum];
     279                 :             : 
     280                 :             :                 /* metapage already has a write lock */
     281         [ #  # ]:           0 :                 if (metap->hashm_nmaps >= HASH_MAX_BITMAPS)
     282   [ #  #  #  # ]:           0 :                         ereport(ERROR,
     283                 :             :                                         (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     284                 :             :                                          errmsg("out of overflow pages in hash index \"%s\"",
     285                 :             :                                                         RelationGetRelationName(rel))));
     286                 :             : 
     287                 :           0 :                 newmapbuf = _hash_getnewbuf(rel, bitno_to_blkno(metap, bit), MAIN_FORKNUM);
     288                 :           0 :         }
     289                 :             :         else
     290                 :             :         {
     291                 :             :                 /*
     292                 :             :                  * Nothing to do here; since the page will be past the last used page,
     293                 :             :                  * we know its bitmap bit was preinitialized to "in use".
     294                 :             :                  */
     295                 :             :         }
     296                 :             : 
     297                 :             :         /* Calculate address of the new overflow page */
     298         [ -  + ]:          55 :         bit = BufferIsValid(newmapbuf) ?
     299                 :          55 :                 metap->hashm_spares[splitnum] + 1 : metap->hashm_spares[splitnum];
     300                 :          55 :         blkno = bitno_to_blkno(metap, bit);
     301                 :             : 
     302                 :             :         /*
     303                 :             :          * Fetch the page with _hash_getnewbuf to ensure smgr's idea of the
     304                 :             :          * relation length stays in sync with ours.  XXX It's annoying to do this
     305                 :             :          * with metapage write lock held; would be better to use a lock that
     306                 :             :          * doesn't block incoming searches.
     307                 :             :          *
     308                 :             :          * It is okay to hold two buffer locks here (one on tail page of bucket
     309                 :             :          * and other on new overflow page) since there cannot be anyone else
     310                 :             :          * contending for access to ovflbuf.
     311                 :             :          */
     312                 :          55 :         ovflbuf = _hash_getnewbuf(rel, blkno, MAIN_FORKNUM);
     313                 :             : 
     314                 :             : found:
     315                 :             : 
     316                 :             :         /*
     317                 :             :          * Do the update.  No ereport(ERROR) until changes are logged. We want to
     318                 :             :          * log the changes for bitmap page and overflow page together to avoid
     319                 :             :          * loss of pages in case the new page is added.
     320                 :             :          */
     321                 :          67 :         START_CRIT_SECTION();
     322                 :             : 
     323         [ +  + ]:          67 :         if (page_found)
     324                 :             :         {
     325         [ +  - ]:          12 :                 Assert(BufferIsValid(mapbuf));
     326                 :             : 
     327                 :             :                 /* mark page "in use" in the bitmap */
     328                 :          12 :                 SETBIT(freep, bitmap_page_bit);
     329                 :          12 :                 MarkBufferDirty(mapbuf);
     330                 :          12 :         }
     331                 :             :         else
     332                 :             :         {
     333                 :             :                 /* update the count to indicate new overflow page is added */
     334                 :          55 :                 metap->hashm_spares[splitnum]++;
     335                 :             : 
     336         [ +  - ]:          55 :                 if (BufferIsValid(newmapbuf))
     337                 :             :                 {
     338                 :           0 :                         _hash_initbitmapbuffer(newmapbuf, metap->hashm_bmsize, false);
     339                 :           0 :                         MarkBufferDirty(newmapbuf);
     340                 :             : 
     341                 :             :                         /* add the new bitmap page to the metapage's list of bitmaps */
     342                 :           0 :                         metap->hashm_mapp[metap->hashm_nmaps] = BufferGetBlockNumber(newmapbuf);
     343                 :           0 :                         metap->hashm_nmaps++;
     344                 :           0 :                         metap->hashm_spares[splitnum]++;
     345                 :           0 :                 }
     346                 :             : 
     347                 :          55 :                 MarkBufferDirty(metabuf);
     348                 :             : 
     349                 :             :                 /*
     350                 :             :                  * for new overflow page, we don't need to explicitly set the bit in
     351                 :             :                  * bitmap page, as by default that will be set to "in use".
     352                 :             :                  */
     353                 :             :         }
     354                 :             : 
     355                 :             :         /*
     356                 :             :          * Adjust hashm_firstfree to avoid redundant searches.  But don't risk
     357                 :             :          * changing it if someone moved it while we were searching bitmap pages.
     358                 :             :          */
     359         [ -  + ]:          67 :         if (metap->hashm_firstfree == orig_firstfree)
     360                 :             :         {
     361                 :          67 :                 metap->hashm_firstfree = bit + 1;
     362                 :          67 :                 MarkBufferDirty(metabuf);
     363                 :          67 :         }
     364                 :             : 
     365                 :             :         /* initialize new overflow page */
     366                 :          67 :         ovflpage = BufferGetPage(ovflbuf);
     367                 :          67 :         ovflopaque = HashPageGetOpaque(ovflpage);
     368                 :          67 :         ovflopaque->hasho_prevblkno = BufferGetBlockNumber(buf);
     369                 :          67 :         ovflopaque->hasho_nextblkno = InvalidBlockNumber;
     370                 :          67 :         ovflopaque->hasho_bucket = pageopaque->hasho_bucket;
     371                 :          67 :         ovflopaque->hasho_flag = LH_OVERFLOW_PAGE;
     372                 :          67 :         ovflopaque->hasho_page_id = HASHO_PAGE_ID;
     373                 :             : 
     374                 :          67 :         MarkBufferDirty(ovflbuf);
     375                 :             : 
     376                 :             :         /* logically chain overflow page to previous page */
     377                 :          67 :         pageopaque->hasho_nextblkno = BufferGetBlockNumber(ovflbuf);
     378                 :             : 
     379                 :          67 :         MarkBufferDirty(buf);
     380                 :             : 
     381                 :             :         /* XLOG stuff */
     382   [ +  -  +  -  :          67 :         if (RelationNeedsWAL(rel))
             +  +  +  + ]
     383                 :             :         {
     384                 :          53 :                 XLogRecPtr      recptr;
     385                 :          53 :                 xl_hash_add_ovfl_page xlrec;
     386                 :             : 
     387                 :          53 :                 xlrec.bmpage_found = page_found;
     388                 :          53 :                 xlrec.bmsize = metap->hashm_bmsize;
     389                 :             : 
     390                 :          53 :                 XLogBeginInsert();
     391                 :          53 :                 XLogRegisterData(&xlrec, SizeOfHashAddOvflPage);
     392                 :             : 
     393                 :          53 :                 XLogRegisterBuffer(0, ovflbuf, REGBUF_WILL_INIT);
     394                 :          53 :                 XLogRegisterBufData(0, &pageopaque->hasho_bucket, sizeof(Bucket));
     395                 :             : 
     396                 :          53 :                 XLogRegisterBuffer(1, buf, REGBUF_STANDARD);
     397                 :             : 
     398         [ +  + ]:          53 :                 if (BufferIsValid(mapbuf))
     399                 :             :                 {
     400                 :          12 :                         XLogRegisterBuffer(2, mapbuf, REGBUF_STANDARD);
     401                 :          12 :                         XLogRegisterBufData(2, &bitmap_page_bit, sizeof(uint32));
     402                 :          12 :                 }
     403                 :             : 
     404         [ +  - ]:          53 :                 if (BufferIsValid(newmapbuf))
     405                 :           0 :                         XLogRegisterBuffer(3, newmapbuf, REGBUF_WILL_INIT);
     406                 :             : 
     407                 :          53 :                 XLogRegisterBuffer(4, metabuf, REGBUF_STANDARD);
     408                 :          53 :                 XLogRegisterBufData(4, &metap->hashm_firstfree, sizeof(uint32));
     409                 :             : 
     410                 :          53 :                 recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_ADD_OVFL_PAGE);
     411                 :             : 
     412                 :          53 :                 PageSetLSN(BufferGetPage(ovflbuf), recptr);
     413                 :          53 :                 PageSetLSN(BufferGetPage(buf), recptr);
     414                 :             : 
     415         [ +  + ]:          53 :                 if (BufferIsValid(mapbuf))
     416                 :          12 :                         PageSetLSN(BufferGetPage(mapbuf), recptr);
     417                 :             : 
     418         [ +  - ]:          53 :                 if (BufferIsValid(newmapbuf))
     419                 :           0 :                         PageSetLSN(BufferGetPage(newmapbuf), recptr);
     420                 :             : 
     421                 :          53 :                 PageSetLSN(BufferGetPage(metabuf), recptr);
     422                 :          53 :         }
     423                 :             : 
     424         [ +  - ]:          67 :         END_CRIT_SECTION();
     425                 :             : 
     426         [ +  + ]:          67 :         if (retain_pin)
     427                 :          19 :                 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     428                 :             :         else
     429                 :          48 :                 _hash_relbuf(rel, buf);
     430                 :             : 
     431         [ +  + ]:          67 :         if (BufferIsValid(mapbuf))
     432                 :          12 :                 _hash_relbuf(rel, mapbuf);
     433                 :             : 
     434                 :          67 :         LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
     435                 :             : 
     436         [ +  - ]:          67 :         if (BufferIsValid(newmapbuf))
     437                 :           0 :                 _hash_relbuf(rel, newmapbuf);
     438                 :             : 
     439                 :          67 :         return ovflbuf;
     440                 :          67 : }
     441                 :             : 
     442                 :             : /*
     443                 :             :  *      _hash_firstfreebit()
     444                 :             :  *
     445                 :             :  *      Return the number of the first bit that is not set in the word 'map'.
     446                 :             :  */
     447                 :             : static uint32
     448                 :          12 : _hash_firstfreebit(uint32 map)
     449                 :             : {
     450                 :          12 :         uint32          i,
     451                 :             :                                 mask;
     452                 :             : 
     453                 :          12 :         mask = 0x1;
     454         [ +  - ]:          97 :         for (i = 0; i < BITS_PER_MAP; i++)
     455                 :             :         {
     456         [ +  + ]:          97 :                 if (!(mask & map))
     457                 :          12 :                         return i;
     458                 :          85 :                 mask <<= 1;
     459                 :          85 :         }
     460                 :             : 
     461   [ #  #  #  # ]:           0 :         elog(ERROR, "firstfreebit found no free bit");
     462                 :             : 
     463                 :           0 :         return 0;                                       /* keep compiler quiet */
     464                 :          12 : }
     465                 :             : 
     466                 :             : /*
     467                 :             :  *      _hash_freeovflpage() -
     468                 :             :  *
     469                 :             :  *      Remove this overflow page from its bucket's chain, and mark the page as
     470                 :             :  *      free.  On entry, ovflbuf is write-locked; it is released before exiting.
     471                 :             :  *
     472                 :             :  *      Add the tuples (itups) to wbuf in this function.  We could do that in the
     473                 :             :  *      caller as well, but the advantage of doing it here is we can easily write
     474                 :             :  *      the WAL for XLOG_HASH_SQUEEZE_PAGE operation.  Addition of tuples and
     475                 :             :  *      removal of overflow page has to done as an atomic operation, otherwise
     476                 :             :  *      during replay on standby users might find duplicate records.
     477                 :             :  *
     478                 :             :  *      Since this function is invoked in VACUUM, we provide an access strategy
     479                 :             :  *      parameter that controls fetches of the bucket pages.
     480                 :             :  *
     481                 :             :  *      Returns the block number of the page that followed the given page
     482                 :             :  *      in the bucket, or InvalidBlockNumber if no following page.
     483                 :             :  *
     484                 :             :  *      NB: caller must not hold lock on metapage, nor on page, that's next to
     485                 :             :  *      ovflbuf in the bucket chain.  We don't acquire the lock on page that's
     486                 :             :  *      prior to ovflbuf in chain if it is same as wbuf because the caller already
     487                 :             :  *      has a lock on same.
     488                 :             :  */
     489                 :             : BlockNumber
     490                 :          31 : _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf,
     491                 :             :                                    Buffer wbuf, IndexTuple *itups, OffsetNumber *itup_offsets,
     492                 :             :                                    Size *tups_size, uint16 nitups,
     493                 :             :                                    BufferAccessStrategy bstrategy)
     494                 :             : {
     495                 :          31 :         HashMetaPage metap;
     496                 :          31 :         Buffer          metabuf;
     497                 :          31 :         Buffer          mapbuf;
     498                 :          31 :         BlockNumber ovflblkno;
     499                 :          31 :         BlockNumber prevblkno;
     500                 :          31 :         BlockNumber blkno;
     501                 :          31 :         BlockNumber nextblkno;
     502                 :          31 :         BlockNumber writeblkno;
     503                 :          31 :         HashPageOpaque ovflopaque;
     504                 :          31 :         Page            ovflpage;
     505                 :          31 :         Page            mappage;
     506                 :          31 :         uint32     *freep;
     507                 :          31 :         uint32          ovflbitno;
     508                 :          31 :         int32           bitmappage,
     509                 :             :                                 bitmapbit;
     510                 :          31 :         Bucket          bucket PG_USED_FOR_ASSERTS_ONLY;
     511                 :          31 :         Buffer          prevbuf = InvalidBuffer;
     512                 :          31 :         Buffer          nextbuf = InvalidBuffer;
     513                 :          31 :         bool            update_metap = false;
     514                 :             : 
     515                 :             :         /* Get information from the doomed page */
     516                 :          31 :         _hash_checkpage(rel, ovflbuf, LH_OVERFLOW_PAGE);
     517                 :          31 :         ovflblkno = BufferGetBlockNumber(ovflbuf);
     518                 :          31 :         ovflpage = BufferGetPage(ovflbuf);
     519                 :          31 :         ovflopaque = HashPageGetOpaque(ovflpage);
     520                 :          31 :         nextblkno = ovflopaque->hasho_nextblkno;
     521                 :          31 :         prevblkno = ovflopaque->hasho_prevblkno;
     522                 :          31 :         writeblkno = BufferGetBlockNumber(wbuf);
     523                 :          31 :         bucket = ovflopaque->hasho_bucket;
     524                 :             : 
     525                 :             :         /*
     526                 :             :          * Fix up the bucket chain.  this is a doubly-linked list, so we must fix
     527                 :             :          * up the bucket chain members behind and ahead of the overflow page being
     528                 :             :          * deleted.  Concurrency issues are avoided by using lock chaining as
     529                 :             :          * described atop hashbucketcleanup.
     530                 :             :          */
     531         [ -  + ]:          31 :         if (BlockNumberIsValid(prevblkno))
     532                 :             :         {
     533         [ +  + ]:          31 :                 if (prevblkno == writeblkno)
     534                 :          10 :                         prevbuf = wbuf;
     535                 :             :                 else
     536                 :          42 :                         prevbuf = _hash_getbuf_with_strategy(rel,
     537                 :          21 :                                                                                                  prevblkno,
     538                 :             :                                                                                                  HASH_WRITE,
     539                 :             :                                                                                                  LH_BUCKET_PAGE | LH_OVERFLOW_PAGE,
     540                 :          21 :                                                                                                  bstrategy);
     541                 :          31 :         }
     542         [ +  - ]:          31 :         if (BlockNumberIsValid(nextblkno))
     543                 :           0 :                 nextbuf = _hash_getbuf_with_strategy(rel,
     544                 :           0 :                                                                                          nextblkno,
     545                 :             :                                                                                          HASH_WRITE,
     546                 :             :                                                                                          LH_OVERFLOW_PAGE,
     547                 :           0 :                                                                                          bstrategy);
     548                 :             : 
     549                 :             :         /* Note: bstrategy is intentionally not used for metapage and bitmap */
     550                 :             : 
     551                 :             :         /* Read the metapage so we can determine which bitmap page to use */
     552                 :          31 :         metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
     553                 :          31 :         metap = HashPageGetMeta(BufferGetPage(metabuf));
     554                 :             : 
     555                 :             :         /* Identify which bit to set */
     556                 :          31 :         ovflbitno = _hash_ovflblkno_to_bitno(metap, ovflblkno);
     557                 :             : 
     558                 :          31 :         bitmappage = ovflbitno >> BMPG_SHIFT(metap);
     559                 :          31 :         bitmapbit = ovflbitno & BMPG_MASK(metap);
     560                 :             : 
     561         [ +  - ]:          31 :         if (bitmappage >= metap->hashm_nmaps)
     562   [ #  #  #  # ]:           0 :                 elog(ERROR, "invalid overflow bit number %u", ovflbitno);
     563                 :          31 :         blkno = metap->hashm_mapp[bitmappage];
     564                 :             : 
     565                 :             :         /* Release metapage lock while we access the bitmap page */
     566                 :          31 :         LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
     567                 :             : 
     568                 :             :         /* read the bitmap page to clear the bitmap bit */
     569                 :          31 :         mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE, LH_BITMAP_PAGE);
     570                 :          31 :         mappage = BufferGetPage(mapbuf);
     571                 :          31 :         freep = HashPageGetBitmap(mappage);
     572         [ +  - ]:          31 :         Assert(ISSET(freep, bitmapbit));
     573                 :             : 
     574                 :             :         /* Get write-lock on metapage to update firstfree */
     575                 :          31 :         LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
     576                 :             : 
     577                 :             :         /* This operation needs to log multiple tuples, prepare WAL for that */
     578   [ +  -  +  -  :          31 :         if (RelationNeedsWAL(rel))
             +  -  -  + ]
     579                 :          31 :                 XLogEnsureRecordSpace(HASH_XLOG_FREE_OVFL_BUFS, 4 + nitups);
     580                 :             : 
     581                 :          31 :         START_CRIT_SECTION();
     582                 :             : 
     583                 :             :         /*
     584                 :             :          * we have to insert tuples on the "write" page, being careful to preserve
     585                 :             :          * hashkey ordering.  (If we insert many tuples into the same "write" page
     586                 :             :          * it would be worth qsort'ing them).
     587                 :             :          */
     588         [ +  + ]:          31 :         if (nitups > 0)
     589                 :             :         {
     590                 :          14 :                 _hash_pgaddmultitup(rel, wbuf, itups, itup_offsets, nitups);
     591                 :          14 :                 MarkBufferDirty(wbuf);
     592                 :          14 :         }
     593                 :             : 
     594                 :             :         /*
     595                 :             :          * Reinitialize the freed overflow page.  Just zeroing the page won't
     596                 :             :          * work, because WAL replay routines expect pages to be initialized. See
     597                 :             :          * explanation of RBM_NORMAL mode atop XLogReadBufferExtended.  We are
     598                 :             :          * careful to make the special space valid here so that tools like
     599                 :             :          * pageinspect won't get confused.
     600                 :             :          */
     601                 :          31 :         _hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf));
     602                 :             : 
     603                 :          31 :         ovflopaque = HashPageGetOpaque(ovflpage);
     604                 :             : 
     605                 :          31 :         ovflopaque->hasho_prevblkno = InvalidBlockNumber;
     606                 :          31 :         ovflopaque->hasho_nextblkno = InvalidBlockNumber;
     607                 :          31 :         ovflopaque->hasho_bucket = InvalidBucket;
     608                 :          31 :         ovflopaque->hasho_flag = LH_UNUSED_PAGE;
     609                 :          31 :         ovflopaque->hasho_page_id = HASHO_PAGE_ID;
     610                 :             : 
     611                 :          31 :         MarkBufferDirty(ovflbuf);
     612                 :             : 
     613         [ -  + ]:          31 :         if (BufferIsValid(prevbuf))
     614                 :             :         {
     615                 :          31 :                 Page            prevpage = BufferGetPage(prevbuf);
     616                 :          31 :                 HashPageOpaque prevopaque = HashPageGetOpaque(prevpage);
     617                 :             : 
     618         [ +  - ]:          31 :                 Assert(prevopaque->hasho_bucket == bucket);
     619                 :          31 :                 prevopaque->hasho_nextblkno = nextblkno;
     620                 :          31 :                 MarkBufferDirty(prevbuf);
     621                 :          31 :         }
     622         [ +  - ]:          31 :         if (BufferIsValid(nextbuf))
     623                 :             :         {
     624                 :           0 :                 Page            nextpage = BufferGetPage(nextbuf);
     625                 :           0 :                 HashPageOpaque nextopaque = HashPageGetOpaque(nextpage);
     626                 :             : 
     627         [ #  # ]:           0 :                 Assert(nextopaque->hasho_bucket == bucket);
     628                 :           0 :                 nextopaque->hasho_prevblkno = prevblkno;
     629                 :           0 :                 MarkBufferDirty(nextbuf);
     630                 :           0 :         }
     631                 :             : 
     632                 :             :         /* Clear the bitmap bit to indicate that this overflow page is free */
     633                 :          31 :         CLRBIT(freep, bitmapbit);
     634                 :          31 :         MarkBufferDirty(mapbuf);
     635                 :             : 
     636                 :             :         /* if this is now the first free page, update hashm_firstfree */
     637         [ +  + ]:          31 :         if (ovflbitno < metap->hashm_firstfree)
     638                 :             :         {
     639                 :          30 :                 metap->hashm_firstfree = ovflbitno;
     640                 :          30 :                 update_metap = true;
     641                 :          30 :                 MarkBufferDirty(metabuf);
     642                 :          30 :         }
     643                 :             : 
     644                 :             :         /* XLOG stuff */
     645   [ +  -  +  -  :          31 :         if (RelationNeedsWAL(rel))
             +  -  -  + ]
     646                 :             :         {
     647                 :          31 :                 xl_hash_squeeze_page xlrec;
     648                 :          31 :                 XLogRecPtr      recptr;
     649                 :          31 :                 int                     i;
     650                 :          31 :                 bool            mod_wbuf = false;
     651                 :             : 
     652                 :          31 :                 xlrec.prevblkno = prevblkno;
     653                 :          31 :                 xlrec.nextblkno = nextblkno;
     654                 :          31 :                 xlrec.ntups = nitups;
     655                 :          31 :                 xlrec.is_prim_bucket_same_wrt = (wbuf == bucketbuf);
     656                 :          31 :                 xlrec.is_prev_bucket_same_wrt = (wbuf == prevbuf);
     657                 :             : 
     658                 :          31 :                 XLogBeginInsert();
     659                 :          31 :                 XLogRegisterData(&xlrec, SizeOfHashSqueezePage);
     660                 :             : 
     661                 :             :                 /*
     662                 :             :                  * bucket buffer was not changed, but still needs to be registered to
     663                 :             :                  * ensure that we can acquire a cleanup lock on it during replay.
     664                 :             :                  */
     665         [ +  + ]:          31 :                 if (!xlrec.is_prim_bucket_same_wrt)
     666                 :             :                 {
     667                 :           8 :                         uint8           flags = REGBUF_STANDARD | REGBUF_NO_IMAGE | REGBUF_NO_CHANGE;
     668                 :             : 
     669                 :           8 :                         XLogRegisterBuffer(0, bucketbuf, flags);
     670                 :           8 :                 }
     671                 :             : 
     672         [ +  + ]:          31 :                 if (xlrec.ntups > 0)
     673                 :             :                 {
     674                 :          14 :                         XLogRegisterBuffer(1, wbuf, REGBUF_STANDARD);
     675                 :             : 
     676                 :             :                         /* Remember that wbuf is modified. */
     677                 :          14 :                         mod_wbuf = true;
     678                 :             : 
     679                 :          28 :                         XLogRegisterBufData(1, itup_offsets,
     680                 :          14 :                                                                 nitups * sizeof(OffsetNumber));
     681         [ +  + ]:         522 :                         for (i = 0; i < nitups; i++)
     682                 :         508 :                                 XLogRegisterBufData(1, itups[i], tups_size[i]);
     683                 :          14 :                 }
     684   [ +  +  +  + ]:          17 :                 else if (xlrec.is_prim_bucket_same_wrt || xlrec.is_prev_bucket_same_wrt)
     685                 :             :                 {
     686                 :          16 :                         uint8           wbuf_flags;
     687                 :             : 
     688                 :             :                         /*
     689                 :             :                          * A write buffer needs to be registered even if no tuples are
     690                 :             :                          * added to it to ensure that we can acquire a cleanup lock on it
     691                 :             :                          * if it is the same as primary bucket buffer or update the
     692                 :             :                          * nextblkno if it is same as the previous bucket buffer.
     693                 :             :                          */
     694         [ +  - ]:          16 :                         Assert(xlrec.ntups == 0);
     695                 :             : 
     696                 :          16 :                         wbuf_flags = REGBUF_STANDARD;
     697         [ +  + ]:          16 :                         if (!xlrec.is_prev_bucket_same_wrt)
     698                 :             :                         {
     699                 :          13 :                                 wbuf_flags |= REGBUF_NO_CHANGE;
     700                 :          13 :                         }
     701                 :             :                         else
     702                 :             :                         {
     703                 :             :                                 /* Remember that wbuf is modified. */
     704                 :           3 :                                 mod_wbuf = true;
     705                 :             :                         }
     706                 :          16 :                         XLogRegisterBuffer(1, wbuf, wbuf_flags);
     707                 :          16 :                 }
     708                 :             : 
     709                 :          31 :                 XLogRegisterBuffer(2, ovflbuf, REGBUF_STANDARD);
     710                 :             : 
     711                 :             :                 /*
     712                 :             :                  * If prevpage and the writepage (block in which we are moving tuples
     713                 :             :                  * from overflow) are same, then no need to separately register
     714                 :             :                  * prevpage.  During replay, we can directly update the nextblock in
     715                 :             :                  * writepage.
     716                 :             :                  */
     717   [ +  -  +  + ]:          31 :                 if (BufferIsValid(prevbuf) && !xlrec.is_prev_bucket_same_wrt)
     718                 :          21 :                         XLogRegisterBuffer(3, prevbuf, REGBUF_STANDARD);
     719                 :             : 
     720         [ +  - ]:          31 :                 if (BufferIsValid(nextbuf))
     721                 :           0 :                         XLogRegisterBuffer(4, nextbuf, REGBUF_STANDARD);
     722                 :             : 
     723                 :          31 :                 XLogRegisterBuffer(5, mapbuf, REGBUF_STANDARD);
     724                 :          31 :                 XLogRegisterBufData(5, &bitmapbit, sizeof(uint32));
     725                 :             : 
     726         [ +  + ]:          31 :                 if (update_metap)
     727                 :             :                 {
     728                 :          30 :                         XLogRegisterBuffer(6, metabuf, REGBUF_STANDARD);
     729                 :          30 :                         XLogRegisterBufData(6, &metap->hashm_firstfree, sizeof(uint32));
     730                 :          30 :                 }
     731                 :             : 
     732                 :          31 :                 recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_SQUEEZE_PAGE);
     733                 :             : 
     734                 :             :                 /* Set LSN iff wbuf is modified. */
     735         [ +  + ]:          31 :                 if (mod_wbuf)
     736                 :          17 :                         PageSetLSN(BufferGetPage(wbuf), recptr);
     737                 :             : 
     738                 :          31 :                 PageSetLSN(BufferGetPage(ovflbuf), recptr);
     739                 :             : 
     740   [ +  -  +  + ]:          31 :                 if (BufferIsValid(prevbuf) && !xlrec.is_prev_bucket_same_wrt)
     741                 :          21 :                         PageSetLSN(BufferGetPage(prevbuf), recptr);
     742         [ +  - ]:          31 :                 if (BufferIsValid(nextbuf))
     743                 :           0 :                         PageSetLSN(BufferGetPage(nextbuf), recptr);
     744                 :             : 
     745                 :          31 :                 PageSetLSN(BufferGetPage(mapbuf), recptr);
     746                 :             : 
     747         [ +  + ]:          31 :                 if (update_metap)
     748                 :          30 :                         PageSetLSN(BufferGetPage(metabuf), recptr);
     749                 :          31 :         }
     750                 :             : 
     751         [ +  - ]:          31 :         END_CRIT_SECTION();
     752                 :             : 
     753                 :             :         /* release previous bucket if it is not same as write bucket */
     754   [ +  -  +  + ]:          31 :         if (BufferIsValid(prevbuf) && prevblkno != writeblkno)
     755                 :          21 :                 _hash_relbuf(rel, prevbuf);
     756                 :             : 
     757         [ -  + ]:          31 :         if (BufferIsValid(ovflbuf))
     758                 :          31 :                 _hash_relbuf(rel, ovflbuf);
     759                 :             : 
     760         [ +  - ]:          31 :         if (BufferIsValid(nextbuf))
     761                 :           0 :                 _hash_relbuf(rel, nextbuf);
     762                 :             : 
     763                 :          31 :         _hash_relbuf(rel, mapbuf);
     764                 :          31 :         _hash_relbuf(rel, metabuf);
     765                 :             : 
     766                 :          62 :         return nextblkno;
     767                 :          31 : }
     768                 :             : 
     769                 :             : 
     770                 :             : /*
     771                 :             :  *      _hash_initbitmapbuffer()
     772                 :             :  *
     773                 :             :  *       Initialize a new bitmap page.  All bits in the new bitmap page are set to
     774                 :             :  *       "1", indicating "in use".
     775                 :             :  */
     776                 :             : void
     777                 :          30 : _hash_initbitmapbuffer(Buffer buf, uint16 bmsize, bool initpage)
     778                 :             : {
     779                 :          30 :         Page            pg;
     780                 :          30 :         HashPageOpaque op;
     781                 :          30 :         uint32     *freep;
     782                 :             : 
     783                 :          30 :         pg = BufferGetPage(buf);
     784                 :             : 
     785                 :             :         /* initialize the page */
     786         [ +  - ]:          30 :         if (initpage)
     787                 :           0 :                 _hash_pageinit(pg, BufferGetPageSize(buf));
     788                 :             : 
     789                 :             :         /* initialize the page's special space */
     790                 :          30 :         op = HashPageGetOpaque(pg);
     791                 :          30 :         op->hasho_prevblkno = InvalidBlockNumber;
     792                 :          30 :         op->hasho_nextblkno = InvalidBlockNumber;
     793                 :          30 :         op->hasho_bucket = InvalidBucket;
     794                 :          30 :         op->hasho_flag = LH_BITMAP_PAGE;
     795                 :          30 :         op->hasho_page_id = HASHO_PAGE_ID;
     796                 :             : 
     797                 :             :         /* set all of the bits to 1 */
     798                 :          30 :         freep = HashPageGetBitmap(pg);
     799                 :          30 :         memset(freep, 0xFF, bmsize);
     800                 :             : 
     801                 :             :         /*
     802                 :             :          * Set pd_lower just past the end of the bitmap page data.  We could even
     803                 :             :          * set pd_lower equal to pd_upper, but this is more precise and makes the
     804                 :             :          * page look compressible to xlog.c.
     805                 :             :          */
     806                 :          30 :         ((PageHeader) pg)->pd_lower = ((char *) freep + bmsize) - (char *) pg;
     807                 :          30 : }
     808                 :             : 
     809                 :             : 
     810                 :             : /*
     811                 :             :  *      _hash_squeezebucket(rel, bucket)
     812                 :             :  *
     813                 :             :  *      Try to squeeze the tuples onto pages occurring earlier in the
     814                 :             :  *      bucket chain in an attempt to free overflow pages. When we start
     815                 :             :  *      the "squeezing", the page from which we start taking tuples (the
     816                 :             :  *      "read" page) is the last bucket in the bucket chain and the page
     817                 :             :  *      onto which we start squeezing tuples (the "write" page) is the
     818                 :             :  *      first page in the bucket chain.  The read page works backward and
     819                 :             :  *      the write page works forward; the procedure terminates when the
     820                 :             :  *      read page and write page are the same page.
     821                 :             :  *
     822                 :             :  *      At completion of this procedure, it is guaranteed that all pages in
     823                 :             :  *      the bucket are nonempty, unless the bucket is totally empty (in
     824                 :             :  *      which case all overflow pages will be freed).  The original implementation
     825                 :             :  *      required that to be true on entry as well, but it's a lot easier for
     826                 :             :  *      callers to leave empty overflow pages and let this guy clean it up.
     827                 :             :  *
     828                 :             :  *      Caller must acquire cleanup lock on the primary page of the target
     829                 :             :  *      bucket to exclude any scans that are in progress, which could easily
     830                 :             :  *      be confused into returning the same tuple more than once or some tuples
     831                 :             :  *      not at all by the rearrangement we are performing here.  To prevent
     832                 :             :  *      any concurrent scan to cross the squeeze scan we use lock chaining
     833                 :             :  *      similar to hashbucketcleanup.  Refer comments atop hashbucketcleanup.
     834                 :             :  *
     835                 :             :  *      We need to retain a pin on the primary bucket to ensure that no concurrent
     836                 :             :  *      split can start.
     837                 :             :  *
     838                 :             :  *      Since this function is invoked in VACUUM, we provide an access strategy
     839                 :             :  *      parameter that controls fetches of the bucket pages.
     840                 :             :  */
     841                 :             : void
     842                 :         220 : _hash_squeezebucket(Relation rel,
     843                 :             :                                         Bucket bucket,
     844                 :             :                                         BlockNumber bucket_blkno,
     845                 :             :                                         Buffer bucket_buf,
     846                 :             :                                         BufferAccessStrategy bstrategy)
     847                 :             : {
     848                 :         220 :         BlockNumber wblkno;
     849                 :         220 :         BlockNumber rblkno;
     850                 :         220 :         Buffer          wbuf;
     851                 :         220 :         Buffer          rbuf;
     852                 :         220 :         Page            wpage;
     853                 :         220 :         Page            rpage;
     854                 :         220 :         HashPageOpaque wopaque;
     855                 :         220 :         HashPageOpaque ropaque;
     856                 :             : 
     857                 :             :         /*
     858                 :             :          * start squeezing into the primary bucket page.
     859                 :             :          */
     860                 :         220 :         wblkno = bucket_blkno;
     861                 :         220 :         wbuf = bucket_buf;
     862                 :         220 :         wpage = BufferGetPage(wbuf);
     863                 :         220 :         wopaque = HashPageGetOpaque(wpage);
     864                 :             : 
     865                 :             :         /*
     866                 :             :          * if there aren't any overflow pages, there's nothing to squeeze. caller
     867                 :             :          * is responsible for releasing the pin on primary bucket page.
     868                 :             :          */
     869         [ +  + ]:         220 :         if (!BlockNumberIsValid(wopaque->hasho_nextblkno))
     870                 :             :         {
     871                 :         208 :                 LockBuffer(wbuf, BUFFER_LOCK_UNLOCK);
     872                 :         208 :                 return;
     873                 :             :         }
     874                 :             : 
     875                 :             :         /*
     876                 :             :          * Find the last page in the bucket chain by starting at the base bucket
     877                 :             :          * page and working forward.  Note: we assume that a hash bucket chain is
     878                 :             :          * usually smaller than the buffer ring being used by VACUUM, else using
     879                 :             :          * the access strategy here would be counterproductive.
     880                 :             :          */
     881                 :          12 :         rbuf = InvalidBuffer;
     882                 :          12 :         ropaque = wopaque;
     883                 :          12 :         do
     884                 :             :         {
     885                 :          60 :                 rblkno = ropaque->hasho_nextblkno;
     886         [ +  + ]:          60 :                 if (rbuf != InvalidBuffer)
     887                 :          48 :                         _hash_relbuf(rel, rbuf);
     888                 :         120 :                 rbuf = _hash_getbuf_with_strategy(rel,
     889                 :          60 :                                                                                   rblkno,
     890                 :             :                                                                                   HASH_WRITE,
     891                 :             :                                                                                   LH_OVERFLOW_PAGE,
     892                 :          60 :                                                                                   bstrategy);
     893                 :          60 :                 rpage = BufferGetPage(rbuf);
     894                 :          60 :                 ropaque = HashPageGetOpaque(rpage);
     895         [ +  - ]:          60 :                 Assert(ropaque->hasho_bucket == bucket);
     896         [ +  + ]:          60 :         } while (BlockNumberIsValid(ropaque->hasho_nextblkno));
     897                 :             : 
     898                 :             :         /*
     899                 :             :          * squeeze the tuples.
     900                 :             :          */
     901                 :          33 :         for (;;)
     902                 :             :         {
     903                 :          33 :                 OffsetNumber roffnum;
     904                 :          33 :                 OffsetNumber maxroffnum;
     905                 :          33 :                 OffsetNumber deletable[MaxOffsetNumber];
     906                 :          33 :                 IndexTuple      itups[MaxIndexTuplesPerPage];
     907                 :          33 :                 Size            tups_size[MaxIndexTuplesPerPage];
     908                 :          33 :                 OffsetNumber itup_offsets[MaxIndexTuplesPerPage];
     909                 :          33 :                 uint16          ndeletable = 0;
     910                 :          33 :                 uint16          nitups = 0;
     911                 :          33 :                 Size            all_tups_size = 0;
     912                 :          33 :                 int                     i;
     913                 :          33 :                 bool            retain_pin = false;
     914                 :             : 
     915                 :             : readpage:
     916                 :             :                 /* Scan each tuple in "read" page */
     917                 :          34 :                 maxroffnum = PageGetMaxOffsetNumber(rpage);
     918         [ +  + ]:         884 :                 for (roffnum = FirstOffsetNumber;
     919                 :         884 :                          roffnum <= maxroffnum;
     920                 :         850 :                          roffnum = OffsetNumberNext(roffnum))
     921                 :             :                 {
     922                 :         853 :                         IndexTuple      itup;
     923                 :         853 :                         Size            itemsz;
     924                 :             : 
     925                 :             :                         /* skip dead tuples */
     926         [ -  + ]:         853 :                         if (ItemIdIsDead(PageGetItemId(rpage, roffnum)))
     927                 :           0 :                                 continue;
     928                 :             : 
     929                 :        1706 :                         itup = (IndexTuple) PageGetItem(rpage,
     930                 :         853 :                                                                                         PageGetItemId(rpage, roffnum));
     931                 :         853 :                         itemsz = IndexTupleSize(itup);
     932                 :         853 :                         itemsz = MAXALIGN(itemsz);
     933                 :             : 
     934                 :             :                         /*
     935                 :             :                          * Walk up the bucket chain, looking for a page big enough for
     936                 :             :                          * this item and all other accumulated items.  Exit if we reach
     937                 :             :                          * the read page.
     938                 :             :                          */
     939         [ +  + ]:         879 :                         while (PageGetFreeSpaceForMultipleTuples(wpage, nitups + 1) < (all_tups_size + itemsz))
     940                 :             :                         {
     941                 :          29 :                                 Buffer          next_wbuf = InvalidBuffer;
     942                 :          29 :                                 bool            tups_moved = false;
     943                 :             : 
     944         [ +  - ]:          29 :                                 Assert(!PageIsEmpty(wpage));
     945                 :             : 
     946         [ +  + ]:          29 :                                 if (wblkno == bucket_blkno)
     947                 :           5 :                                         retain_pin = true;
     948                 :             : 
     949                 :          29 :                                 wblkno = wopaque->hasho_nextblkno;
     950         [ -  + ]:          29 :                                 Assert(BlockNumberIsValid(wblkno));
     951                 :             : 
     952                 :             :                                 /* don't need to move to next page if we reached the read page */
     953         [ +  + ]:          29 :                                 if (wblkno != rblkno)
     954                 :          54 :                                         next_wbuf = _hash_getbuf_with_strategy(rel,
     955                 :          27 :                                                                                                                    wblkno,
     956                 :             :                                                                                                                    HASH_WRITE,
     957                 :             :                                                                                                                    LH_OVERFLOW_PAGE,
     958                 :          27 :                                                                                                                    bstrategy);
     959                 :             : 
     960         [ +  + ]:          29 :                                 if (nitups > 0)
     961                 :             :                                 {
     962         [ -  + ]:           1 :                                         Assert(nitups == ndeletable);
     963                 :             : 
     964                 :             :                                         /*
     965                 :             :                                          * This operation needs to log multiple tuples, prepare
     966                 :             :                                          * WAL for that.
     967                 :             :                                          */
     968   [ +  -  +  -  :           1 :                                         if (RelationNeedsWAL(rel))
             +  -  -  + ]
     969                 :           1 :                                                 XLogEnsureRecordSpace(0, 3 + nitups);
     970                 :             : 
     971                 :           1 :                                         START_CRIT_SECTION();
     972                 :             : 
     973                 :             :                                         /*
     974                 :             :                                          * we have to insert tuples on the "write" page, being
     975                 :             :                                          * careful to preserve hashkey ordering.  (If we insert
     976                 :             :                                          * many tuples into the same "write" page it would be
     977                 :             :                                          * worth qsort'ing them).
     978                 :             :                                          */
     979                 :           1 :                                         _hash_pgaddmultitup(rel, wbuf, itups, itup_offsets, nitups);
     980                 :           1 :                                         MarkBufferDirty(wbuf);
     981                 :             : 
     982                 :             :                                         /* Delete tuples we already moved off read page */
     983                 :           1 :                                         PageIndexMultiDelete(rpage, deletable, ndeletable);
     984                 :           1 :                                         MarkBufferDirty(rbuf);
     985                 :             : 
     986                 :             :                                         /* XLOG stuff */
     987   [ +  -  +  -  :           1 :                                         if (RelationNeedsWAL(rel))
             +  -  -  + ]
     988                 :             :                                         {
     989                 :           1 :                                                 XLogRecPtr      recptr;
     990                 :           1 :                                                 xl_hash_move_page_contents xlrec;
     991                 :             : 
     992                 :           1 :                                                 xlrec.ntups = nitups;
     993                 :           1 :                                                 xlrec.is_prim_bucket_same_wrt = (wbuf == bucket_buf);
     994                 :             : 
     995                 :           1 :                                                 XLogBeginInsert();
     996                 :           1 :                                                 XLogRegisterData(&xlrec, SizeOfHashMovePageContents);
     997                 :             : 
     998                 :             :                                                 /*
     999                 :             :                                                  * bucket buffer was not changed, but still needs to
    1000                 :             :                                                  * be registered to ensure that we can acquire a
    1001                 :             :                                                  * cleanup lock on it during replay.
    1002                 :             :                                                  */
    1003         [ +  - ]:           1 :                                                 if (!xlrec.is_prim_bucket_same_wrt)
    1004                 :             :                                                 {
    1005                 :           0 :                                                         int                     flags = REGBUF_STANDARD | REGBUF_NO_IMAGE | REGBUF_NO_CHANGE;
    1006                 :             : 
    1007                 :           0 :                                                         XLogRegisterBuffer(0, bucket_buf, flags);
    1008                 :           0 :                                                 }
    1009                 :             : 
    1010                 :           1 :                                                 XLogRegisterBuffer(1, wbuf, REGBUF_STANDARD);
    1011                 :           2 :                                                 XLogRegisterBufData(1, itup_offsets,
    1012                 :           1 :                                                                                         nitups * sizeof(OffsetNumber));
    1013         [ +  + ]:         343 :                                                 for (i = 0; i < nitups; i++)
    1014                 :         342 :                                                         XLogRegisterBufData(1, itups[i], tups_size[i]);
    1015                 :             : 
    1016                 :           1 :                                                 XLogRegisterBuffer(2, rbuf, REGBUF_STANDARD);
    1017                 :           2 :                                                 XLogRegisterBufData(2, deletable,
    1018                 :           1 :                                                                                         ndeletable * sizeof(OffsetNumber));
    1019                 :             : 
    1020                 :           1 :                                                 recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_MOVE_PAGE_CONTENTS);
    1021                 :             : 
    1022                 :           1 :                                                 PageSetLSN(BufferGetPage(wbuf), recptr);
    1023                 :           1 :                                                 PageSetLSN(BufferGetPage(rbuf), recptr);
    1024                 :           1 :                                         }
    1025                 :             : 
    1026         [ +  - ]:           1 :                                         END_CRIT_SECTION();
    1027                 :             : 
    1028                 :           1 :                                         tups_moved = true;
    1029                 :           1 :                                 }
    1030                 :             : 
    1031                 :             :                                 /*
    1032                 :             :                                  * release the lock on previous page after acquiring the lock
    1033                 :             :                                  * on next page
    1034                 :             :                                  */
    1035         [ +  + ]:          29 :                                 if (retain_pin)
    1036                 :           5 :                                         LockBuffer(wbuf, BUFFER_LOCK_UNLOCK);
    1037                 :             :                                 else
    1038                 :          24 :                                         _hash_relbuf(rel, wbuf);
    1039                 :             : 
    1040                 :             :                                 /* nothing more to do if we reached the read page */
    1041         [ +  + ]:          29 :                                 if (rblkno == wblkno)
    1042                 :             :                                 {
    1043                 :           2 :                                         _hash_relbuf(rel, rbuf);
    1044                 :           2 :                                         return;
    1045                 :             :                                 }
    1046                 :             : 
    1047                 :          27 :                                 wbuf = next_wbuf;
    1048                 :          27 :                                 wpage = BufferGetPage(wbuf);
    1049                 :          27 :                                 wopaque = HashPageGetOpaque(wpage);
    1050         [ -  + ]:          27 :                                 Assert(wopaque->hasho_bucket == bucket);
    1051                 :          27 :                                 retain_pin = false;
    1052                 :             : 
    1053                 :             :                                 /* be tidy */
    1054         [ +  + ]:         369 :                                 for (i = 0; i < nitups; i++)
    1055                 :         342 :                                         pfree(itups[i]);
    1056                 :          27 :                                 nitups = 0;
    1057                 :          27 :                                 all_tups_size = 0;
    1058                 :          27 :                                 ndeletable = 0;
    1059                 :             : 
    1060                 :             :                                 /*
    1061                 :             :                                  * after moving the tuples, rpage would have been compacted,
    1062                 :             :                                  * so we need to rescan it.
    1063                 :             :                                  */
    1064         [ +  + ]:          27 :                                 if (tups_moved)
    1065                 :           1 :                                         goto readpage;
    1066         [ +  + ]:          29 :                         }
    1067                 :             : 
    1068                 :             :                         /* remember tuple for deletion from "read" page */
    1069                 :         850 :                         deletable[ndeletable++] = roffnum;
    1070                 :             : 
    1071                 :             :                         /*
    1072                 :             :                          * we need a copy of index tuples as they can be freed as part of
    1073                 :             :                          * overflow page, however we need them to write a WAL record in
    1074                 :             :                          * _hash_freeovflpage.
    1075                 :             :                          */
    1076                 :         850 :                         itups[nitups] = CopyIndexTuple(itup);
    1077                 :         850 :                         tups_size[nitups++] = itemsz;
    1078                 :         850 :                         all_tups_size += itemsz;
    1079   [ +  -  +  + ]:         853 :                 }
    1080                 :             : 
    1081                 :             :                 /*
    1082                 :             :                  * If we reach here, there are no live tuples on the "read" page ---
    1083                 :             :                  * it was empty when we got to it, or we moved them all.  So we can
    1084                 :             :                  * just free the page without bothering with deleting tuples
    1085                 :             :                  * individually.  Then advance to the previous "read" page.
    1086                 :             :                  *
    1087                 :             :                  * Tricky point here: if our read and write pages are adjacent in the
    1088                 :             :                  * bucket chain, our write lock on wbuf will conflict with
    1089                 :             :                  * _hash_freeovflpage's attempt to update the sibling links of the
    1090                 :             :                  * removed page.  In that case, we don't need to lock it again.
    1091                 :             :                  */
    1092                 :          31 :                 rblkno = ropaque->hasho_prevblkno;
    1093         [ -  + ]:          31 :                 Assert(BlockNumberIsValid(rblkno));
    1094                 :             : 
    1095                 :             :                 /* free this overflow page (releases rbuf) */
    1096                 :          62 :                 _hash_freeovflpage(rel, bucket_buf, rbuf, wbuf, itups, itup_offsets,
    1097                 :          31 :                                                    tups_size, nitups, bstrategy);
    1098                 :             : 
    1099                 :             :                 /* be tidy */
    1100         [ +  + ]:         539 :                 for (i = 0; i < nitups; i++)
    1101                 :         508 :                         pfree(itups[i]);
    1102                 :             : 
    1103                 :             :                 /* are we freeing the page adjacent to wbuf? */
    1104         [ +  + ]:          31 :                 if (rblkno == wblkno)
    1105                 :             :                 {
    1106                 :             :                         /* retain the pin on primary bucket page till end of bucket scan */
    1107         [ +  + ]:          10 :                         if (wblkno == bucket_blkno)
    1108                 :           7 :                                 LockBuffer(wbuf, BUFFER_LOCK_UNLOCK);
    1109                 :             :                         else
    1110                 :           3 :                                 _hash_relbuf(rel, wbuf);
    1111                 :          10 :                         return;
    1112                 :             :                 }
    1113                 :             : 
    1114                 :          42 :                 rbuf = _hash_getbuf_with_strategy(rel,
    1115                 :          21 :                                                                                   rblkno,
    1116                 :             :                                                                                   HASH_WRITE,
    1117                 :             :                                                                                   LH_OVERFLOW_PAGE,
    1118                 :          21 :                                                                                   bstrategy);
    1119                 :          21 :                 rpage = BufferGetPage(rbuf);
    1120                 :          21 :                 ropaque = HashPageGetOpaque(rpage);
    1121         [ +  - ]:          21 :                 Assert(ropaque->hasho_bucket == bucket);
    1122         [ +  + ]:          33 :         }
    1123                 :             : 
    1124                 :             :         /* NOTREACHED */
    1125                 :         220 : }
        

Generated by: LCOV version 2.3.2-1