LCOV - code coverage report
Current view: top level - src/backend/access/gin - gininsert.c (source / functions) Coverage Total Hit
Test: Code coverage Lines: 26.5 % 889 236
Test Date: 2026-01-26 10:56:24 Functions: 25.7 % 35 9
Legend: Lines:     hit not hit
Branches: + taken - not taken # not executed
Branches: 15.6 % 288 45

             Branch data     Line data    Source code
       1                 :             : /*-------------------------------------------------------------------------
       2                 :             :  *
       3                 :             :  * gininsert.c
       4                 :             :  *        insert routines for the postgres inverted index access method.
       5                 :             :  *
       6                 :             :  *
       7                 :             :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       8                 :             :  * Portions Copyright (c) 1994, Regents of the University of California
       9                 :             :  *
      10                 :             :  * IDENTIFICATION
      11                 :             :  *                      src/backend/access/gin/gininsert.c
      12                 :             :  *-------------------------------------------------------------------------
      13                 :             :  */
      14                 :             : 
      15                 :             : #include "postgres.h"
      16                 :             : 
      17                 :             : #include "access/gin_private.h"
      18                 :             : #include "access/gin_tuple.h"
      19                 :             : #include "access/parallel.h"
      20                 :             : #include "access/table.h"
      21                 :             : #include "access/tableam.h"
      22                 :             : #include "access/xloginsert.h"
      23                 :             : #include "catalog/index.h"
      24                 :             : #include "catalog/pg_collation.h"
      25                 :             : #include "commands/progress.h"
      26                 :             : #include "miscadmin.h"
      27                 :             : #include "nodes/execnodes.h"
      28                 :             : #include "pgstat.h"
      29                 :             : #include "storage/bufmgr.h"
      30                 :             : #include "storage/predicate.h"
      31                 :             : #include "tcop/tcopprot.h"
      32                 :             : #include "utils/datum.h"
      33                 :             : #include "utils/memutils.h"
      34                 :             : #include "utils/builtins.h"
      35                 :             : #include "utils/rel.h"
      36                 :             : #include "utils/typcache.h"
      37                 :             : 
      38                 :             : 
      39                 :             : /* Magic numbers for parallel state sharing */
      40                 :             : #define PARALLEL_KEY_GIN_SHARED                 UINT64CONST(0xB000000000000001)
      41                 :             : #define PARALLEL_KEY_TUPLESORT                  UINT64CONST(0xB000000000000002)
      42                 :             : #define PARALLEL_KEY_QUERY_TEXT                 UINT64CONST(0xB000000000000003)
      43                 :             : #define PARALLEL_KEY_WAL_USAGE                  UINT64CONST(0xB000000000000004)
      44                 :             : #define PARALLEL_KEY_BUFFER_USAGE               UINT64CONST(0xB000000000000005)
      45                 :             : 
      46                 :             : /*
      47                 :             :  * Status for index builds performed in parallel.  This is allocated in a
      48                 :             :  * dynamic shared memory segment.
      49                 :             :  */
      50                 :             : typedef struct GinBuildShared
      51                 :             : {
      52                 :             :         /*
      53                 :             :          * These fields are not modified during the build.  They primarily exist
      54                 :             :          * for the benefit of worker processes that need to create state
      55                 :             :          * corresponding to that used by the leader.
      56                 :             :          */
      57                 :             :         Oid                     heaprelid;
      58                 :             :         Oid                     indexrelid;
      59                 :             :         bool            isconcurrent;
      60                 :             :         int                     scantuplesortstates;
      61                 :             : 
      62                 :             :         /*
      63                 :             :          * workersdonecv is used to monitor the progress of workers.  All parallel
      64                 :             :          * participants must indicate that they are done before leader can use
      65                 :             :          * results built by the workers (and before leader can write the data into
      66                 :             :          * the index).
      67                 :             :          */
      68                 :             :         ConditionVariable workersdonecv;
      69                 :             : 
      70                 :             :         /*
      71                 :             :          * mutex protects all following fields
      72                 :             :          *
      73                 :             :          * These fields contain status information of interest to GIN index builds
      74                 :             :          * that must work just the same when an index is built in parallel.
      75                 :             :          */
      76                 :             :         slock_t         mutex;
      77                 :             : 
      78                 :             :         /*
      79                 :             :          * Mutable state that is maintained by workers, and reported back to
      80                 :             :          * leader at end of the scans.
      81                 :             :          *
      82                 :             :          * nparticipantsdone is number of worker processes finished.
      83                 :             :          *
      84                 :             :          * reltuples is the total number of input heap tuples.
      85                 :             :          *
      86                 :             :          * indtuples is the total number of tuples that made it into the index.
      87                 :             :          */
      88                 :             :         int                     nparticipantsdone;
      89                 :             :         double          reltuples;
      90                 :             :         double          indtuples;
      91                 :             : 
      92                 :             :         /*
      93                 :             :          * ParallelTableScanDescData data follows. Can't directly embed here, as
      94                 :             :          * implementations of the parallel table scan desc interface might need
      95                 :             :          * stronger alignment.
      96                 :             :          */
      97                 :             : } GinBuildShared;
      98                 :             : 
      99                 :             : /*
     100                 :             :  * Return pointer to a GinBuildShared's parallel table scan.
     101                 :             :  *
     102                 :             :  * c.f. shm_toc_allocate as to why BUFFERALIGN is used, rather than just
     103                 :             :  * MAXALIGN.
     104                 :             :  */
     105                 :             : #define ParallelTableScanFromGinBuildShared(shared) \
     106                 :             :         (ParallelTableScanDesc) ((char *) (shared) + BUFFERALIGN(sizeof(GinBuildShared)))
     107                 :             : 
     108                 :             : /*
     109                 :             :  * Status for leader in parallel index build.
     110                 :             :  */
     111                 :             : typedef struct GinLeader
     112                 :             : {
     113                 :             :         /* parallel context itself */
     114                 :             :         ParallelContext *pcxt;
     115                 :             : 
     116                 :             :         /*
     117                 :             :          * nparticipanttuplesorts is the exact number of worker processes
     118                 :             :          * successfully launched, plus one leader process if it participates as a
     119                 :             :          * worker (only DISABLE_LEADER_PARTICIPATION builds avoid leader
     120                 :             :          * participating as a worker).
     121                 :             :          */
     122                 :             :         int                     nparticipanttuplesorts;
     123                 :             : 
     124                 :             :         /*
     125                 :             :          * Leader process convenience pointers to shared state (leader avoids TOC
     126                 :             :          * lookups).
     127                 :             :          *
     128                 :             :          * GinBuildShared is the shared state for entire build.  sharedsort is the
     129                 :             :          * shared, tuplesort-managed state passed to each process tuplesort.
     130                 :             :          * snapshot is the snapshot used by the scan iff an MVCC snapshot is
     131                 :             :          * required.
     132                 :             :          */
     133                 :             :         GinBuildShared *ginshared;
     134                 :             :         Sharedsort *sharedsort;
     135                 :             :         Snapshot        snapshot;
     136                 :             :         WalUsage   *walusage;
     137                 :             :         BufferUsage *bufferusage;
     138                 :             : } GinLeader;
     139                 :             : 
     140                 :             : typedef struct
     141                 :             : {
     142                 :             :         GinState        ginstate;
     143                 :             :         double          indtuples;
     144                 :             :         GinStatsData buildStats;
     145                 :             :         MemoryContext tmpCtx;
     146                 :             :         MemoryContext funcCtx;
     147                 :             :         BuildAccumulator accum;
     148                 :             :         ItemPointerData tid;
     149                 :             :         int                     work_mem;
     150                 :             : 
     151                 :             :         /*
     152                 :             :          * bs_leader is only present when a parallel index build is performed, and
     153                 :             :          * only in the leader process.
     154                 :             :          */
     155                 :             :         GinLeader  *bs_leader;
     156                 :             : 
     157                 :             :         /* number of participating workers (including leader) */
     158                 :             :         int                     bs_num_workers;
     159                 :             : 
     160                 :             :         /* used to pass information from workers to leader */
     161                 :             :         double          bs_numtuples;
     162                 :             :         double          bs_reltuples;
     163                 :             : 
     164                 :             :         /*
     165                 :             :          * The sortstate is used by workers (including the leader). It has to be
     166                 :             :          * part of the build state, because that's the only thing passed to the
     167                 :             :          * build callback etc.
     168                 :             :          */
     169                 :             :         Tuplesortstate *bs_sortstate;
     170                 :             : 
     171                 :             :         /*
     172                 :             :          * The sortstate used only within a single worker for the first merge pass
     173                 :             :          * happening there. In principle it doesn't need to be part of the build
     174                 :             :          * state and we could pass it around directly, but it's more convenient
     175                 :             :          * this way. And it's part of the build state, after all.
     176                 :             :          */
     177                 :             :         Tuplesortstate *bs_worker_sort;
     178                 :             : } GinBuildState;
     179                 :             : 
     180                 :             : 
     181                 :             : /* parallel index builds */
     182                 :             : static void _gin_begin_parallel(GinBuildState *buildstate, Relation heap, Relation index,
     183                 :             :                                                                 bool isconcurrent, int request);
     184                 :             : static void _gin_end_parallel(GinLeader *ginleader, GinBuildState *state);
     185                 :             : static Size _gin_parallel_estimate_shared(Relation heap, Snapshot snapshot);
     186                 :             : static double _gin_parallel_heapscan(GinBuildState *state);
     187                 :             : static double _gin_parallel_merge(GinBuildState *state);
     188                 :             : static void _gin_leader_participate_as_worker(GinBuildState *buildstate,
     189                 :             :                                                                                           Relation heap, Relation index);
     190                 :             : static void _gin_parallel_scan_and_build(GinBuildState *state,
     191                 :             :                                                                                  GinBuildShared *ginshared,
     192                 :             :                                                                                  Sharedsort *sharedsort,
     193                 :             :                                                                                  Relation heap, Relation index,
     194                 :             :                                                                                  int sortmem, bool progress);
     195                 :             : 
     196                 :             : static ItemPointer _gin_parse_tuple_items(GinTuple *a);
     197                 :             : static Datum _gin_parse_tuple_key(GinTuple *a);
     198                 :             : 
     199                 :             : static GinTuple *_gin_build_tuple(OffsetNumber attrnum, unsigned char category,
     200                 :             :                                                                   Datum key, int16 typlen, bool typbyval,
     201                 :             :                                                                   ItemPointerData *items, uint32 nitems,
     202                 :             :                                                                   Size *len);
     203                 :             : 
     204                 :             : /*
     205                 :             :  * Adds array of item pointers to tuple's posting list, or
     206                 :             :  * creates posting tree and tuple pointing to tree in case
     207                 :             :  * of not enough space.  Max size of tuple is defined in
     208                 :             :  * GinFormTuple().  Returns a new, modified index tuple.
     209                 :             :  * items[] must be in sorted order with no duplicates.
     210                 :             :  */
     211                 :             : static IndexTuple
     212                 :        3660 : addItemPointersToLeafTuple(GinState *ginstate,
     213                 :             :                                                    IndexTuple old,
     214                 :             :                                                    ItemPointerData *items, uint32 nitem,
     215                 :             :                                                    GinStatsData *buildStats, Buffer buffer)
     216                 :             : {
     217                 :        3660 :         OffsetNumber attnum;
     218                 :        3660 :         Datum           key;
     219                 :        3660 :         GinNullCategory category;
     220                 :        3660 :         IndexTuple      res;
     221                 :        3660 :         ItemPointerData *newItems,
     222                 :             :                            *oldItems;
     223                 :        3660 :         int                     oldNPosting,
     224                 :             :                                 newNPosting,
     225                 :             :                                 nwritten;
     226                 :        3660 :         GinPostingList *compressedList;
     227                 :             : 
     228         [ +  - ]:        3660 :         Assert(!GinIsPostingTree(old));
     229                 :             : 
     230                 :        3660 :         attnum = gintuple_get_attrnum(ginstate, old);
     231                 :        3660 :         key = gintuple_get_key(ginstate, old, &category);
     232                 :             : 
     233                 :             :         /* merge the old and new posting lists */
     234                 :        3660 :         oldItems = ginReadTuple(ginstate, attnum, old, &oldNPosting);
     235                 :             : 
     236                 :        7320 :         newItems = ginMergeItemPointers(items, nitem,
     237                 :        3660 :                                                                         oldItems, oldNPosting,
     238                 :             :                                                                         &newNPosting);
     239                 :             : 
     240                 :             :         /* Compress the posting list, and try to a build tuple with room for it */
     241                 :        3660 :         res = NULL;
     242                 :        3660 :         compressedList = ginCompressPostingList(newItems, newNPosting, GinMaxItemSize, &nwritten);
     243         [ -  + ]:        3660 :         if (nwritten == newNPosting)
     244                 :             :         {
     245                 :        7320 :                 res = GinFormTuple(ginstate, attnum, key, category,
     246                 :        3660 :                                                    (char *) compressedList,
     247                 :        3660 :                                                    SizeOfGinPostingList(compressedList),
     248                 :        3660 :                                                    newNPosting,
     249                 :             :                                                    false);
     250                 :        3660 :         }
     251                 :             : 
     252                 :        3660 :         pfree(newItems);
     253                 :        3660 :         pfree(compressedList);
     254                 :             : 
     255         [ +  + ]:        3660 :         if (!res)
     256                 :             :         {
     257                 :             :                 /* posting list would be too big, convert to posting tree */
     258                 :           1 :                 BlockNumber postingRoot;
     259                 :             : 
     260                 :             :                 /*
     261                 :             :                  * Initialize posting tree with the old tuple's posting list.  It's
     262                 :             :                  * surely small enough to fit on one posting-tree page, and should
     263                 :             :                  * already be in order with no duplicates.
     264                 :             :                  */
     265                 :           2 :                 postingRoot = createPostingTree(ginstate->index,
     266                 :           1 :                                                                                 oldItems,
     267                 :           1 :                                                                                 oldNPosting,
     268                 :           1 :                                                                                 buildStats,
     269                 :           1 :                                                                                 buffer);
     270                 :             : 
     271                 :             :                 /* Now insert the TIDs-to-be-added into the posting tree */
     272                 :           2 :                 ginInsertItemPointers(ginstate->index, postingRoot,
     273                 :           1 :                                                           items, nitem,
     274                 :           1 :                                                           buildStats);
     275                 :             : 
     276                 :             :                 /* And build a new posting-tree-only result tuple */
     277                 :           1 :                 res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, 0, true);
     278                 :           1 :                 GinSetPostingTree(res, postingRoot);
     279                 :           1 :         }
     280                 :        3660 :         pfree(oldItems);
     281                 :             : 
     282                 :        7320 :         return res;
     283                 :        3660 : }
     284                 :             : 
     285                 :             : /*
     286                 :             :  * Build a fresh leaf tuple, either posting-list or posting-tree format
     287                 :             :  * depending on whether the given items list will fit.
     288                 :             :  * items[] must be in sorted order with no duplicates.
     289                 :             :  *
     290                 :             :  * This is basically the same logic as in addItemPointersToLeafTuple,
     291                 :             :  * but working from slightly different input.
     292                 :             :  */
     293                 :             : static IndexTuple
     294                 :       74494 : buildFreshLeafTuple(GinState *ginstate,
     295                 :             :                                         OffsetNumber attnum, Datum key, GinNullCategory category,
     296                 :             :                                         ItemPointerData *items, uint32 nitem,
     297                 :             :                                         GinStatsData *buildStats, Buffer buffer)
     298                 :             : {
     299                 :       74494 :         IndexTuple      res = NULL;
     300                 :       74494 :         GinPostingList *compressedList;
     301                 :       74494 :         int                     nwritten;
     302                 :             : 
     303                 :             :         /* try to build a posting list tuple with all the items */
     304                 :       74494 :         compressedList = ginCompressPostingList(items, nitem, GinMaxItemSize, &nwritten);
     305         [ +  + ]:       74494 :         if (nwritten == nitem)
     306                 :             :         {
     307                 :      148978 :                 res = GinFormTuple(ginstate, attnum, key, category,
     308                 :       74489 :                                                    (char *) compressedList,
     309                 :       74489 :                                                    SizeOfGinPostingList(compressedList),
     310                 :       74489 :                                                    nitem, false);
     311                 :       74489 :         }
     312                 :       74494 :         pfree(compressedList);
     313                 :             : 
     314         [ +  + ]:       74494 :         if (!res)
     315                 :             :         {
     316                 :             :                 /* posting list would be too big, build posting tree */
     317                 :           5 :                 BlockNumber postingRoot;
     318                 :             : 
     319                 :             :                 /*
     320                 :             :                  * Build posting-tree-only result tuple.  We do this first so as to
     321                 :             :                  * fail quickly if the key is too big.
     322                 :             :                  */
     323                 :           5 :                 res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, 0, true);
     324                 :             : 
     325                 :             :                 /*
     326                 :             :                  * Initialize a new posting tree with the TIDs.
     327                 :             :                  */
     328                 :          10 :                 postingRoot = createPostingTree(ginstate->index, items, nitem,
     329                 :           5 :                                                                                 buildStats, buffer);
     330                 :             : 
     331                 :             :                 /* And save the root link in the result tuple */
     332                 :           5 :                 GinSetPostingTree(res, postingRoot);
     333                 :           5 :         }
     334                 :             : 
     335                 :      148988 :         return res;
     336                 :       74494 : }
     337                 :             : 
     338                 :             : /*
     339                 :             :  * Insert one or more heap TIDs associated with the given key value.
     340                 :             :  * This will either add a single key entry, or enlarge a pre-existing entry.
     341                 :             :  *
     342                 :             :  * During an index build, buildStats is non-null and the counters
     343                 :             :  * it contains should be incremented as needed.
     344                 :             :  */
     345                 :             : void
     346                 :       81496 : ginEntryInsert(GinState *ginstate,
     347                 :             :                            OffsetNumber attnum, Datum key, GinNullCategory category,
     348                 :             :                            ItemPointerData *items, uint32 nitem,
     349                 :             :                            GinStatsData *buildStats)
     350                 :             : {
     351                 :       81496 :         GinBtreeData btree;
     352                 :       81496 :         GinBtreeEntryInsertData insertdata;
     353                 :       81496 :         GinBtreeStack *stack;
     354                 :       81496 :         IndexTuple      itup;
     355                 :       81496 :         Page            page;
     356                 :             : 
     357                 :       81496 :         insertdata.isDelete = false;
     358                 :             : 
     359                 :       81496 :         ginPrepareEntryScan(&btree, attnum, key, category, ginstate);
     360                 :       81496 :         btree.isBuild = (buildStats != NULL);
     361                 :             : 
     362                 :       81496 :         stack = ginFindLeafPage(&btree, false, false);
     363                 :       81496 :         page = BufferGetPage(stack->buffer);
     364                 :             : 
     365         [ +  + ]:       81496 :         if (btree.findItem(&btree, stack))
     366                 :             :         {
     367                 :             :                 /* found pre-existing entry */
     368                 :        7002 :                 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
     369                 :             : 
     370         [ +  + ]:        7002 :                 if (GinIsPostingTree(itup))
     371                 :             :                 {
     372                 :             :                         /* add entries to existing posting tree */
     373                 :        3342 :                         BlockNumber rootPostingTree = GinGetPostingTree(itup);
     374                 :             : 
     375                 :             :                         /* release all stack */
     376                 :        3342 :                         LockBuffer(stack->buffer, GIN_UNLOCK);
     377                 :        3342 :                         freeGinBtreeStack(stack);
     378                 :             : 
     379                 :             :                         /* insert into posting tree */
     380                 :        6684 :                         ginInsertItemPointers(ginstate->index, rootPostingTree,
     381                 :        3342 :                                                                   items, nitem,
     382                 :        3342 :                                                                   buildStats);
     383                 :             :                         return;
     384                 :        3342 :                 }
     385                 :             : 
     386                 :        7320 :                 CheckForSerializableConflictIn(ginstate->index, NULL,
     387                 :        3660 :                                                                            BufferGetBlockNumber(stack->buffer));
     388                 :             :                 /* modify an existing leaf entry */
     389                 :        7320 :                 itup = addItemPointersToLeafTuple(ginstate, itup,
     390                 :        3660 :                                                                                   items, nitem, buildStats, stack->buffer);
     391                 :             : 
     392                 :        3660 :                 insertdata.isDelete = true;
     393                 :        3660 :         }
     394                 :             :         else
     395                 :             :         {
     396                 :      148988 :                 CheckForSerializableConflictIn(ginstate->index, NULL,
     397                 :       74494 :                                                                            BufferGetBlockNumber(stack->buffer));
     398                 :             :                 /* no match, so construct a new leaf entry */
     399                 :      148988 :                 itup = buildFreshLeafTuple(ginstate, attnum, key, category,
     400                 :       74494 :                                                                    items, nitem, buildStats, stack->buffer);
     401                 :             : 
     402                 :             :                 /*
     403                 :             :                  * nEntries counts leaf tuples, so increment it only when we make a
     404                 :             :                  * new one.
     405                 :             :                  */
     406         [ +  + ]:       74494 :                 if (buildStats)
     407                 :       14488 :                         buildStats->nEntries++;
     408                 :             :         }
     409                 :             : 
     410                 :             :         /* Insert the new or modified leaf tuple */
     411                 :       78154 :         insertdata.entry = itup;
     412                 :       78154 :         ginInsertValue(&btree, stack, &insertdata, buildStats);
     413                 :       78154 :         pfree(itup);
     414         [ -  + ]:       81496 : }
     415                 :             : 
     416                 :             : /*
     417                 :             :  * Extract index entries for a single indexable item, and add them to the
     418                 :             :  * BuildAccumulator's state.
     419                 :             :  *
     420                 :             :  * This function is used only during initial index creation.
     421                 :             :  */
     422                 :             : static void
     423                 :       15064 : ginHeapTupleBulkInsert(GinBuildState *buildstate, OffsetNumber attnum,
     424                 :             :                                            Datum value, bool isNull,
     425                 :             :                                            ItemPointer heapptr)
     426                 :             : {
     427                 :       15064 :         Datum      *entries;
     428                 :       15064 :         GinNullCategory *categories;
     429                 :       15064 :         int32           nentries;
     430                 :       15064 :         MemoryContext oldCtx;
     431                 :             : 
     432                 :       15064 :         oldCtx = MemoryContextSwitchTo(buildstate->funcCtx);
     433                 :       30128 :         entries = ginExtractEntries(buildstate->accum.ginstate, attnum,
     434                 :       15064 :                                                                 value, isNull,
     435                 :             :                                                                 &nentries, &categories);
     436                 :       15064 :         MemoryContextSwitchTo(oldCtx);
     437                 :             : 
     438                 :       30128 :         ginInsertBAEntries(&buildstate->accum, heapptr, attnum,
     439                 :       15064 :                                            entries, categories, nentries);
     440                 :             : 
     441                 :       15064 :         buildstate->indtuples += nentries;
     442                 :             : 
     443                 :       15064 :         MemoryContextReset(buildstate->funcCtx);
     444                 :       15064 : }
     445                 :             : 
     446                 :             : static void
     447                 :       14961 : ginBuildCallback(Relation index, ItemPointer tid, Datum *values,
     448                 :             :                                  bool *isnull, bool tupleIsAlive, void *state)
     449                 :             : {
     450                 :       14961 :         GinBuildState *buildstate = (GinBuildState *) state;
     451                 :       14961 :         MemoryContext oldCtx;
     452                 :       14961 :         int                     i;
     453                 :             : 
     454                 :       14961 :         oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
     455                 :             : 
     456         [ +  + ]:       30025 :         for (i = 0; i < buildstate->ginstate.origTupdesc->natts; i++)
     457                 :       30128 :                 ginHeapTupleBulkInsert(buildstate, (OffsetNumber) (i + 1),
     458                 :       15064 :                                                            values[i], isnull[i], tid);
     459                 :             : 
     460                 :             :         /* If we've maxed out our available memory, dump everything to the index */
     461         [ +  - ]:       14961 :         if (buildstate->accum.allocatedMemory >= maintenance_work_mem * (Size) 1024)
     462                 :             :         {
     463                 :           0 :                 ItemPointerData *list;
     464                 :           0 :                 Datum           key;
     465                 :           0 :                 GinNullCategory category;
     466                 :           0 :                 uint32          nlist;
     467                 :           0 :                 OffsetNumber attnum;
     468                 :             : 
     469                 :           0 :                 ginBeginBAScan(&buildstate->accum);
     470   [ #  #  #  # ]:           0 :                 while ((list = ginGetBAEntry(&buildstate->accum,
     471                 :           0 :                                                                          &attnum, &key, &category, &nlist)) != NULL)
     472                 :             :                 {
     473                 :             :                         /* there could be many entries, so be willing to abort here */
     474         [ #  # ]:           0 :                         CHECK_FOR_INTERRUPTS();
     475                 :           0 :                         ginEntryInsert(&buildstate->ginstate, attnum, key, category,
     476                 :           0 :                                                    list, nlist, &buildstate->buildStats);
     477                 :             :                 }
     478                 :             : 
     479                 :           0 :                 MemoryContextReset(buildstate->tmpCtx);
     480                 :           0 :                 ginInitBA(&buildstate->accum);
     481                 :           0 :         }
     482                 :             : 
     483                 :       14961 :         MemoryContextSwitchTo(oldCtx);
     484                 :       14961 : }
     485                 :             : 
     486                 :             : /*
     487                 :             :  * ginFlushBuildState
     488                 :             :  *              Write all data from BuildAccumulator into the tuplesort.
     489                 :             :  *
     490                 :             :  * The number of TIDs written to the tuplesort at once is limited, to reduce
     491                 :             :  * the amount of memory needed when merging the intermediate results later.
     492                 :             :  * The leader will see up to two chunks per worker, so calculate the limit to
     493                 :             :  * not need more than MaxAllocSize overall.
     494                 :             :  *
     495                 :             :  * We don't need to worry about overflowing maintenance_work_mem. We can't
     496                 :             :  * build chunks larger than work_mem, and that limit was set so that workers
     497                 :             :  * produce sufficiently small chunks.
     498                 :             :  */
     499                 :             : static void
     500                 :           0 : ginFlushBuildState(GinBuildState *buildstate, Relation index)
     501                 :             : {
     502                 :           0 :         ItemPointerData *list;
     503                 :           0 :         Datum           key;
     504                 :           0 :         GinNullCategory category;
     505                 :           0 :         uint32          nlist;
     506                 :           0 :         OffsetNumber attnum;
     507                 :           0 :         TupleDesc       tdesc = RelationGetDescr(index);
     508                 :           0 :         uint32          maxlen;
     509                 :             : 
     510                 :             :         /* maximum number of TIDs per chunk (two chunks per worker) */
     511                 :           0 :         maxlen = MaxAllocSize / sizeof(ItemPointerData);
     512                 :           0 :         maxlen /= (2 * buildstate->bs_num_workers);
     513                 :             : 
     514                 :           0 :         ginBeginBAScan(&buildstate->accum);
     515   [ #  #  #  # ]:           0 :         while ((list = ginGetBAEntry(&buildstate->accum,
     516                 :           0 :                                                                  &attnum, &key, &category, &nlist)) != NULL)
     517                 :             :         {
     518                 :             :                 /* information about the key */
     519                 :           0 :                 CompactAttribute *attr = TupleDescCompactAttr(tdesc, (attnum - 1));
     520                 :             : 
     521                 :             :                 /* start of the chunk */
     522                 :           0 :                 uint32          offset = 0;
     523                 :             : 
     524                 :             :                 /* split the entry into smaller chunk with up to maxlen items */
     525         [ #  # ]:           0 :                 while (offset < nlist)
     526                 :             :                 {
     527                 :             :                         /* GIN tuple and tuple length */
     528                 :           0 :                         GinTuple   *tup;
     529                 :           0 :                         Size            tuplen;
     530         [ #  # ]:           0 :                         uint32          len = Min(maxlen, nlist - offset);
     531                 :             : 
     532                 :             :                         /* there could be many entries, so be willing to abort here */
     533         [ #  # ]:           0 :                         CHECK_FOR_INTERRUPTS();
     534                 :             : 
     535                 :           0 :                         tup = _gin_build_tuple(attnum, category,
     536                 :           0 :                                                                    key, attr->attlen, attr->attbyval,
     537                 :           0 :                                                                    &list[offset], len,
     538                 :             :                                                                    &tuplen);
     539                 :             : 
     540                 :           0 :                         offset += len;
     541                 :             : 
     542                 :           0 :                         tuplesort_putgintuple(buildstate->bs_worker_sort, tup, tuplen);
     543                 :             : 
     544                 :           0 :                         pfree(tup);
     545                 :           0 :                 }
     546                 :           0 :         }
     547                 :             : 
     548                 :           0 :         MemoryContextReset(buildstate->tmpCtx);
     549                 :           0 :         ginInitBA(&buildstate->accum);
     550                 :           0 : }
     551                 :             : 
     552                 :             : /*
     553                 :             :  * ginBuildCallbackParallel
     554                 :             :  *              Callback for the parallel index build.
     555                 :             :  *
     556                 :             :  * This is similar to the serial build callback ginBuildCallback, but
     557                 :             :  * instead of writing the accumulated entries into the index, each worker
     558                 :             :  * writes them into a (local) tuplesort.
     559                 :             :  *
     560                 :             :  * The worker then sorts and combines these entries, before writing them
     561                 :             :  * into a shared tuplesort for the leader (see _gin_parallel_scan_and_build
     562                 :             :  * for the whole process).
     563                 :             :  */
     564                 :             : static void
     565                 :           0 : ginBuildCallbackParallel(Relation index, ItemPointer tid, Datum *values,
     566                 :             :                                                  bool *isnull, bool tupleIsAlive, void *state)
     567                 :             : {
     568                 :           0 :         GinBuildState *buildstate = (GinBuildState *) state;
     569                 :           0 :         MemoryContext oldCtx;
     570                 :           0 :         int                     i;
     571                 :             : 
     572                 :           0 :         oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
     573                 :             : 
     574                 :             :         /*
     575                 :             :          * if scan wrapped around - flush accumulated entries and start anew
     576                 :             :          *
     577                 :             :          * With parallel scans, we don't have a guarantee the scan does not start
     578                 :             :          * half-way through the relation (serial builds disable sync scans and
     579                 :             :          * always start from block 0, parallel scans require allow_sync=true).
     580                 :             :          *
     581                 :             :          * Building the posting lists assumes the TIDs are monotonic and never go
     582                 :             :          * back, and the wrap around would break that. We handle that by detecting
     583                 :             :          * the wraparound, and flushing all entries. This means we'll later see
     584                 :             :          * two separate entries with non-overlapping TID lists (which can be
     585                 :             :          * combined by merge sort).
     586                 :             :          *
     587                 :             :          * To detect a wraparound, we remember the last TID seen by each worker
     588                 :             :          * (for any key). If the next TID seen by the worker is lower, the scan
     589                 :             :          * must have wrapped around.
     590                 :             :          */
     591         [ #  # ]:           0 :         if (ItemPointerCompare(tid, &buildstate->tid) < 0)
     592                 :           0 :                 ginFlushBuildState(buildstate, index);
     593                 :             : 
     594                 :             :         /* remember the TID we're about to process */
     595                 :           0 :         buildstate->tid = *tid;
     596                 :             : 
     597         [ #  # ]:           0 :         for (i = 0; i < buildstate->ginstate.origTupdesc->natts; i++)
     598                 :           0 :                 ginHeapTupleBulkInsert(buildstate, (OffsetNumber) (i + 1),
     599                 :           0 :                                                            values[i], isnull[i], tid);
     600                 :             : 
     601                 :             :         /*
     602                 :             :          * If we've maxed out our available memory, dump everything to the
     603                 :             :          * tuplesort. We use half the per-worker fraction of maintenance_work_mem,
     604                 :             :          * the other half is used for the tuplesort.
     605                 :             :          */
     606         [ #  # ]:           0 :         if (buildstate->accum.allocatedMemory >= buildstate->work_mem * (Size) 1024)
     607                 :           0 :                 ginFlushBuildState(buildstate, index);
     608                 :             : 
     609                 :           0 :         MemoryContextSwitchTo(oldCtx);
     610                 :           0 : }
     611                 :             : 
     612                 :             : IndexBuildResult *
     613                 :          17 : ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
     614                 :             : {
     615                 :          17 :         IndexBuildResult *result;
     616                 :          17 :         double          reltuples;
     617                 :          17 :         GinBuildState buildstate;
     618                 :          17 :         GinBuildState *state = &buildstate;
     619                 :          17 :         Buffer          RootBuffer,
     620                 :             :                                 MetaBuffer;
     621                 :          17 :         ItemPointerData *list;
     622                 :          17 :         Datum           key;
     623                 :          17 :         GinNullCategory category;
     624                 :          17 :         uint32          nlist;
     625                 :          17 :         MemoryContext oldCtx;
     626                 :          17 :         OffsetNumber attnum;
     627                 :             : 
     628         [ +  - ]:          17 :         if (RelationGetNumberOfBlocks(index) != 0)
     629   [ #  #  #  # ]:           0 :                 elog(ERROR, "index \"%s\" already contains data",
     630                 :             :                          RelationGetRelationName(index));
     631                 :             : 
     632                 :          17 :         initGinState(&buildstate.ginstate, index);
     633                 :          17 :         buildstate.indtuples = 0;
     634                 :          17 :         memset(&buildstate.buildStats, 0, sizeof(GinStatsData));
     635                 :             : 
     636                 :             :         /* Initialize fields for parallel build too. */
     637                 :          17 :         buildstate.bs_numtuples = 0;
     638                 :          17 :         buildstate.bs_reltuples = 0;
     639                 :          17 :         buildstate.bs_leader = NULL;
     640                 :          17 :         memset(&buildstate.tid, 0, sizeof(ItemPointerData));
     641                 :             : 
     642                 :             :         /* initialize the meta page */
     643                 :          17 :         MetaBuffer = GinNewBuffer(index);
     644                 :             : 
     645                 :             :         /* initialize the root page */
     646                 :          17 :         RootBuffer = GinNewBuffer(index);
     647                 :             : 
     648                 :          17 :         START_CRIT_SECTION();
     649                 :          17 :         GinInitMetabuffer(MetaBuffer);
     650                 :          17 :         MarkBufferDirty(MetaBuffer);
     651                 :          17 :         GinInitBuffer(RootBuffer, GIN_LEAF);
     652                 :          17 :         MarkBufferDirty(RootBuffer);
     653                 :             : 
     654                 :             : 
     655                 :          17 :         UnlockReleaseBuffer(MetaBuffer);
     656                 :          17 :         UnlockReleaseBuffer(RootBuffer);
     657         [ +  - ]:          17 :         END_CRIT_SECTION();
     658                 :             : 
     659                 :             :         /* count the root as first entry page */
     660                 :          17 :         buildstate.buildStats.nEntryPages++;
     661                 :             : 
     662                 :             :         /*
     663                 :             :          * create a temporary memory context that is used to hold data not yet
     664                 :             :          * dumped out to the index
     665                 :             :          */
     666                 :          17 :         buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
     667                 :             :                                                                                           "Gin build temporary context",
     668                 :             :                                                                                           ALLOCSET_DEFAULT_SIZES);
     669                 :             : 
     670                 :             :         /*
     671                 :             :          * create a temporary memory context that is used for calling
     672                 :             :          * ginExtractEntries(), and can be reset after each tuple
     673                 :             :          */
     674                 :          17 :         buildstate.funcCtx = AllocSetContextCreate(CurrentMemoryContext,
     675                 :             :                                                                                            "Gin build temporary context for user-defined function",
     676                 :             :                                                                                            ALLOCSET_DEFAULT_SIZES);
     677                 :             : 
     678                 :          17 :         buildstate.accum.ginstate = &buildstate.ginstate;
     679                 :          17 :         ginInitBA(&buildstate.accum);
     680                 :             : 
     681                 :             :         /* Report table scan phase started */
     682                 :          17 :         pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
     683                 :             :                                                                  PROGRESS_GIN_PHASE_INDEXBUILD_TABLESCAN);
     684                 :             : 
     685                 :             :         /*
     686                 :             :          * Attempt to launch parallel worker scan when required
     687                 :             :          *
     688                 :             :          * XXX plan_create_index_workers makes the number of workers dependent on
     689                 :             :          * maintenance_work_mem, requiring 32MB for each worker. For GIN that's
     690                 :             :          * reasonable too, because we sort the data just like btree. It does
     691                 :             :          * ignore the memory used to accumulate data in memory (set by work_mem),
     692                 :             :          * but there is no way to communicate that to plan_create_index_workers.
     693                 :             :          */
     694         [ +  - ]:          17 :         if (indexInfo->ii_ParallelWorkers > 0)
     695                 :           0 :                 _gin_begin_parallel(state, heap, index, indexInfo->ii_Concurrent,
     696                 :           0 :                                                         indexInfo->ii_ParallelWorkers);
     697                 :             : 
     698                 :             :         /*
     699                 :             :          * If parallel build requested and at least one worker process was
     700                 :             :          * successfully launched, set up coordination state, wait for workers to
     701                 :             :          * complete. Then read all tuples from the shared tuplesort and insert
     702                 :             :          * them into the index.
     703                 :             :          *
     704                 :             :          * In serial mode, simply scan the table and build the index one index
     705                 :             :          * tuple at a time.
     706                 :             :          */
     707         [ +  - ]:          17 :         if (state->bs_leader)
     708                 :             :         {
     709                 :           0 :                 SortCoordinate coordinate;
     710                 :             : 
     711                 :           0 :                 coordinate = palloc0_object(SortCoordinateData);
     712                 :           0 :                 coordinate->isWorker = false;
     713                 :           0 :                 coordinate->nParticipants =
     714                 :           0 :                         state->bs_leader->nparticipanttuplesorts;
     715                 :           0 :                 coordinate->sharedsort = state->bs_leader->sharedsort;
     716                 :             : 
     717                 :             :                 /*
     718                 :             :                  * Begin leader tuplesort.
     719                 :             :                  *
     720                 :             :                  * In cases where parallelism is involved, the leader receives the
     721                 :             :                  * same share of maintenance_work_mem as a serial sort (it is
     722                 :             :                  * generally treated in the same way as a serial sort once we return).
     723                 :             :                  * Parallel worker Tuplesortstates will have received only a fraction
     724                 :             :                  * of maintenance_work_mem, though.
     725                 :             :                  *
     726                 :             :                  * We rely on the lifetime of the Leader Tuplesortstate almost not
     727                 :             :                  * overlapping with any worker Tuplesortstate's lifetime.  There may
     728                 :             :                  * be some small overlap, but that's okay because we rely on leader
     729                 :             :                  * Tuplesortstate only allocating a small, fixed amount of memory
     730                 :             :                  * here. When its tuplesort_performsort() is called (by our caller),
     731                 :             :                  * and significant amounts of memory are likely to be used, all
     732                 :             :                  * workers must have already freed almost all memory held by their
     733                 :             :                  * Tuplesortstates (they are about to go away completely, too).  The
     734                 :             :                  * overall effect is that maintenance_work_mem always represents an
     735                 :             :                  * absolute high watermark on the amount of memory used by a CREATE
     736                 :             :                  * INDEX operation, regardless of the use of parallelism or any other
     737                 :             :                  * factor.
     738                 :             :                  */
     739                 :           0 :                 state->bs_sortstate =
     740                 :           0 :                         tuplesort_begin_index_gin(heap, index,
     741                 :           0 :                                                                           maintenance_work_mem, coordinate,
     742                 :             :                                                                           TUPLESORT_NONE);
     743                 :             : 
     744                 :             :                 /* scan the relation in parallel and merge per-worker results */
     745                 :           0 :                 reltuples = _gin_parallel_merge(state);
     746                 :             : 
     747                 :           0 :                 _gin_end_parallel(state->bs_leader, state);
     748                 :           0 :         }
     749                 :             :         else                                            /* no parallel index build */
     750                 :             :         {
     751                 :             :                 /*
     752                 :             :                  * Do the heap scan.  We disallow sync scan here because
     753                 :             :                  * dataPlaceToPage prefers to receive tuples in TID order.
     754                 :             :                  */
     755                 :          17 :                 reltuples = table_index_build_scan(heap, index, indexInfo, false, true,
     756                 :             :                                                                                    ginBuildCallback, &buildstate, NULL);
     757                 :             : 
     758                 :             :                 /* dump remaining entries to the index */
     759                 :          17 :                 oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx);
     760                 :          17 :                 ginBeginBAScan(&buildstate.accum);
     761   [ +  +  +  + ]:       14505 :                 while ((list = ginGetBAEntry(&buildstate.accum,
     762                 :       14505 :                                                                          &attnum, &key, &category, &nlist)) != NULL)
     763                 :             :                 {
     764                 :             :                         /* there could be many entries, so be willing to abort here */
     765         [ +  - ]:       14488 :                         CHECK_FOR_INTERRUPTS();
     766                 :       28976 :                         ginEntryInsert(&buildstate.ginstate, attnum, key, category,
     767                 :       14488 :                                                    list, nlist, &buildstate.buildStats);
     768                 :             :                 }
     769                 :          17 :                 MemoryContextSwitchTo(oldCtx);
     770                 :             :         }
     771                 :             : 
     772                 :          17 :         MemoryContextDelete(buildstate.funcCtx);
     773                 :          17 :         MemoryContextDelete(buildstate.tmpCtx);
     774                 :             : 
     775                 :             :         /*
     776                 :             :          * Update metapage stats
     777                 :             :          */
     778                 :          17 :         buildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index);
     779                 :          17 :         ginUpdateStats(index, &buildstate.buildStats, true);
     780                 :             : 
     781                 :             :         /*
     782                 :             :          * We didn't write WAL records as we built the index, so if WAL-logging is
     783                 :             :          * required, write all pages to the WAL now.
     784                 :             :          */
     785   [ +  +  +  -  :          17 :         if (RelationNeedsWAL(index))
             -  +  #  # ]
     786                 :             :         {
     787                 :           0 :                 log_newpage_range(index, MAIN_FORKNUM,
     788                 :           0 :                                                   0, RelationGetNumberOfBlocks(index),
     789                 :             :                                                   true);
     790                 :           0 :         }
     791                 :             : 
     792                 :             :         /*
     793                 :             :          * Return statistics
     794                 :             :          */
     795                 :          17 :         result = palloc_object(IndexBuildResult);
     796                 :             : 
     797                 :          17 :         result->heap_tuples = reltuples;
     798                 :          17 :         result->index_tuples = buildstate.indtuples;
     799                 :             : 
     800                 :          34 :         return result;
     801                 :          17 : }
     802                 :             : 
     803                 :             : /*
     804                 :             :  *      ginbuildempty() -- build an empty gin index in the initialization fork
     805                 :             :  */
     806                 :             : void
     807                 :           1 : ginbuildempty(Relation index)
     808                 :             : {
     809                 :           1 :         Buffer          RootBuffer,
     810                 :             :                                 MetaBuffer;
     811                 :             : 
     812                 :             :         /* An empty GIN index has two pages. */
     813                 :           1 :         MetaBuffer = ExtendBufferedRel(BMR_REL(index), INIT_FORKNUM, NULL,
     814                 :             :                                                                    EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK);
     815                 :           1 :         RootBuffer = ExtendBufferedRel(BMR_REL(index), INIT_FORKNUM, NULL,
     816                 :             :                                                                    EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK);
     817                 :             : 
     818                 :             :         /* Initialize and xlog metabuffer and root buffer. */
     819                 :           1 :         START_CRIT_SECTION();
     820                 :           1 :         GinInitMetabuffer(MetaBuffer);
     821                 :           1 :         MarkBufferDirty(MetaBuffer);
     822                 :           1 :         log_newpage_buffer(MetaBuffer, true);
     823                 :           1 :         GinInitBuffer(RootBuffer, GIN_LEAF);
     824                 :           1 :         MarkBufferDirty(RootBuffer);
     825                 :           1 :         log_newpage_buffer(RootBuffer, false);
     826         [ +  - ]:           1 :         END_CRIT_SECTION();
     827                 :             : 
     828                 :             :         /* Unlock and release the buffers. */
     829                 :           1 :         UnlockReleaseBuffer(MetaBuffer);
     830                 :           1 :         UnlockReleaseBuffer(RootBuffer);
     831                 :           1 : }
     832                 :             : 
     833                 :             : /*
     834                 :             :  * Insert index entries for a single indexable item during "normal"
     835                 :             :  * (non-fast-update) insertion
     836                 :             :  */
     837                 :             : static void
     838                 :        2000 : ginHeapTupleInsert(GinState *ginstate, OffsetNumber attnum,
     839                 :             :                                    Datum value, bool isNull,
     840                 :             :                                    ItemPointer item)
     841                 :             : {
     842                 :        2000 :         Datum      *entries;
     843                 :        2000 :         GinNullCategory *categories;
     844                 :        2000 :         int32           i,
     845                 :             :                                 nentries;
     846                 :             : 
     847                 :        2000 :         entries = ginExtractEntries(ginstate, attnum, value, isNull,
     848                 :             :                                                                 &nentries, &categories);
     849                 :             : 
     850         [ +  + ]:        7996 :         for (i = 0; i < nentries; i++)
     851                 :       11992 :                 ginEntryInsert(ginstate, attnum, entries[i], categories[i],
     852                 :        5996 :                                            item, 1, NULL);
     853                 :        2000 : }
     854                 :             : 
     855                 :             : bool
     856                 :       45950 : gininsert(Relation index, Datum *values, bool *isnull,
     857                 :             :                   ItemPointer ht_ctid, Relation heapRel,
     858                 :             :                   IndexUniqueCheck checkUnique,
     859                 :             :                   bool indexUnchanged,
     860                 :             :                   IndexInfo *indexInfo)
     861                 :             : {
     862                 :       45950 :         GinState   *ginstate = (GinState *) indexInfo->ii_AmCache;
     863                 :       45950 :         MemoryContext oldCtx;
     864                 :       45950 :         MemoryContext insertCtx;
     865                 :       45950 :         int                     i;
     866                 :             : 
     867                 :             :         /* Initialize GinState cache if first call in this statement */
     868         [ +  + ]:       45950 :         if (ginstate == NULL)
     869                 :             :         {
     870                 :          16 :                 oldCtx = MemoryContextSwitchTo(indexInfo->ii_Context);
     871                 :          16 :                 ginstate = palloc_object(GinState);
     872                 :          16 :                 initGinState(ginstate, index);
     873                 :          16 :                 indexInfo->ii_AmCache = ginstate;
     874                 :          16 :                 MemoryContextSwitchTo(oldCtx);
     875                 :          16 :         }
     876                 :             : 
     877                 :       45950 :         insertCtx = AllocSetContextCreate(CurrentMemoryContext,
     878                 :             :                                                                           "Gin insert temporary context",
     879                 :             :                                                                           ALLOCSET_DEFAULT_SIZES);
     880                 :             : 
     881                 :       45950 :         oldCtx = MemoryContextSwitchTo(insertCtx);
     882                 :             : 
     883   [ +  -  +  +  :       45950 :         if (GinGetUseFastUpdate(index))
                   +  + ]
     884                 :             :         {
     885                 :       43950 :                 GinTupleCollector collector;
     886                 :             : 
     887                 :       43950 :                 memset(&collector, 0, sizeof(GinTupleCollector));
     888                 :             : 
     889         [ +  + ]:      107913 :                 for (i = 0; i < ginstate->origTupdesc->natts; i++)
     890                 :      127926 :                         ginHeapTupleFastCollect(ginstate, &collector,
     891                 :       63963 :                                                                         (OffsetNumber) (i + 1),
     892                 :       63963 :                                                                         values[i], isnull[i],
     893                 :       63963 :                                                                         ht_ctid);
     894                 :             : 
     895                 :       43950 :                 ginHeapTupleFastInsert(ginstate, &collector);
     896                 :       43950 :         }
     897                 :             :         else
     898                 :             :         {
     899         [ +  + ]:        4000 :                 for (i = 0; i < ginstate->origTupdesc->natts; i++)
     900                 :        4000 :                         ginHeapTupleInsert(ginstate, (OffsetNumber) (i + 1),
     901                 :        2000 :                                                            values[i], isnull[i],
     902                 :        2000 :                                                            ht_ctid);
     903                 :             :         }
     904                 :             : 
     905                 :       45950 :         MemoryContextSwitchTo(oldCtx);
     906                 :       45950 :         MemoryContextDelete(insertCtx);
     907                 :             : 
     908                 :       45950 :         return false;
     909                 :       45950 : }
     910                 :             : 
     911                 :             : /*
     912                 :             :  * Create parallel context, and launch workers for leader.
     913                 :             :  *
     914                 :             :  * buildstate argument should be initialized (with the exception of the
     915                 :             :  * tuplesort states, which may later be created based on shared
     916                 :             :  * state initially set up here).
     917                 :             :  *
     918                 :             :  * isconcurrent indicates if operation is CREATE INDEX CONCURRENTLY.
     919                 :             :  *
     920                 :             :  * request is the target number of parallel worker processes to launch.
     921                 :             :  *
     922                 :             :  * Sets buildstate's GinLeader, which caller must use to shut down parallel
     923                 :             :  * mode by passing it to _gin_end_parallel() at the very end of its index
     924                 :             :  * build.  If not even a single worker process can be launched, this is
     925                 :             :  * never set, and caller should proceed with a serial index build.
     926                 :             :  */
     927                 :             : static void
     928                 :           0 : _gin_begin_parallel(GinBuildState *buildstate, Relation heap, Relation index,
     929                 :             :                                         bool isconcurrent, int request)
     930                 :             : {
     931                 :           0 :         ParallelContext *pcxt;
     932                 :           0 :         int                     scantuplesortstates;
     933                 :           0 :         Snapshot        snapshot;
     934                 :           0 :         Size            estginshared;
     935                 :           0 :         Size            estsort;
     936                 :           0 :         GinBuildShared *ginshared;
     937                 :           0 :         Sharedsort *sharedsort;
     938                 :           0 :         GinLeader  *ginleader = palloc0_object(GinLeader);
     939                 :           0 :         WalUsage   *walusage;
     940                 :           0 :         BufferUsage *bufferusage;
     941                 :           0 :         bool            leaderparticipates = true;
     942                 :           0 :         int                     querylen;
     943                 :             : 
     944                 :             : #ifdef DISABLE_LEADER_PARTICIPATION
     945                 :             :         leaderparticipates = false;
     946                 :             : #endif
     947                 :             : 
     948                 :             :         /*
     949                 :             :          * Enter parallel mode, and create context for parallel build of gin index
     950                 :             :          */
     951                 :           0 :         EnterParallelMode();
     952         [ #  # ]:           0 :         Assert(request > 0);
     953                 :           0 :         pcxt = CreateParallelContext("postgres", "_gin_parallel_build_main",
     954                 :           0 :                                                                  request);
     955                 :             : 
     956         [ #  # ]:           0 :         scantuplesortstates = leaderparticipates ? request + 1 : request;
     957                 :             : 
     958                 :             :         /*
     959                 :             :          * Prepare for scan of the base relation.  In a normal index build, we use
     960                 :             :          * SnapshotAny because we must retrieve all tuples and do our own time
     961                 :             :          * qual checks (because we have to index RECENTLY_DEAD tuples).  In a
     962                 :             :          * concurrent build, we take a regular MVCC snapshot and index whatever's
     963                 :             :          * live according to that.
     964                 :             :          */
     965         [ #  # ]:           0 :         if (!isconcurrent)
     966                 :           0 :                 snapshot = SnapshotAny;
     967                 :             :         else
     968                 :           0 :                 snapshot = RegisterSnapshot(GetTransactionSnapshot());
     969                 :             : 
     970                 :             :         /*
     971                 :             :          * Estimate size for our own PARALLEL_KEY_GIN_SHARED workspace.
     972                 :             :          */
     973                 :           0 :         estginshared = _gin_parallel_estimate_shared(heap, snapshot);
     974                 :           0 :         shm_toc_estimate_chunk(&pcxt->estimator, estginshared);
     975                 :           0 :         estsort = tuplesort_estimate_shared(scantuplesortstates);
     976                 :           0 :         shm_toc_estimate_chunk(&pcxt->estimator, estsort);
     977                 :             : 
     978                 :           0 :         shm_toc_estimate_keys(&pcxt->estimator, 2);
     979                 :             : 
     980                 :             :         /*
     981                 :             :          * Estimate space for WalUsage and BufferUsage -- PARALLEL_KEY_WAL_USAGE
     982                 :             :          * and PARALLEL_KEY_BUFFER_USAGE.
     983                 :             :          *
     984                 :             :          * If there are no extensions loaded that care, we could skip this.  We
     985                 :             :          * have no way of knowing whether anyone's looking at pgWalUsage or
     986                 :             :          * pgBufferUsage, so do it unconditionally.
     987                 :             :          */
     988                 :           0 :         shm_toc_estimate_chunk(&pcxt->estimator,
     989                 :             :                                                    mul_size(sizeof(WalUsage), pcxt->nworkers));
     990                 :           0 :         shm_toc_estimate_keys(&pcxt->estimator, 1);
     991                 :           0 :         shm_toc_estimate_chunk(&pcxt->estimator,
     992                 :             :                                                    mul_size(sizeof(BufferUsage), pcxt->nworkers));
     993                 :           0 :         shm_toc_estimate_keys(&pcxt->estimator, 1);
     994                 :             : 
     995                 :             :         /* Finally, estimate PARALLEL_KEY_QUERY_TEXT space */
     996         [ #  # ]:           0 :         if (debug_query_string)
     997                 :             :         {
     998                 :           0 :                 querylen = strlen(debug_query_string);
     999                 :           0 :                 shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1);
    1000                 :           0 :                 shm_toc_estimate_keys(&pcxt->estimator, 1);
    1001                 :           0 :         }
    1002                 :             :         else
    1003                 :           0 :                 querylen = 0;                   /* keep compiler quiet */
    1004                 :             : 
    1005                 :             :         /* Everyone's had a chance to ask for space, so now create the DSM */
    1006                 :           0 :         InitializeParallelDSM(pcxt);
    1007                 :             : 
    1008                 :             :         /* If no DSM segment was available, back out (do serial build) */
    1009         [ #  # ]:           0 :         if (pcxt->seg == NULL)
    1010                 :             :         {
    1011   [ #  #  #  # ]:           0 :                 if (IsMVCCSnapshot(snapshot))
    1012                 :           0 :                         UnregisterSnapshot(snapshot);
    1013                 :           0 :                 DestroyParallelContext(pcxt);
    1014                 :           0 :                 ExitParallelMode();
    1015                 :           0 :                 return;
    1016                 :             :         }
    1017                 :             : 
    1018                 :             :         /* Store shared build state, for which we reserved space */
    1019                 :           0 :         ginshared = (GinBuildShared *) shm_toc_allocate(pcxt->toc, estginshared);
    1020                 :             :         /* Initialize immutable state */
    1021                 :           0 :         ginshared->heaprelid = RelationGetRelid(heap);
    1022                 :           0 :         ginshared->indexrelid = RelationGetRelid(index);
    1023                 :           0 :         ginshared->isconcurrent = isconcurrent;
    1024                 :           0 :         ginshared->scantuplesortstates = scantuplesortstates;
    1025                 :             : 
    1026                 :           0 :         ConditionVariableInit(&ginshared->workersdonecv);
    1027                 :           0 :         SpinLockInit(&ginshared->mutex);
    1028                 :             : 
    1029                 :             :         /* Initialize mutable state */
    1030                 :           0 :         ginshared->nparticipantsdone = 0;
    1031                 :           0 :         ginshared->reltuples = 0.0;
    1032                 :           0 :         ginshared->indtuples = 0.0;
    1033                 :             : 
    1034                 :           0 :         table_parallelscan_initialize(heap,
    1035                 :           0 :                                                                   ParallelTableScanFromGinBuildShared(ginshared),
    1036                 :           0 :                                                                   snapshot);
    1037                 :             : 
    1038                 :             :         /*
    1039                 :             :          * Store shared tuplesort-private state, for which we reserved space.
    1040                 :             :          * Then, initialize opaque state using tuplesort routine.
    1041                 :             :          */
    1042                 :           0 :         sharedsort = (Sharedsort *) shm_toc_allocate(pcxt->toc, estsort);
    1043                 :           0 :         tuplesort_initialize_shared(sharedsort, scantuplesortstates,
    1044                 :           0 :                                                                 pcxt->seg);
    1045                 :             : 
    1046                 :           0 :         shm_toc_insert(pcxt->toc, PARALLEL_KEY_GIN_SHARED, ginshared);
    1047                 :           0 :         shm_toc_insert(pcxt->toc, PARALLEL_KEY_TUPLESORT, sharedsort);
    1048                 :             : 
    1049                 :             :         /* Store query string for workers */
    1050         [ #  # ]:           0 :         if (debug_query_string)
    1051                 :             :         {
    1052                 :           0 :                 char       *sharedquery;
    1053                 :             : 
    1054                 :           0 :                 sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1);
    1055                 :           0 :                 memcpy(sharedquery, debug_query_string, querylen + 1);
    1056                 :           0 :                 shm_toc_insert(pcxt->toc, PARALLEL_KEY_QUERY_TEXT, sharedquery);
    1057                 :           0 :         }
    1058                 :             : 
    1059                 :             :         /*
    1060                 :             :          * Allocate space for each worker's WalUsage and BufferUsage; no need to
    1061                 :             :          * initialize.
    1062                 :             :          */
    1063                 :           0 :         walusage = shm_toc_allocate(pcxt->toc,
    1064                 :           0 :                                                                 mul_size(sizeof(WalUsage), pcxt->nworkers));
    1065                 :           0 :         shm_toc_insert(pcxt->toc, PARALLEL_KEY_WAL_USAGE, walusage);
    1066                 :           0 :         bufferusage = shm_toc_allocate(pcxt->toc,
    1067                 :           0 :                                                                    mul_size(sizeof(BufferUsage), pcxt->nworkers));
    1068                 :           0 :         shm_toc_insert(pcxt->toc, PARALLEL_KEY_BUFFER_USAGE, bufferusage);
    1069                 :             : 
    1070                 :             :         /* Launch workers, saving status for leader/caller */
    1071                 :           0 :         LaunchParallelWorkers(pcxt);
    1072                 :           0 :         ginleader->pcxt = pcxt;
    1073                 :           0 :         ginleader->nparticipanttuplesorts = pcxt->nworkers_launched;
    1074         [ #  # ]:           0 :         if (leaderparticipates)
    1075                 :           0 :                 ginleader->nparticipanttuplesorts++;
    1076                 :           0 :         ginleader->ginshared = ginshared;
    1077                 :           0 :         ginleader->sharedsort = sharedsort;
    1078                 :           0 :         ginleader->snapshot = snapshot;
    1079                 :           0 :         ginleader->walusage = walusage;
    1080                 :           0 :         ginleader->bufferusage = bufferusage;
    1081                 :             : 
    1082                 :             :         /* If no workers were successfully launched, back out (do serial build) */
    1083         [ #  # ]:           0 :         if (pcxt->nworkers_launched == 0)
    1084                 :             :         {
    1085                 :           0 :                 _gin_end_parallel(ginleader, NULL);
    1086                 :           0 :                 return;
    1087                 :             :         }
    1088                 :             : 
    1089                 :             :         /* Save leader state now that it's clear build will be parallel */
    1090                 :           0 :         buildstate->bs_leader = ginleader;
    1091                 :             : 
    1092                 :             :         /* Join heap scan ourselves */
    1093         [ #  # ]:           0 :         if (leaderparticipates)
    1094                 :           0 :                 _gin_leader_participate_as_worker(buildstate, heap, index);
    1095                 :             : 
    1096                 :             :         /*
    1097                 :             :          * Caller needs to wait for all launched workers when we return.  Make
    1098                 :             :          * sure that the failure-to-start case will not hang forever.
    1099                 :             :          */
    1100                 :           0 :         WaitForParallelWorkersToAttach(pcxt);
    1101         [ #  # ]:           0 : }
    1102                 :             : 
    1103                 :             : /*
    1104                 :             :  * Shut down workers, destroy parallel context, and end parallel mode.
    1105                 :             :  */
    1106                 :             : static void
    1107                 :           0 : _gin_end_parallel(GinLeader *ginleader, GinBuildState *state)
    1108                 :             : {
    1109                 :           0 :         int                     i;
    1110                 :             : 
    1111                 :             :         /* Shutdown worker processes */
    1112                 :           0 :         WaitForParallelWorkersToFinish(ginleader->pcxt);
    1113                 :             : 
    1114                 :             :         /*
    1115                 :             :          * Next, accumulate WAL usage.  (This must wait for the workers to finish,
    1116                 :             :          * or we might get incomplete data.)
    1117                 :             :          */
    1118         [ #  # ]:           0 :         for (i = 0; i < ginleader->pcxt->nworkers_launched; i++)
    1119                 :           0 :                 InstrAccumParallelQuery(&ginleader->bufferusage[i], &ginleader->walusage[i]);
    1120                 :             : 
    1121                 :             :         /* Free last reference to MVCC snapshot, if one was used */
    1122   [ #  #  #  # ]:           0 :         if (IsMVCCSnapshot(ginleader->snapshot))
    1123                 :           0 :                 UnregisterSnapshot(ginleader->snapshot);
    1124                 :           0 :         DestroyParallelContext(ginleader->pcxt);
    1125                 :           0 :         ExitParallelMode();
    1126                 :           0 : }
    1127                 :             : 
    1128                 :             : /*
    1129                 :             :  * Within leader, wait for end of heap scan.
    1130                 :             :  *
    1131                 :             :  * When called, parallel heap scan started by _gin_begin_parallel() will
    1132                 :             :  * already be underway within worker processes (when leader participates
    1133                 :             :  * as a worker, we should end up here just as workers are finishing).
    1134                 :             :  *
    1135                 :             :  * Returns the total number of heap tuples scanned.
    1136                 :             :  */
    1137                 :             : static double
    1138                 :           0 : _gin_parallel_heapscan(GinBuildState *state)
    1139                 :             : {
    1140                 :           0 :         GinBuildShared *ginshared = state->bs_leader->ginshared;
    1141                 :           0 :         int                     nparticipanttuplesorts;
    1142                 :             : 
    1143                 :           0 :         nparticipanttuplesorts = state->bs_leader->nparticipanttuplesorts;
    1144                 :           0 :         for (;;)
    1145                 :             :         {
    1146         [ #  # ]:           0 :                 SpinLockAcquire(&ginshared->mutex);
    1147         [ #  # ]:           0 :                 if (ginshared->nparticipantsdone == nparticipanttuplesorts)
    1148                 :             :                 {
    1149                 :             :                         /* copy the data into leader state */
    1150                 :           0 :                         state->bs_reltuples = ginshared->reltuples;
    1151                 :           0 :                         state->bs_numtuples = ginshared->indtuples;
    1152                 :             : 
    1153                 :           0 :                         SpinLockRelease(&ginshared->mutex);
    1154                 :           0 :                         break;
    1155                 :             :                 }
    1156                 :           0 :                 SpinLockRelease(&ginshared->mutex);
    1157                 :             : 
    1158                 :           0 :                 ConditionVariableSleep(&ginshared->workersdonecv,
    1159                 :             :                                                            WAIT_EVENT_PARALLEL_CREATE_INDEX_SCAN);
    1160                 :             :         }
    1161                 :             : 
    1162                 :           0 :         ConditionVariableCancelSleep();
    1163                 :             : 
    1164                 :           0 :         return state->bs_reltuples;
    1165                 :           0 : }
    1166                 :             : 
    1167                 :             : /*
    1168                 :             :  * Buffer used to accumulate TIDs from multiple GinTuples for the same key
    1169                 :             :  * (we read these from the tuplesort, sorted by the key).
    1170                 :             :  *
    1171                 :             :  * This is similar to BuildAccumulator in that it's used to collect TIDs
    1172                 :             :  * in memory before inserting them into the index, but it's much simpler
    1173                 :             :  * as it only deals with a single index key at a time.
    1174                 :             :  *
    1175                 :             :  * When adding TIDs to the buffer, we make sure to keep them sorted, both
    1176                 :             :  * during the initial table scan (and detecting when the scan wraps around),
    1177                 :             :  * and during merging (where we do mergesort).
    1178                 :             :  */
    1179                 :             : typedef struct GinBuffer
    1180                 :             : {
    1181                 :             :         OffsetNumber attnum;
    1182                 :             :         GinNullCategory category;
    1183                 :             :         Datum           key;                    /* 0 if no key (and keylen == 0) */
    1184                 :             :         Size            keylen;                 /* number of bytes (not typlen) */
    1185                 :             : 
    1186                 :             :         /* type info */
    1187                 :             :         int16           typlen;
    1188                 :             :         bool            typbyval;
    1189                 :             : 
    1190                 :             :         /* Number of TIDs to collect before attempt to write some out. */
    1191                 :             :         int                     maxitems;
    1192                 :             : 
    1193                 :             :         /* array of TID values */
    1194                 :             :         int                     nitems;
    1195                 :             :         int                     nfrozen;
    1196                 :             :         SortSupport ssup;                       /* for sorting/comparing keys */
    1197                 :             :         ItemPointerData *items;
    1198                 :             : } GinBuffer;
    1199                 :             : 
    1200                 :             : /*
    1201                 :             :  * Check that TID array contains valid values, and that it's sorted (if we
    1202                 :             :  * expect it to be).
    1203                 :             :  */
    1204                 :             : static void
    1205                 :           0 : AssertCheckItemPointers(GinBuffer *buffer)
    1206                 :             : {
    1207                 :             : #ifdef USE_ASSERT_CHECKING
    1208                 :             :         /* we should not have a buffer with no TIDs to sort */
    1209         [ #  # ]:           0 :         Assert(buffer->items != NULL);
    1210         [ #  # ]:           0 :         Assert(buffer->nitems > 0);
    1211                 :             : 
    1212         [ #  # ]:           0 :         for (int i = 0; i < buffer->nitems; i++)
    1213                 :             :         {
    1214         [ #  # ]:           0 :                 Assert(ItemPointerIsValid(&buffer->items[i]));
    1215                 :             : 
    1216                 :             :                 /* don't check ordering for the first TID item */
    1217         [ #  # ]:           0 :                 if (i == 0)
    1218                 :           0 :                         continue;
    1219                 :             : 
    1220         [ #  # ]:           0 :                 Assert(ItemPointerCompare(&buffer->items[i - 1], &buffer->items[i]) < 0);
    1221                 :           0 :         }
    1222                 :             : #endif
    1223                 :           0 : }
    1224                 :             : 
    1225                 :             : /*
    1226                 :             :  * GinBuffer checks
    1227                 :             :  *
    1228                 :             :  * Make sure the nitems/items fields are consistent (either the array is empty
    1229                 :             :  * or not empty, the fields need to agree). If there are items, check ordering.
    1230                 :             :  */
    1231                 :             : static void
    1232                 :           0 : AssertCheckGinBuffer(GinBuffer *buffer)
    1233                 :             : {
    1234                 :             : #ifdef USE_ASSERT_CHECKING
    1235                 :             :         /* if we have any items, the array must exist */
    1236   [ #  #  #  # ]:           0 :         Assert(!((buffer->nitems > 0) && (buffer->items == NULL)));
    1237                 :             : 
    1238                 :             :         /*
    1239                 :             :          * The buffer may be empty, in which case we must not call the check of
    1240                 :             :          * item pointers, because that assumes non-emptiness.
    1241                 :             :          */
    1242         [ #  # ]:           0 :         if (buffer->nitems == 0)
    1243                 :           0 :                 return;
    1244                 :             : 
    1245                 :             :         /* Make sure the item pointers are valid and sorted. */
    1246                 :           0 :         AssertCheckItemPointers(buffer);
    1247                 :             : #endif
    1248                 :           0 : }
    1249                 :             : 
    1250                 :             : /*
    1251                 :             :  * GinBufferInit
    1252                 :             :  *              Initialize buffer to store tuples for a GIN index.
    1253                 :             :  *
    1254                 :             :  * Initialize the buffer used to accumulate TID for a single key at a time
    1255                 :             :  * (we process the data sorted), so we know when we received all data for
    1256                 :             :  * a given key.
    1257                 :             :  *
    1258                 :             :  * Initializes sort support procedures for all index attributes.
    1259                 :             :  */
    1260                 :             : static GinBuffer *
    1261                 :           0 : GinBufferInit(Relation index)
    1262                 :             : {
    1263                 :           0 :         GinBuffer  *buffer = palloc0_object(GinBuffer);
    1264                 :           0 :         int                     i,
    1265                 :             :                                 nKeys;
    1266                 :           0 :         TupleDesc       desc = RelationGetDescr(index);
    1267                 :             : 
    1268                 :             :         /*
    1269                 :             :          * How many items can we fit into the memory limit? We don't want to end
    1270                 :             :          * with too many TIDs. and 64kB seems more than enough. But maybe this
    1271                 :             :          * should be tied to maintenance_work_mem or something like that?
    1272                 :             :          */
    1273                 :           0 :         buffer->maxitems = (64 * 1024L) / sizeof(ItemPointerData);
    1274                 :             : 
    1275                 :           0 :         nKeys = IndexRelationGetNumberOfKeyAttributes(index);
    1276                 :             : 
    1277                 :           0 :         buffer->ssup = palloc0_array(SortSupportData, nKeys);
    1278                 :             : 
    1279                 :             :         /*
    1280                 :             :          * Lookup ordering operator for the index key data type, and initialize
    1281                 :             :          * the sort support function.
    1282                 :             :          */
    1283         [ #  # ]:           0 :         for (i = 0; i < nKeys; i++)
    1284                 :             :         {
    1285                 :           0 :                 Oid                     cmpFunc;
    1286                 :           0 :                 SortSupport sortKey = &buffer->ssup[i];
    1287                 :           0 :                 Form_pg_attribute att = TupleDescAttr(desc, i);
    1288                 :             : 
    1289                 :           0 :                 sortKey->ssup_cxt = CurrentMemoryContext;
    1290                 :           0 :                 sortKey->ssup_collation = index->rd_indcollation[i];
    1291                 :             : 
    1292         [ #  # ]:           0 :                 if (!OidIsValid(sortKey->ssup_collation))
    1293                 :           0 :                         sortKey->ssup_collation = DEFAULT_COLLATION_OID;
    1294                 :             : 
    1295                 :           0 :                 sortKey->ssup_nulls_first = false;
    1296                 :           0 :                 sortKey->ssup_attno = i + 1;
    1297                 :           0 :                 sortKey->abbreviate = false;
    1298                 :             : 
    1299         [ #  # ]:           0 :                 Assert(sortKey->ssup_attno != 0);
    1300                 :             : 
    1301                 :             :                 /*
    1302                 :             :                  * If the compare proc isn't specified in the opclass definition, look
    1303                 :             :                  * up the index key type's default btree comparator.
    1304                 :             :                  */
    1305                 :           0 :                 cmpFunc = index_getprocid(index, i + 1, GIN_COMPARE_PROC);
    1306         [ #  # ]:           0 :                 if (cmpFunc == InvalidOid)
    1307                 :             :                 {
    1308                 :           0 :                         TypeCacheEntry *typentry;
    1309                 :             : 
    1310                 :           0 :                         typentry = lookup_type_cache(att->atttypid,
    1311                 :             :                                                                                  TYPECACHE_CMP_PROC_FINFO);
    1312         [ #  # ]:           0 :                         if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
    1313   [ #  #  #  # ]:           0 :                                 ereport(ERROR,
    1314                 :             :                                                 (errcode(ERRCODE_UNDEFINED_FUNCTION),
    1315                 :             :                                                  errmsg("could not identify a comparison function for type %s",
    1316                 :             :                                                                 format_type_be(att->atttypid))));
    1317                 :             : 
    1318                 :           0 :                         cmpFunc = typentry->cmp_proc_finfo.fn_oid;
    1319                 :           0 :                 }
    1320                 :             : 
    1321                 :           0 :                 PrepareSortSupportComparisonShim(cmpFunc, sortKey);
    1322                 :           0 :         }
    1323                 :             : 
    1324                 :           0 :         return buffer;
    1325                 :           0 : }
    1326                 :             : 
    1327                 :             : /* Is the buffer empty, i.e. has no TID values in the array? */
    1328                 :             : static bool
    1329                 :           0 : GinBufferIsEmpty(GinBuffer *buffer)
    1330                 :             : {
    1331                 :           0 :         return (buffer->nitems == 0);
    1332                 :             : }
    1333                 :             : 
    1334                 :             : /*
    1335                 :             :  * GinBufferKeyEquals
    1336                 :             :  *              Can the buffer store TIDs for the provided GIN tuple (same key)?
    1337                 :             :  *
    1338                 :             :  * Compare if the tuple matches the already accumulated data in the GIN
    1339                 :             :  * buffer. Compare scalar fields first, before the actual key.
    1340                 :             :  *
    1341                 :             :  * Returns true if the key matches, and the TID belongs to the buffer, or
    1342                 :             :  * false if the key does not match.
    1343                 :             :  */
    1344                 :             : static bool
    1345                 :           0 : GinBufferKeyEquals(GinBuffer *buffer, GinTuple *tup)
    1346                 :             : {
    1347                 :           0 :         int                     r;
    1348                 :           0 :         Datum           tupkey;
    1349                 :             : 
    1350                 :           0 :         AssertCheckGinBuffer(buffer);
    1351                 :             : 
    1352         [ #  # ]:           0 :         if (tup->attrnum != buffer->attnum)
    1353                 :           0 :                 return false;
    1354                 :             : 
    1355                 :             :         /* same attribute should have the same type info */
    1356         [ #  # ]:           0 :         Assert(tup->typbyval == buffer->typbyval);
    1357         [ #  # ]:           0 :         Assert(tup->typlen == buffer->typlen);
    1358                 :             : 
    1359         [ #  # ]:           0 :         if (tup->category != buffer->category)
    1360                 :           0 :                 return false;
    1361                 :             : 
    1362                 :             :         /*
    1363                 :             :          * For NULL/empty keys, this means equality, for normal keys we need to
    1364                 :             :          * compare the actual key value.
    1365                 :             :          */
    1366         [ #  # ]:           0 :         if (buffer->category != GIN_CAT_NORM_KEY)
    1367                 :           0 :                 return true;
    1368                 :             : 
    1369                 :             :         /*
    1370                 :             :          * For the tuple, get either the first sizeof(Datum) bytes for byval
    1371                 :             :          * types, or a pointer to the beginning of the data array.
    1372                 :             :          */
    1373         [ #  # ]:           0 :         tupkey = (buffer->typbyval) ? *(Datum *) tup->data : PointerGetDatum(tup->data);
    1374                 :             : 
    1375                 :           0 :         r = ApplySortComparator(buffer->key, false,
    1376                 :           0 :                                                         tupkey, false,
    1377                 :           0 :                                                         &buffer->ssup[buffer->attnum - 1]);
    1378                 :             : 
    1379                 :           0 :         return (r == 0);
    1380                 :           0 : }
    1381                 :             : 
    1382                 :             : /*
    1383                 :             :  * GinBufferShouldTrim
    1384                 :             :  *              Should we trim the list of item pointers?
    1385                 :             :  *
    1386                 :             :  * By trimming we understand writing out and removing the tuple IDs that
    1387                 :             :  * we know can't change by future merges. We can deduce the TID up to which
    1388                 :             :  * this is guaranteed from the "first" TID in each GIN tuple, which provides
    1389                 :             :  * a "horizon" (for a given key) thanks to the sort.
    1390                 :             :  *
    1391                 :             :  * We don't want to do this too often - compressing longer TID lists is more
    1392                 :             :  * efficient. But we also don't want to accumulate too many TIDs, for two
    1393                 :             :  * reasons. First, it consumes memory and we might exceed maintenance_work_mem
    1394                 :             :  * (or whatever limit applies), even if that's unlikely because TIDs are very
    1395                 :             :  * small so we can fit a lot of them. Second, and more importantly, long TID
    1396                 :             :  * lists are an issue if the scan wraps around, because a key may get a very
    1397                 :             :  * wide list (with min/max TID for that key), forcing "full" mergesorts for
    1398                 :             :  * every list merged into it (instead of the efficient append).
    1399                 :             :  *
    1400                 :             :  * So we look at two things when deciding if to trim - if the resulting list
    1401                 :             :  * (after adding TIDs from the new tuple) would be too long, and if there is
    1402                 :             :  * enough TIDs to trim (with values less than "first" TID from the new tuple),
    1403                 :             :  * we do the trim. By enough we mean at least 128 TIDs (mostly an arbitrary
    1404                 :             :  * number).
    1405                 :             :  *
    1406                 :             :  * We try freezing TIDs at the beginning of the list first, before attempting
    1407                 :             :  * to trim the buffer. This may allow trimming the data earlier, reducing the
    1408                 :             :  * memory usage and excluding it from the mergesort.
    1409                 :             :  */
    1410                 :             : static bool
    1411                 :           0 : GinBufferShouldTrim(GinBuffer *buffer, GinTuple *tup)
    1412                 :             : {
    1413                 :             :         /*
    1414                 :             :          * Check if the last TID in the current list is frozen. This is the case
    1415                 :             :          * when merging non-overlapping lists, e.g. in each parallel worker.
    1416                 :             :          */
    1417   [ #  #  #  # ]:           0 :         if ((buffer->nitems > 0) &&
    1418                 :           0 :                 (ItemPointerCompare(&buffer->items[buffer->nitems - 1],
    1419                 :           0 :                                                         GinTupleGetFirst(tup)) == 0))
    1420                 :           0 :                 buffer->nfrozen = buffer->nitems;
    1421                 :             : 
    1422                 :             :         /*
    1423                 :             :          * Now find the last TID we know to be frozen, i.e. the last TID right
    1424                 :             :          * before the new GIN tuple.
    1425                 :             :          *
    1426                 :             :          * Start with the first not-yet-frozen tuple, and walk until we find the
    1427                 :             :          * first TID that's higher. If we already know the whole list is frozen
    1428                 :             :          * (i.e. nfrozen == nitems), this does nothing.
    1429                 :             :          *
    1430                 :             :          * XXX This might do a binary search for sufficiently long lists, but it
    1431                 :             :          * does not seem worth the complexity. Overlapping lists should be rare
    1432                 :             :          * common, TID comparisons are cheap, and we should quickly freeze most of
    1433                 :             :          * the list.
    1434                 :             :          */
    1435         [ #  # ]:           0 :         for (int i = buffer->nfrozen; i < buffer->nitems; i++)
    1436                 :             :         {
    1437                 :             :                 /* Is the TID after the first TID of the new tuple? Can't freeze. */
    1438                 :           0 :                 if (ItemPointerCompare(&buffer->items[i],
    1439   [ #  #  #  # ]:           0 :                                                            GinTupleGetFirst(tup)) > 0)
    1440                 :           0 :                         break;
    1441                 :             : 
    1442                 :           0 :                 buffer->nfrozen++;
    1443                 :           0 :         }
    1444                 :             : 
    1445                 :             :         /* not enough TIDs to trim (1024 is somewhat arbitrary number) */
    1446         [ #  # ]:           0 :         if (buffer->nfrozen < 1024)
    1447                 :           0 :                 return false;
    1448                 :             : 
    1449                 :             :         /* no need to trim if we have not hit the memory limit yet */
    1450         [ #  # ]:           0 :         if ((buffer->nitems + tup->nitems) < buffer->maxitems)
    1451                 :           0 :                 return false;
    1452                 :             : 
    1453                 :             :         /*
    1454                 :             :          * OK, we have enough frozen TIDs to flush, and we have hit the memory
    1455                 :             :          * limit, so it's time to write it out.
    1456                 :             :          */
    1457                 :           0 :         return true;
    1458                 :           0 : }
    1459                 :             : 
    1460                 :             : /*
    1461                 :             :  * GinBufferStoreTuple
    1462                 :             :  *              Add data (especially TID list) from a GIN tuple to the buffer.
    1463                 :             :  *
    1464                 :             :  * The buffer is expected to be empty (in which case it's initialized), or
    1465                 :             :  * having the same key. The TID values from the tuple are combined with the
    1466                 :             :  * stored values using a merge sort.
    1467                 :             :  *
    1468                 :             :  * The tuples (for the same key) are expected to be sorted by first TID. But
    1469                 :             :  * this does not guarantee the lists do not overlap, especially in the leader,
    1470                 :             :  * because the workers process interleaving data. There should be no overlaps
    1471                 :             :  * in a single worker - it could happen when the parallel scan wraps around,
    1472                 :             :  * but we detect that and flush the data (see ginBuildCallbackParallel).
    1473                 :             :  *
    1474                 :             :  * By sorting the GinTuple not only by key, but also by the first TID, we make
    1475                 :             :  * it more less likely the lists will overlap during merge. We merge them using
    1476                 :             :  * mergesort, but it's cheaper to just append one list to the other.
    1477                 :             :  *
    1478                 :             :  * How often can the lists overlap? There should be no overlaps in workers,
    1479                 :             :  * and in the leader we can see overlaps between lists built by different
    1480                 :             :  * workers. But the workers merge the items as much as possible, so there
    1481                 :             :  * should not be too many.
    1482                 :             :  */
    1483                 :             : static void
    1484                 :           0 : GinBufferStoreTuple(GinBuffer *buffer, GinTuple *tup)
    1485                 :             : {
    1486                 :           0 :         ItemPointerData *items;
    1487                 :           0 :         Datum           key;
    1488                 :             : 
    1489                 :           0 :         AssertCheckGinBuffer(buffer);
    1490                 :             : 
    1491                 :           0 :         key = _gin_parse_tuple_key(tup);
    1492                 :           0 :         items = _gin_parse_tuple_items(tup);
    1493                 :             : 
    1494                 :             :         /* if the buffer is empty, set the fields (and copy the key) */
    1495         [ #  # ]:           0 :         if (GinBufferIsEmpty(buffer))
    1496                 :             :         {
    1497                 :           0 :                 buffer->category = tup->category;
    1498                 :           0 :                 buffer->keylen = tup->keylen;
    1499                 :           0 :                 buffer->attnum = tup->attrnum;
    1500                 :             : 
    1501                 :           0 :                 buffer->typlen = tup->typlen;
    1502                 :           0 :                 buffer->typbyval = tup->typbyval;
    1503                 :             : 
    1504         [ #  # ]:           0 :                 if (tup->category == GIN_CAT_NORM_KEY)
    1505                 :           0 :                         buffer->key = datumCopy(key, buffer->typbyval, buffer->typlen);
    1506                 :             :                 else
    1507                 :           0 :                         buffer->key = (Datum) 0;
    1508                 :           0 :         }
    1509                 :             : 
    1510                 :             :         /* add the new TIDs into the buffer, combine using merge-sort */
    1511                 :             :         {
    1512                 :           0 :                 int                     nnew;
    1513                 :           0 :                 ItemPointer new;
    1514                 :             : 
    1515                 :             :                 /*
    1516                 :             :                  * Resize the array - we do this first, because we'll dereference the
    1517                 :             :                  * first unfrozen TID, which would fail if the array is NULL. We'll
    1518                 :             :                  * still pass 0 as number of elements in that array though.
    1519                 :             :                  */
    1520         [ #  # ]:           0 :                 if (buffer->items == NULL)
    1521                 :           0 :                         buffer->items = palloc((buffer->nitems + tup->nitems) * sizeof(ItemPointerData));
    1522                 :             :                 else
    1523                 :           0 :                         buffer->items = repalloc(buffer->items,
    1524                 :           0 :                                                                          (buffer->nitems + tup->nitems) * sizeof(ItemPointerData));
    1525                 :             : 
    1526                 :           0 :                 new = ginMergeItemPointers(&buffer->items[buffer->nfrozen], /* first unfrozen */
    1527                 :           0 :                                                                    (buffer->nitems - buffer->nfrozen),    /* num of unfrozen */
    1528                 :           0 :                                                                    items, tup->nitems, &nnew);
    1529                 :             : 
    1530         [ #  # ]:           0 :                 Assert(nnew == (tup->nitems + (buffer->nitems - buffer->nfrozen)));
    1531                 :             : 
    1532                 :           0 :                 memcpy(&buffer->items[buffer->nfrozen], new,
    1533                 :             :                            nnew * sizeof(ItemPointerData));
    1534                 :             : 
    1535                 :           0 :                 pfree(new);
    1536                 :             : 
    1537                 :           0 :                 buffer->nitems += tup->nitems;
    1538                 :             : 
    1539                 :           0 :                 AssertCheckItemPointers(buffer);
    1540                 :           0 :         }
    1541                 :             : 
    1542                 :             :         /* free the decompressed TID list */
    1543                 :           0 :         pfree(items);
    1544                 :           0 : }
    1545                 :             : 
    1546                 :             : /*
    1547                 :             :  * GinBufferReset
    1548                 :             :  *              Reset the buffer into a state as if it contains no data.
    1549                 :             :  */
    1550                 :             : static void
    1551                 :           0 : GinBufferReset(GinBuffer *buffer)
    1552                 :             : {
    1553         [ #  # ]:           0 :         Assert(!GinBufferIsEmpty(buffer));
    1554                 :             : 
    1555                 :             :         /* release byref values, do nothing for by-val ones */
    1556   [ #  #  #  # ]:           0 :         if ((buffer->category == GIN_CAT_NORM_KEY) && !buffer->typbyval)
    1557                 :           0 :                 pfree(DatumGetPointer(buffer->key));
    1558                 :             : 
    1559                 :             :         /*
    1560                 :             :          * Not required, but makes it more likely to trigger NULL dereference if
    1561                 :             :          * using the value incorrectly, etc.
    1562                 :             :          */
    1563                 :           0 :         buffer->key = (Datum) 0;
    1564                 :             : 
    1565                 :           0 :         buffer->attnum = 0;
    1566                 :           0 :         buffer->category = 0;
    1567                 :           0 :         buffer->keylen = 0;
    1568                 :           0 :         buffer->nitems = 0;
    1569                 :           0 :         buffer->nfrozen = 0;
    1570                 :             : 
    1571                 :           0 :         buffer->typlen = 0;
    1572                 :           0 :         buffer->typbyval = 0;
    1573                 :           0 : }
    1574                 :             : 
    1575                 :             : /*
    1576                 :             :  * GinBufferTrim
    1577                 :             :  *              Discard the "frozen" part of the TID list (which should have been
    1578                 :             :  *              written to disk/index before this call).
    1579                 :             :  */
    1580                 :             : static void
    1581                 :           0 : GinBufferTrim(GinBuffer *buffer)
    1582                 :             : {
    1583         [ #  # ]:           0 :         Assert((buffer->nfrozen > 0) && (buffer->nfrozen <= buffer->nitems));
    1584                 :             : 
    1585                 :           0 :         memmove(&buffer->items[0], &buffer->items[buffer->nfrozen],
    1586                 :             :                         sizeof(ItemPointerData) * (buffer->nitems - buffer->nfrozen));
    1587                 :             : 
    1588                 :           0 :         buffer->nitems -= buffer->nfrozen;
    1589                 :           0 :         buffer->nfrozen = 0;
    1590                 :           0 : }
    1591                 :             : 
    1592                 :             : /*
    1593                 :             :  * GinBufferFree
    1594                 :             :  *              Release memory associated with the GinBuffer (including TID array).
    1595                 :             :  */
    1596                 :             : static void
    1597                 :           0 : GinBufferFree(GinBuffer *buffer)
    1598                 :             : {
    1599         [ #  # ]:           0 :         if (buffer->items)
    1600                 :           0 :                 pfree(buffer->items);
    1601                 :             : 
    1602                 :             :         /* release byref values, do nothing for by-val ones */
    1603         [ #  # ]:           0 :         if (!GinBufferIsEmpty(buffer) &&
    1604   [ #  #  #  # ]:           0 :                 (buffer->category == GIN_CAT_NORM_KEY) && !buffer->typbyval)
    1605                 :           0 :                 pfree(DatumGetPointer(buffer->key));
    1606                 :             : 
    1607                 :           0 :         pfree(buffer);
    1608                 :           0 : }
    1609                 :             : 
    1610                 :             : /*
    1611                 :             :  * GinBufferCanAddKey
    1612                 :             :  *              Check if a given GIN tuple can be added to the current buffer.
    1613                 :             :  *
    1614                 :             :  * Returns true if the buffer is either empty or for the same index key.
    1615                 :             :  */
    1616                 :             : static bool
    1617                 :           0 : GinBufferCanAddKey(GinBuffer *buffer, GinTuple *tup)
    1618                 :             : {
    1619                 :             :         /* empty buffer can accept data for any key */
    1620         [ #  # ]:           0 :         if (GinBufferIsEmpty(buffer))
    1621                 :           0 :                 return true;
    1622                 :             : 
    1623                 :             :         /* otherwise just data for the same key */
    1624                 :           0 :         return GinBufferKeyEquals(buffer, tup);
    1625                 :           0 : }
    1626                 :             : 
    1627                 :             : /*
    1628                 :             :  * Within leader, wait for end of heap scan and merge per-worker results.
    1629                 :             :  *
    1630                 :             :  * After waiting for all workers to finish, merge the per-worker results into
    1631                 :             :  * the complete index. The results from each worker are sorted by block number
    1632                 :             :  * (start of the page range). While combining the per-worker results we merge
    1633                 :             :  * summaries for the same page range, and also fill-in empty summaries for
    1634                 :             :  * ranges without any tuples.
    1635                 :             :  *
    1636                 :             :  * Returns the total number of heap tuples scanned.
    1637                 :             :  */
    1638                 :             : static double
    1639                 :           0 : _gin_parallel_merge(GinBuildState *state)
    1640                 :             : {
    1641                 :           0 :         GinTuple   *tup;
    1642                 :           0 :         Size            tuplen;
    1643                 :           0 :         double          reltuples = 0;
    1644                 :           0 :         GinBuffer  *buffer;
    1645                 :             : 
    1646                 :             :         /* GIN tuples from workers, merged by leader */
    1647                 :           0 :         double          numtuples = 0;
    1648                 :             : 
    1649                 :             :         /* wait for workers to scan table and produce partial results */
    1650                 :           0 :         reltuples = _gin_parallel_heapscan(state);
    1651                 :             : 
    1652                 :             :         /* Execute the sort */
    1653                 :           0 :         pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
    1654                 :             :                                                                  PROGRESS_GIN_PHASE_PERFORMSORT_2);
    1655                 :             : 
    1656                 :             :         /* do the actual sort in the leader */
    1657                 :           0 :         tuplesort_performsort(state->bs_sortstate);
    1658                 :             : 
    1659                 :             :         /*
    1660                 :             :          * Initialize buffer to combine entries for the same key.
    1661                 :             :          *
    1662                 :             :          * The leader is allowed to use the whole maintenance_work_mem buffer to
    1663                 :             :          * combine data. The parallel workers already completed.
    1664                 :             :          */
    1665                 :           0 :         buffer = GinBufferInit(state->ginstate.index);
    1666                 :             : 
    1667                 :             :         /*
    1668                 :             :          * Set the progress target for the next phase.  Reset the block number
    1669                 :             :          * values set by table_index_build_scan
    1670                 :             :          */
    1671                 :             :         {
    1672                 :           0 :                 const int       progress_index[] = {
    1673                 :             :                         PROGRESS_CREATEIDX_SUBPHASE,
    1674                 :             :                         PROGRESS_CREATEIDX_TUPLES_TOTAL,
    1675                 :             :                         PROGRESS_SCAN_BLOCKS_TOTAL,
    1676                 :             :                         PROGRESS_SCAN_BLOCKS_DONE
    1677                 :             :                 };
    1678                 :           0 :                 const int64 progress_vals[] = {
    1679                 :             :                         PROGRESS_GIN_PHASE_MERGE_2,
    1680                 :           0 :                         state->bs_numtuples,
    1681                 :             :                         0, 0
    1682                 :             :                 };
    1683                 :             : 
    1684                 :           0 :                 pgstat_progress_update_multi_param(4, progress_index, progress_vals);
    1685                 :           0 :         }
    1686                 :             : 
    1687                 :             :         /*
    1688                 :             :          * Read the GIN tuples from the shared tuplesort, sorted by category and
    1689                 :             :          * key. That probably gives us order matching how data is organized in the
    1690                 :             :          * index.
    1691                 :             :          *
    1692                 :             :          * We don't insert the GIN tuples right away, but instead accumulate as
    1693                 :             :          * many TIDs for the same key as possible, and then insert that at once.
    1694                 :             :          * This way we don't need to decompress/recompress the posting lists, etc.
    1695                 :             :          */
    1696         [ #  # ]:           0 :         while ((tup = tuplesort_getgintuple(state->bs_sortstate, &tuplen, true)) != NULL)
    1697                 :             :         {
    1698                 :           0 :                 MemoryContext oldCtx;
    1699                 :             : 
    1700         [ #  # ]:           0 :                 CHECK_FOR_INTERRUPTS();
    1701                 :             : 
    1702                 :             :                 /*
    1703                 :             :                  * If the buffer can accept the new GIN tuple, just store it there and
    1704                 :             :                  * we're done. If it's a different key (or maybe too much data) flush
    1705                 :             :                  * the current contents into the index first.
    1706                 :             :                  */
    1707         [ #  # ]:           0 :                 if (!GinBufferCanAddKey(buffer, tup))
    1708                 :             :                 {
    1709                 :             :                         /*
    1710                 :             :                          * Buffer is not empty and it's storing a different key - flush
    1711                 :             :                          * the data into the insert, and start a new entry for current
    1712                 :             :                          * GinTuple.
    1713                 :             :                          */
    1714                 :           0 :                         AssertCheckItemPointers(buffer);
    1715                 :             : 
    1716                 :           0 :                         oldCtx = MemoryContextSwitchTo(state->tmpCtx);
    1717                 :             : 
    1718                 :           0 :                         ginEntryInsert(&state->ginstate,
    1719                 :           0 :                                                    buffer->attnum, buffer->key, buffer->category,
    1720                 :           0 :                                                    buffer->items, buffer->nitems, &state->buildStats);
    1721                 :             : 
    1722                 :           0 :                         MemoryContextSwitchTo(oldCtx);
    1723                 :           0 :                         MemoryContextReset(state->tmpCtx);
    1724                 :             : 
    1725                 :             :                         /* discard the existing data */
    1726                 :           0 :                         GinBufferReset(buffer);
    1727                 :           0 :                 }
    1728                 :             : 
    1729                 :             :                 /*
    1730                 :             :                  * We're about to add a GIN tuple to the buffer - check the memory
    1731                 :             :                  * limit first, and maybe write out some of the data into the index
    1732                 :             :                  * first, if needed (and possible). We only flush the part of the TID
    1733                 :             :                  * list that we know won't change, and only if there's enough data for
    1734                 :             :                  * compression to work well.
    1735                 :             :                  */
    1736         [ #  # ]:           0 :                 if (GinBufferShouldTrim(buffer, tup))
    1737                 :             :                 {
    1738         [ #  # ]:           0 :                         Assert(buffer->nfrozen > 0);
    1739                 :             : 
    1740                 :             :                         /*
    1741                 :             :                          * Buffer is not empty and it's storing a different key - flush
    1742                 :             :                          * the data into the insert, and start a new entry for current
    1743                 :             :                          * GinTuple.
    1744                 :             :                          */
    1745                 :           0 :                         AssertCheckItemPointers(buffer);
    1746                 :             : 
    1747                 :           0 :                         oldCtx = MemoryContextSwitchTo(state->tmpCtx);
    1748                 :             : 
    1749                 :           0 :                         ginEntryInsert(&state->ginstate,
    1750                 :           0 :                                                    buffer->attnum, buffer->key, buffer->category,
    1751                 :           0 :                                                    buffer->items, buffer->nfrozen, &state->buildStats);
    1752                 :             : 
    1753                 :           0 :                         MemoryContextSwitchTo(oldCtx);
    1754                 :           0 :                         MemoryContextReset(state->tmpCtx);
    1755                 :             : 
    1756                 :             :                         /* truncate the data we've just discarded */
    1757                 :           0 :                         GinBufferTrim(buffer);
    1758                 :           0 :                 }
    1759                 :             : 
    1760                 :             :                 /*
    1761                 :             :                  * Remember data for the current tuple (either remember the new key,
    1762                 :             :                  * or append if to the existing data).
    1763                 :             :                  */
    1764                 :           0 :                 GinBufferStoreTuple(buffer, tup);
    1765                 :             : 
    1766                 :             :                 /* Report progress */
    1767                 :           0 :                 pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE,
    1768                 :           0 :                                                                          ++numtuples);
    1769                 :           0 :         }
    1770                 :             : 
    1771                 :             :         /* flush data remaining in the buffer (for the last key) */
    1772         [ #  # ]:           0 :         if (!GinBufferIsEmpty(buffer))
    1773                 :             :         {
    1774                 :           0 :                 AssertCheckItemPointers(buffer);
    1775                 :             : 
    1776                 :           0 :                 ginEntryInsert(&state->ginstate,
    1777                 :           0 :                                            buffer->attnum, buffer->key, buffer->category,
    1778                 :           0 :                                            buffer->items, buffer->nitems, &state->buildStats);
    1779                 :             : 
    1780                 :             :                 /* discard the existing data */
    1781                 :           0 :                 GinBufferReset(buffer);
    1782                 :             : 
    1783                 :             :                 /* Report progress */
    1784                 :           0 :                 pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE,
    1785                 :           0 :                                                                          ++numtuples);
    1786                 :           0 :         }
    1787                 :             : 
    1788                 :             :         /* release all the memory */
    1789                 :           0 :         GinBufferFree(buffer);
    1790                 :             : 
    1791                 :           0 :         tuplesort_end(state->bs_sortstate);
    1792                 :             : 
    1793                 :           0 :         return reltuples;
    1794                 :           0 : }
    1795                 :             : 
    1796                 :             : /*
    1797                 :             :  * Returns size of shared memory required to store state for a parallel
    1798                 :             :  * gin index build based on the snapshot its parallel scan will use.
    1799                 :             :  */
    1800                 :             : static Size
    1801                 :           0 : _gin_parallel_estimate_shared(Relation heap, Snapshot snapshot)
    1802                 :             : {
    1803                 :             :         /* c.f. shm_toc_allocate as to why BUFFERALIGN is used */
    1804                 :           0 :         return add_size(BUFFERALIGN(sizeof(GinBuildShared)),
    1805                 :           0 :                                         table_parallelscan_estimate(heap, snapshot));
    1806                 :             : }
    1807                 :             : 
    1808                 :             : /*
    1809                 :             :  * Within leader, participate as a parallel worker.
    1810                 :             :  */
    1811                 :             : static void
    1812                 :           0 : _gin_leader_participate_as_worker(GinBuildState *buildstate, Relation heap, Relation index)
    1813                 :             : {
    1814                 :           0 :         GinLeader  *ginleader = buildstate->bs_leader;
    1815                 :           0 :         int                     sortmem;
    1816                 :             : 
    1817                 :             :         /*
    1818                 :             :          * Might as well use reliable figure when doling out maintenance_work_mem
    1819                 :             :          * (when requested number of workers were not launched, this will be
    1820                 :             :          * somewhat higher than it is for other workers).
    1821                 :             :          */
    1822                 :           0 :         sortmem = maintenance_work_mem / ginleader->nparticipanttuplesorts;
    1823                 :             : 
    1824                 :             :         /* Perform work common to all participants */
    1825                 :           0 :         _gin_parallel_scan_and_build(buildstate, ginleader->ginshared,
    1826                 :           0 :                                                                  ginleader->sharedsort, heap, index,
    1827                 :           0 :                                                                  sortmem, true);
    1828                 :           0 : }
    1829                 :             : 
    1830                 :             : /*
    1831                 :             :  * _gin_process_worker_data
    1832                 :             :  *              First phase of the key merging, happening in the worker.
    1833                 :             :  *
    1834                 :             :  * Depending on the number of distinct keys, the TID lists produced by the
    1835                 :             :  * callback may be very short (due to frequent evictions in the callback).
    1836                 :             :  * But combining many tiny lists is expensive, so we try to do as much as
    1837                 :             :  * possible in the workers and only then pass the results to the leader.
    1838                 :             :  *
    1839                 :             :  * We read the tuples sorted by the key, and merge them into larger lists.
    1840                 :             :  * At the moment there's no memory limit, so this will just produce one
    1841                 :             :  * huge (sorted) list per key in each worker. Which means the leader will
    1842                 :             :  * do a very limited number of mergesorts, which is good.
    1843                 :             :  */
    1844                 :             : static void
    1845                 :           0 : _gin_process_worker_data(GinBuildState *state, Tuplesortstate *worker_sort,
    1846                 :             :                                                  bool progress)
    1847                 :             : {
    1848                 :           0 :         GinTuple   *tup;
    1849                 :           0 :         Size            tuplen;
    1850                 :             : 
    1851                 :           0 :         GinBuffer  *buffer;
    1852                 :             : 
    1853                 :             :         /*
    1854                 :             :          * Initialize buffer to combine entries for the same key.
    1855                 :             :          *
    1856                 :             :          * The workers are limited to the same amount of memory as during the sort
    1857                 :             :          * in ginBuildCallbackParallel. But this probably should be the 32MB used
    1858                 :             :          * during planning, just like there.
    1859                 :             :          */
    1860                 :           0 :         buffer = GinBufferInit(state->ginstate.index);
    1861                 :             : 
    1862                 :             :         /* sort the raw per-worker data */
    1863         [ #  # ]:           0 :         if (progress)
    1864                 :           0 :                 pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
    1865                 :             :                                                                          PROGRESS_GIN_PHASE_PERFORMSORT_1);
    1866                 :             : 
    1867                 :           0 :         tuplesort_performsort(state->bs_worker_sort);
    1868                 :             : 
    1869                 :             :         /* reset the number of GIN tuples produced by this worker */
    1870                 :           0 :         state->bs_numtuples = 0;
    1871                 :             : 
    1872         [ #  # ]:           0 :         if (progress)
    1873                 :           0 :                 pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE,
    1874                 :             :                                                                          PROGRESS_GIN_PHASE_MERGE_1);
    1875                 :             : 
    1876                 :             :         /*
    1877                 :             :          * Read the GIN tuples from the shared tuplesort, sorted by the key, and
    1878                 :             :          * merge them into larger chunks for the leader to combine.
    1879                 :             :          */
    1880         [ #  # ]:           0 :         while ((tup = tuplesort_getgintuple(worker_sort, &tuplen, true)) != NULL)
    1881                 :             :         {
    1882                 :             : 
    1883         [ #  # ]:           0 :                 CHECK_FOR_INTERRUPTS();
    1884                 :             : 
    1885                 :             :                 /*
    1886                 :             :                  * If the buffer can accept the new GIN tuple, just store it there and
    1887                 :             :                  * we're done. If it's a different key (or maybe too much data) flush
    1888                 :             :                  * the current contents into the index first.
    1889                 :             :                  */
    1890         [ #  # ]:           0 :                 if (!GinBufferCanAddKey(buffer, tup))
    1891                 :             :                 {
    1892                 :           0 :                         GinTuple   *ntup;
    1893                 :           0 :                         Size            ntuplen;
    1894                 :             : 
    1895                 :             :                         /*
    1896                 :             :                          * Buffer is not empty and it's storing a different key - flush
    1897                 :             :                          * the data into the insert, and start a new entry for current
    1898                 :             :                          * GinTuple.
    1899                 :             :                          */
    1900                 :           0 :                         AssertCheckItemPointers(buffer);
    1901                 :             : 
    1902                 :           0 :                         ntup = _gin_build_tuple(buffer->attnum, buffer->category,
    1903                 :           0 :                                                                         buffer->key, buffer->typlen, buffer->typbyval,
    1904                 :           0 :                                                                         buffer->items, buffer->nitems, &ntuplen);
    1905                 :             : 
    1906                 :           0 :                         tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen);
    1907                 :           0 :                         state->bs_numtuples++;
    1908                 :             : 
    1909                 :           0 :                         pfree(ntup);
    1910                 :             : 
    1911                 :             :                         /* discard the existing data */
    1912                 :           0 :                         GinBufferReset(buffer);
    1913                 :           0 :                 }
    1914                 :             : 
    1915                 :             :                 /*
    1916                 :             :                  * We're about to add a GIN tuple to the buffer - check the memory
    1917                 :             :                  * limit first, and maybe write out some of the data into the index
    1918                 :             :                  * first, if needed (and possible). We only flush the part of the TID
    1919                 :             :                  * list that we know won't change, and only if there's enough data for
    1920                 :             :                  * compression to work well.
    1921                 :             :                  */
    1922         [ #  # ]:           0 :                 if (GinBufferShouldTrim(buffer, tup))
    1923                 :             :                 {
    1924                 :           0 :                         GinTuple   *ntup;
    1925                 :           0 :                         Size            ntuplen;
    1926                 :             : 
    1927         [ #  # ]:           0 :                         Assert(buffer->nfrozen > 0);
    1928                 :             : 
    1929                 :             :                         /*
    1930                 :             :                          * Buffer is not empty and it's storing a different key - flush
    1931                 :             :                          * the data into the insert, and start a new entry for current
    1932                 :             :                          * GinTuple.
    1933                 :             :                          */
    1934                 :           0 :                         AssertCheckItemPointers(buffer);
    1935                 :             : 
    1936                 :           0 :                         ntup = _gin_build_tuple(buffer->attnum, buffer->category,
    1937                 :           0 :                                                                         buffer->key, buffer->typlen, buffer->typbyval,
    1938                 :           0 :                                                                         buffer->items, buffer->nfrozen, &ntuplen);
    1939                 :             : 
    1940                 :           0 :                         tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen);
    1941                 :             : 
    1942                 :           0 :                         pfree(ntup);
    1943                 :             : 
    1944                 :             :                         /* truncate the data we've just discarded */
    1945                 :           0 :                         GinBufferTrim(buffer);
    1946                 :           0 :                 }
    1947                 :             : 
    1948                 :             :                 /*
    1949                 :             :                  * Remember data for the current tuple (either remember the new key,
    1950                 :             :                  * or append if to the existing data).
    1951                 :             :                  */
    1952                 :           0 :                 GinBufferStoreTuple(buffer, tup);
    1953                 :             :         }
    1954                 :             : 
    1955                 :             :         /* flush data remaining in the buffer (for the last key) */
    1956         [ #  # ]:           0 :         if (!GinBufferIsEmpty(buffer))
    1957                 :             :         {
    1958                 :           0 :                 GinTuple   *ntup;
    1959                 :           0 :                 Size            ntuplen;
    1960                 :             : 
    1961                 :           0 :                 AssertCheckItemPointers(buffer);
    1962                 :             : 
    1963                 :           0 :                 ntup = _gin_build_tuple(buffer->attnum, buffer->category,
    1964                 :           0 :                                                                 buffer->key, buffer->typlen, buffer->typbyval,
    1965                 :           0 :                                                                 buffer->items, buffer->nitems, &ntuplen);
    1966                 :             : 
    1967                 :           0 :                 tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen);
    1968                 :           0 :                 state->bs_numtuples++;
    1969                 :             : 
    1970                 :           0 :                 pfree(ntup);
    1971                 :             : 
    1972                 :             :                 /* discard the existing data */
    1973                 :           0 :                 GinBufferReset(buffer);
    1974                 :           0 :         }
    1975                 :             : 
    1976                 :             :         /* release all the memory */
    1977                 :           0 :         GinBufferFree(buffer);
    1978                 :             : 
    1979                 :           0 :         tuplesort_end(worker_sort);
    1980                 :           0 : }
    1981                 :             : 
    1982                 :             : /*
    1983                 :             :  * Perform a worker's portion of a parallel GIN index build sort.
    1984                 :             :  *
    1985                 :             :  * This generates a tuplesort for the worker portion of the table.
    1986                 :             :  *
    1987                 :             :  * sortmem is the amount of working memory to use within each worker,
    1988                 :             :  * expressed in KBs.
    1989                 :             :  *
    1990                 :             :  * When this returns, workers are done, and need only release resources.
    1991                 :             :  *
    1992                 :             :  * Before feeding data into a shared tuplesort (for the leader process),
    1993                 :             :  * the workers process data in two phases.
    1994                 :             :  *
    1995                 :             :  * 1) A worker reads a portion of rows from the table, accumulates entries
    1996                 :             :  * in memory, and flushes them into a private tuplesort (e.g. because of
    1997                 :             :  * using too much memory).
    1998                 :             :  *
    1999                 :             :  * 2) The private tuplesort gets sorted (by key and TID), the worker reads
    2000                 :             :  * the data again, and combines the entries as much as possible. This has
    2001                 :             :  * to happen eventually, and this way it's done in workers in parallel.
    2002                 :             :  *
    2003                 :             :  * Finally, the combined entries are written into the shared tuplesort, so
    2004                 :             :  * that the leader can process them.
    2005                 :             :  *
    2006                 :             :  * How well this works (compared to just writing entries into the shared
    2007                 :             :  * tuplesort) depends on the data set. For large tables with many distinct
    2008                 :             :  * keys this helps a lot. With many distinct keys it's likely the buffers has
    2009                 :             :  * to be flushed often, generating many entries with the same key and short
    2010                 :             :  * TID lists. These entries need to be sorted and merged at some point,
    2011                 :             :  * before writing them to the index. The merging is quite expensive, it can
    2012                 :             :  * easily be ~50% of a serial build, and doing as much of it in the workers
    2013                 :             :  * means it's parallelized. The leader still has to merge results from the
    2014                 :             :  * workers, but it's much more efficient to merge few large entries than
    2015                 :             :  * many tiny ones.
    2016                 :             :  *
    2017                 :             :  * This also reduces the amount of data the workers pass to the leader through
    2018                 :             :  * the shared tuplesort. OTOH the workers need more space for the private sort,
    2019                 :             :  * possibly up to 2x of the data, if no entries be merged in a worker. But this
    2020                 :             :  * is very unlikely, and the only consequence is inefficiency, so we ignore it.
    2021                 :             :  */
    2022                 :             : static void
    2023                 :           0 : _gin_parallel_scan_and_build(GinBuildState *state,
    2024                 :             :                                                          GinBuildShared *ginshared, Sharedsort *sharedsort,
    2025                 :             :                                                          Relation heap, Relation index,
    2026                 :             :                                                          int sortmem, bool progress)
    2027                 :             : {
    2028                 :           0 :         SortCoordinate coordinate;
    2029                 :           0 :         TableScanDesc scan;
    2030                 :           0 :         double          reltuples;
    2031                 :           0 :         IndexInfo  *indexInfo;
    2032                 :             : 
    2033                 :             :         /* Initialize local tuplesort coordination state */
    2034                 :           0 :         coordinate = palloc0_object(SortCoordinateData);
    2035                 :           0 :         coordinate->isWorker = true;
    2036                 :           0 :         coordinate->nParticipants = -1;
    2037                 :           0 :         coordinate->sharedsort = sharedsort;
    2038                 :             : 
    2039                 :             :         /* remember how much space is allowed for the accumulated entries */
    2040                 :           0 :         state->work_mem = (sortmem / 2);
    2041                 :             : 
    2042                 :             :         /* remember how many workers participate in the build */
    2043                 :           0 :         state->bs_num_workers = ginshared->scantuplesortstates;
    2044                 :             : 
    2045                 :             :         /* Begin "partial" tuplesort */
    2046                 :           0 :         state->bs_sortstate = tuplesort_begin_index_gin(heap, index,
    2047                 :           0 :                                                                                                         state->work_mem,
    2048                 :           0 :                                                                                                         coordinate,
    2049                 :             :                                                                                                         TUPLESORT_NONE);
    2050                 :             : 
    2051                 :             :         /* Local per-worker sort of raw-data */
    2052                 :           0 :         state->bs_worker_sort = tuplesort_begin_index_gin(heap, index,
    2053                 :           0 :                                                                                                           state->work_mem,
    2054                 :             :                                                                                                           NULL,
    2055                 :             :                                                                                                           TUPLESORT_NONE);
    2056                 :             : 
    2057                 :             :         /* Join parallel scan */
    2058                 :           0 :         indexInfo = BuildIndexInfo(index);
    2059                 :           0 :         indexInfo->ii_Concurrent = ginshared->isconcurrent;
    2060                 :             : 
    2061                 :           0 :         scan = table_beginscan_parallel(heap,
    2062                 :           0 :                                                                         ParallelTableScanFromGinBuildShared(ginshared));
    2063                 :             : 
    2064                 :           0 :         reltuples = table_index_build_scan(heap, index, indexInfo, true, progress,
    2065                 :           0 :                                                                            ginBuildCallbackParallel, state, scan);
    2066                 :             : 
    2067                 :             :         /* write remaining accumulated entries */
    2068                 :           0 :         ginFlushBuildState(state, index);
    2069                 :             : 
    2070                 :             :         /*
    2071                 :             :          * Do the first phase of in-worker processing - sort the data produced by
    2072                 :             :          * the callback, and combine them into much larger chunks and place that
    2073                 :             :          * into the shared tuplestore for leader to process.
    2074                 :             :          */
    2075                 :           0 :         _gin_process_worker_data(state, state->bs_worker_sort, progress);
    2076                 :             : 
    2077                 :             :         /* sort the GIN tuples built by this worker */
    2078                 :           0 :         tuplesort_performsort(state->bs_sortstate);
    2079                 :             : 
    2080                 :           0 :         state->bs_reltuples += reltuples;
    2081                 :             : 
    2082                 :             :         /*
    2083                 :             :          * Done.  Record ambuild statistics.
    2084                 :             :          */
    2085         [ #  # ]:           0 :         SpinLockAcquire(&ginshared->mutex);
    2086                 :           0 :         ginshared->nparticipantsdone++;
    2087                 :           0 :         ginshared->reltuples += state->bs_reltuples;
    2088                 :           0 :         ginshared->indtuples += state->bs_numtuples;
    2089                 :           0 :         SpinLockRelease(&ginshared->mutex);
    2090                 :             : 
    2091                 :             :         /* Notify leader */
    2092                 :           0 :         ConditionVariableSignal(&ginshared->workersdonecv);
    2093                 :             : 
    2094                 :           0 :         tuplesort_end(state->bs_sortstate);
    2095                 :           0 : }
    2096                 :             : 
    2097                 :             : /*
    2098                 :             :  * Perform work within a launched parallel process.
    2099                 :             :  */
    2100                 :             : void
    2101                 :           0 : _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc)
    2102                 :             : {
    2103                 :           0 :         char       *sharedquery;
    2104                 :           0 :         GinBuildShared *ginshared;
    2105                 :           0 :         Sharedsort *sharedsort;
    2106                 :           0 :         GinBuildState buildstate;
    2107                 :           0 :         Relation        heapRel;
    2108                 :           0 :         Relation        indexRel;
    2109                 :           0 :         LOCKMODE        heapLockmode;
    2110                 :           0 :         LOCKMODE        indexLockmode;
    2111                 :           0 :         WalUsage   *walusage;
    2112                 :           0 :         BufferUsage *bufferusage;
    2113                 :           0 :         int                     sortmem;
    2114                 :             : 
    2115                 :             :         /*
    2116                 :             :          * The only possible status flag that can be set to the parallel worker is
    2117                 :             :          * PROC_IN_SAFE_IC.
    2118                 :             :          */
    2119   [ #  #  #  # ]:           0 :         Assert((MyProc->statusFlags == 0) ||
    2120                 :             :                    (MyProc->statusFlags == PROC_IN_SAFE_IC));
    2121                 :             : 
    2122                 :             :         /* Set debug_query_string for individual workers first */
    2123                 :           0 :         sharedquery = shm_toc_lookup(toc, PARALLEL_KEY_QUERY_TEXT, true);
    2124                 :           0 :         debug_query_string = sharedquery;
    2125                 :             : 
    2126                 :             :         /* Report the query string from leader */
    2127                 :           0 :         pgstat_report_activity(STATE_RUNNING, debug_query_string);
    2128                 :             : 
    2129                 :             :         /* Look up gin shared state */
    2130                 :           0 :         ginshared = shm_toc_lookup(toc, PARALLEL_KEY_GIN_SHARED, false);
    2131                 :             : 
    2132                 :             :         /* Open relations using lock modes known to be obtained by index.c */
    2133         [ #  # ]:           0 :         if (!ginshared->isconcurrent)
    2134                 :             :         {
    2135                 :           0 :                 heapLockmode = ShareLock;
    2136                 :           0 :                 indexLockmode = AccessExclusiveLock;
    2137                 :           0 :         }
    2138                 :             :         else
    2139                 :             :         {
    2140                 :           0 :                 heapLockmode = ShareUpdateExclusiveLock;
    2141                 :           0 :                 indexLockmode = RowExclusiveLock;
    2142                 :             :         }
    2143                 :             : 
    2144                 :             :         /* Open relations within worker */
    2145                 :           0 :         heapRel = table_open(ginshared->heaprelid, heapLockmode);
    2146                 :           0 :         indexRel = index_open(ginshared->indexrelid, indexLockmode);
    2147                 :             : 
    2148                 :             :         /* initialize the GIN build state */
    2149                 :           0 :         initGinState(&buildstate.ginstate, indexRel);
    2150                 :           0 :         buildstate.indtuples = 0;
    2151                 :           0 :         memset(&buildstate.buildStats, 0, sizeof(GinStatsData));
    2152                 :           0 :         memset(&buildstate.tid, 0, sizeof(ItemPointerData));
    2153                 :             : 
    2154                 :             :         /*
    2155                 :             :          * create a temporary memory context that is used to hold data not yet
    2156                 :             :          * dumped out to the index
    2157                 :             :          */
    2158                 :           0 :         buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
    2159                 :             :                                                                                           "Gin build temporary context",
    2160                 :             :                                                                                           ALLOCSET_DEFAULT_SIZES);
    2161                 :             : 
    2162                 :             :         /*
    2163                 :             :          * create a temporary memory context that is used for calling
    2164                 :             :          * ginExtractEntries(), and can be reset after each tuple
    2165                 :             :          */
    2166                 :           0 :         buildstate.funcCtx = AllocSetContextCreate(CurrentMemoryContext,
    2167                 :             :                                                                                            "Gin build temporary context for user-defined function",
    2168                 :             :                                                                                            ALLOCSET_DEFAULT_SIZES);
    2169                 :             : 
    2170                 :           0 :         buildstate.accum.ginstate = &buildstate.ginstate;
    2171                 :           0 :         ginInitBA(&buildstate.accum);
    2172                 :             : 
    2173                 :             : 
    2174                 :             :         /* Look up shared state private to tuplesort.c */
    2175                 :           0 :         sharedsort = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT, false);
    2176                 :           0 :         tuplesort_attach_shared(sharedsort, seg);
    2177                 :             : 
    2178                 :             :         /* Prepare to track buffer usage during parallel execution */
    2179                 :           0 :         InstrStartParallelQuery();
    2180                 :             : 
    2181                 :             :         /*
    2182                 :             :          * Might as well use reliable figure when doling out maintenance_work_mem
    2183                 :             :          * (when requested number of workers were not launched, this will be
    2184                 :             :          * somewhat higher than it is for other workers).
    2185                 :             :          */
    2186                 :           0 :         sortmem = maintenance_work_mem / ginshared->scantuplesortstates;
    2187                 :             : 
    2188                 :           0 :         _gin_parallel_scan_and_build(&buildstate, ginshared, sharedsort,
    2189                 :           0 :                                                                  heapRel, indexRel, sortmem, false);
    2190                 :             : 
    2191                 :             :         /* Report WAL/buffer usage during parallel execution */
    2192                 :           0 :         bufferusage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE, false);
    2193                 :           0 :         walusage = shm_toc_lookup(toc, PARALLEL_KEY_WAL_USAGE, false);
    2194                 :           0 :         InstrEndParallelQuery(&bufferusage[ParallelWorkerNumber],
    2195                 :           0 :                                                   &walusage[ParallelWorkerNumber]);
    2196                 :             : 
    2197                 :           0 :         index_close(indexRel, indexLockmode);
    2198                 :           0 :         table_close(heapRel, heapLockmode);
    2199                 :           0 : }
    2200                 :             : 
    2201                 :             : /*
    2202                 :             :  * Used to keep track of compressed TID lists when building a GIN tuple.
    2203                 :             :  */
    2204                 :             : typedef struct
    2205                 :             : {
    2206                 :             :         dlist_node      node;                   /* linked list pointers */
    2207                 :             :         GinPostingList *seg;
    2208                 :             : } GinSegmentInfo;
    2209                 :             : 
    2210                 :             : /*
    2211                 :             :  * _gin_build_tuple
    2212                 :             :  *              Serialize the state for an index key into a tuple for tuplesort.
    2213                 :             :  *
    2214                 :             :  * The tuple has a number of scalar fields (mostly matching the build state),
    2215                 :             :  * and then a data array that stores the key first, and then the TID list.
    2216                 :             :  *
    2217                 :             :  * For by-reference data types, we store the actual data. For by-val types
    2218                 :             :  * we simply copy the whole Datum, so that we don't have to care about stuff
    2219                 :             :  * like endianness etc. We could make it a little bit smaller, but it's not
    2220                 :             :  * worth it - it's a tiny fraction of the data, and we need to MAXALIGN the
    2221                 :             :  * start of the TID list anyway. So we wouldn't save anything. (This would
    2222                 :             :  * not be a good idea for the permanent in-index data, since we'd prefer
    2223                 :             :  * that that not depend on sizeof(Datum). But this is just a transient
    2224                 :             :  * representation to use while sorting the data.)
    2225                 :             :  *
    2226                 :             :  * The TID list is serialized as compressed - it's highly compressible, and
    2227                 :             :  * we already have ginCompressPostingList for this purpose. The list may be
    2228                 :             :  * pretty long, so we compress it into multiple segments and then copy all
    2229                 :             :  * of that into the GIN tuple.
    2230                 :             :  */
    2231                 :             : static GinTuple *
    2232                 :           0 : _gin_build_tuple(OffsetNumber attrnum, unsigned char category,
    2233                 :             :                                  Datum key, int16 typlen, bool typbyval,
    2234                 :             :                                  ItemPointerData *items, uint32 nitems,
    2235                 :             :                                  Size *len)
    2236                 :             : {
    2237                 :           0 :         GinTuple   *tuple;
    2238                 :           0 :         char       *ptr;
    2239                 :             : 
    2240                 :           0 :         Size            tuplen;
    2241                 :           0 :         int                     keylen;
    2242                 :             : 
    2243                 :           0 :         dlist_mutable_iter iter;
    2244                 :           0 :         dlist_head      segments;
    2245                 :           0 :         int                     ncompressed;
    2246                 :           0 :         Size            compresslen;
    2247                 :             : 
    2248                 :             :         /*
    2249                 :             :          * Calculate how long is the key value. Only keys with GIN_CAT_NORM_KEY
    2250                 :             :          * have actual non-empty key. We include varlena headers and \0 bytes for
    2251                 :             :          * strings, to make it easier to access the data in-line.
    2252                 :             :          *
    2253                 :             :          * For byval types we simply copy the whole Datum. We could store just the
    2254                 :             :          * necessary bytes, but this is simpler to work with and not worth the
    2255                 :             :          * extra complexity. Moreover we still need to do the MAXALIGN to allow
    2256                 :             :          * direct access to items pointers.
    2257                 :             :          *
    2258                 :             :          * XXX Note that for byval types we store the whole datum, no matter what
    2259                 :             :          * the typlen value is.
    2260                 :             :          */
    2261         [ #  # ]:           0 :         if (category != GIN_CAT_NORM_KEY)
    2262                 :           0 :                 keylen = 0;
    2263         [ #  # ]:           0 :         else if (typbyval)
    2264                 :           0 :                 keylen = sizeof(Datum);
    2265         [ #  # ]:           0 :         else if (typlen > 0)
    2266                 :           0 :                 keylen = typlen;
    2267         [ #  # ]:           0 :         else if (typlen == -1)
    2268                 :           0 :                 keylen = VARSIZE_ANY(DatumGetPointer(key));
    2269         [ #  # ]:           0 :         else if (typlen == -2)
    2270                 :           0 :                 keylen = strlen(DatumGetPointer(key)) + 1;
    2271                 :             :         else
    2272   [ #  #  #  # ]:           0 :                 elog(ERROR, "unexpected typlen value (%d)", typlen);
    2273                 :             : 
    2274                 :             :         /* compress the item pointers */
    2275                 :           0 :         ncompressed = 0;
    2276                 :           0 :         compresslen = 0;
    2277                 :           0 :         dlist_init(&segments);
    2278                 :             : 
    2279                 :             :         /* generate compressed segments of TID list chunks */
    2280         [ #  # ]:           0 :         while (ncompressed < nitems)
    2281                 :             :         {
    2282                 :           0 :                 int                     cnt;
    2283                 :           0 :                 GinSegmentInfo *seginfo = palloc_object(GinSegmentInfo);
    2284                 :             : 
    2285                 :           0 :                 seginfo->seg = ginCompressPostingList(&items[ncompressed],
    2286                 :           0 :                                                                                           (nitems - ncompressed),
    2287                 :             :                                                                                           UINT16_MAX,
    2288                 :             :                                                                                           &cnt);
    2289                 :             : 
    2290                 :           0 :                 ncompressed += cnt;
    2291                 :           0 :                 compresslen += SizeOfGinPostingList(seginfo->seg);
    2292                 :             : 
    2293                 :           0 :                 dlist_push_tail(&segments, &seginfo->node);
    2294                 :           0 :         }
    2295                 :             : 
    2296                 :             :         /*
    2297                 :             :          * Determine GIN tuple length with all the data included. Be careful about
    2298                 :             :          * alignment, to allow direct access to compressed segments (those require
    2299                 :             :          * only SHORTALIGN).
    2300                 :             :          */
    2301                 :           0 :         tuplen = SHORTALIGN(offsetof(GinTuple, data) + keylen) + compresslen;
    2302                 :             : 
    2303                 :           0 :         *len = tuplen;
    2304                 :             : 
    2305                 :             :         /*
    2306                 :             :          * Allocate space for the whole GIN tuple.
    2307                 :             :          *
    2308                 :             :          * The palloc0 is needed - writetup_index_gin will write the whole tuple
    2309                 :             :          * to disk, so we need to make sure the padding bytes are defined
    2310                 :             :          * (otherwise valgrind would report this).
    2311                 :             :          */
    2312                 :           0 :         tuple = palloc0(tuplen);
    2313                 :             : 
    2314                 :           0 :         tuple->tuplen = tuplen;
    2315                 :           0 :         tuple->attrnum = attrnum;
    2316                 :           0 :         tuple->category = category;
    2317                 :           0 :         tuple->keylen = keylen;
    2318                 :           0 :         tuple->nitems = nitems;
    2319                 :             : 
    2320                 :             :         /* key type info */
    2321                 :           0 :         tuple->typlen = typlen;
    2322                 :           0 :         tuple->typbyval = typbyval;
    2323                 :             : 
    2324                 :             :         /*
    2325                 :             :          * Copy the key and items into the tuple. First the key value, which we
    2326                 :             :          * can simply copy right at the beginning of the data array.
    2327                 :             :          */
    2328         [ #  # ]:           0 :         if (category == GIN_CAT_NORM_KEY)
    2329                 :             :         {
    2330         [ #  # ]:           0 :                 if (typbyval)
    2331                 :             :                 {
    2332                 :           0 :                         memcpy(tuple->data, &key, sizeof(Datum));
    2333                 :           0 :                 }
    2334         [ #  # ]:           0 :                 else if (typlen > 0) /* byref, fixed length */
    2335                 :             :                 {
    2336                 :           0 :                         memcpy(tuple->data, DatumGetPointer(key), typlen);
    2337                 :           0 :                 }
    2338         [ #  # ]:           0 :                 else if (typlen == -1)
    2339                 :             :                 {
    2340                 :           0 :                         memcpy(tuple->data, DatumGetPointer(key), keylen);
    2341                 :           0 :                 }
    2342         [ #  # ]:           0 :                 else if (typlen == -2)
    2343                 :             :                 {
    2344                 :           0 :                         memcpy(tuple->data, DatumGetPointer(key), keylen);
    2345                 :           0 :                 }
    2346                 :           0 :         }
    2347                 :             : 
    2348                 :             :         /* finally, copy the TIDs into the array */
    2349                 :           0 :         ptr = (char *) tuple + SHORTALIGN(offsetof(GinTuple, data) + keylen);
    2350                 :             : 
    2351                 :             :         /* copy in the compressed data, and free the segments */
    2352   [ #  #  #  # ]:           0 :         dlist_foreach_modify(iter, &segments)
    2353                 :             :         {
    2354                 :           0 :                 GinSegmentInfo *seginfo = dlist_container(GinSegmentInfo, node, iter.cur);
    2355                 :             : 
    2356                 :           0 :                 memcpy(ptr, seginfo->seg, SizeOfGinPostingList(seginfo->seg));
    2357                 :             : 
    2358                 :           0 :                 ptr += SizeOfGinPostingList(seginfo->seg);
    2359                 :             : 
    2360                 :           0 :                 dlist_delete(&seginfo->node);
    2361                 :             : 
    2362                 :           0 :                 pfree(seginfo->seg);
    2363                 :           0 :                 pfree(seginfo);
    2364                 :           0 :         }
    2365                 :             : 
    2366                 :           0 :         return tuple;
    2367                 :           0 : }
    2368                 :             : 
    2369                 :             : /*
    2370                 :             :  * _gin_parse_tuple_key
    2371                 :             :  *              Return a Datum representing the key stored in the tuple.
    2372                 :             :  *
    2373                 :             :  * Most of the tuple fields are directly accessible, the only thing that
    2374                 :             :  * needs more care is the key and the TID list.
    2375                 :             :  *
    2376                 :             :  * For the key, this returns a regular Datum representing it. It's either the
    2377                 :             :  * actual key value, or a pointer to the beginning of the data array (which is
    2378                 :             :  * where the data was copied by _gin_build_tuple).
    2379                 :             :  */
    2380                 :             : static Datum
    2381                 :           0 : _gin_parse_tuple_key(GinTuple *a)
    2382                 :             : {
    2383                 :           0 :         Datum           key;
    2384                 :             : 
    2385         [ #  # ]:           0 :         if (a->category != GIN_CAT_NORM_KEY)
    2386                 :           0 :                 return (Datum) 0;
    2387                 :             : 
    2388         [ #  # ]:           0 :         if (a->typbyval)
    2389                 :             :         {
    2390                 :           0 :                 memcpy(&key, a->data, a->keylen);
    2391                 :           0 :                 return key;
    2392                 :             :         }
    2393                 :             : 
    2394                 :           0 :         return PointerGetDatum(a->data);
    2395                 :           0 : }
    2396                 :             : 
    2397                 :             : /*
    2398                 :             : * _gin_parse_tuple_items
    2399                 :             :  *              Return a pointer to a palloc'd array of decompressed TID array.
    2400                 :             :  */
    2401                 :             : static ItemPointer
    2402                 :           0 : _gin_parse_tuple_items(GinTuple *a)
    2403                 :             : {
    2404                 :           0 :         int                     len;
    2405                 :           0 :         char       *ptr;
    2406                 :           0 :         int                     ndecoded;
    2407                 :           0 :         ItemPointer items;
    2408                 :             : 
    2409                 :           0 :         len = a->tuplen - SHORTALIGN(offsetof(GinTuple, data) + a->keylen);
    2410                 :           0 :         ptr = (char *) a + SHORTALIGN(offsetof(GinTuple, data) + a->keylen);
    2411                 :             : 
    2412                 :           0 :         items = ginPostingListDecodeAllSegments((GinPostingList *) ptr, len, &ndecoded);
    2413                 :             : 
    2414         [ #  # ]:           0 :         Assert(ndecoded == a->nitems);
    2415                 :             : 
    2416                 :           0 :         return items;
    2417                 :           0 : }
    2418                 :             : 
    2419                 :             : /*
    2420                 :             :  * _gin_compare_tuples
    2421                 :             :  *              Compare GIN tuples, used by tuplesort during parallel index build.
    2422                 :             :  *
    2423                 :             :  * The scalar fields (attrnum, category) are compared first, the key value is
    2424                 :             :  * compared last. The comparisons are done using type-specific sort support
    2425                 :             :  * functions.
    2426                 :             :  *
    2427                 :             :  * If the key value matches, we compare the first TID value in the TID list,
    2428                 :             :  * which means the tuples are merged in an order in which they are most
    2429                 :             :  * likely to be simply concatenated. (This "first" TID will also allow us
    2430                 :             :  * to determine a point up to which the list is fully determined and can be
    2431                 :             :  * written into the index to enforce a memory limit etc.)
    2432                 :             :  */
    2433                 :             : int
    2434                 :           0 : _gin_compare_tuples(GinTuple *a, GinTuple *b, SortSupport ssup)
    2435                 :             : {
    2436                 :           0 :         int                     r;
    2437                 :           0 :         Datum           keya,
    2438                 :             :                                 keyb;
    2439                 :             : 
    2440         [ #  # ]:           0 :         if (a->attrnum < b->attrnum)
    2441                 :           0 :                 return -1;
    2442                 :             : 
    2443         [ #  # ]:           0 :         if (a->attrnum > b->attrnum)
    2444                 :           0 :                 return 1;
    2445                 :             : 
    2446         [ #  # ]:           0 :         if (a->category < b->category)
    2447                 :           0 :                 return -1;
    2448                 :             : 
    2449         [ #  # ]:           0 :         if (a->category > b->category)
    2450                 :           0 :                 return 1;
    2451                 :             : 
    2452         [ #  # ]:           0 :         if (a->category == GIN_CAT_NORM_KEY)
    2453                 :             :         {
    2454                 :           0 :                 keya = _gin_parse_tuple_key(a);
    2455                 :           0 :                 keyb = _gin_parse_tuple_key(b);
    2456                 :             : 
    2457                 :           0 :                 r = ApplySortComparator(keya, false,
    2458                 :           0 :                                                                 keyb, false,
    2459                 :           0 :                                                                 &ssup[a->attrnum - 1]);
    2460                 :             : 
    2461                 :             :                 /* if the key is the same, consider the first TID in the array */
    2462         [ #  # ]:           0 :                 return (r != 0) ? r : ItemPointerCompare(GinTupleGetFirst(a),
    2463                 :           0 :                                                                                                  GinTupleGetFirst(b));
    2464                 :             :         }
    2465                 :             : 
    2466                 :           0 :         return ItemPointerCompare(GinTupleGetFirst(a),
    2467                 :           0 :                                                           GinTupleGetFirst(b));
    2468                 :           0 : }
        

Generated by: LCOV version 2.3.2-1