LCOV - code coverage report
Current view: top level - src/backend/access/tablesample - system.c (source / functions) Coverage Total Hit
Test: Code coverage Lines: 100.0 % 75 75
Test Date: 2026-01-26 10:56:24 Functions: 100.0 % 6 6
Legend: Lines:     hit not hit
Branches: + taken - not taken # not executed
Branches: 75.7 % 37 28

             Branch data     Line data    Source code
       1                 :             : /*-------------------------------------------------------------------------
       2                 :             :  *
       3                 :             :  * system.c
       4                 :             :  *        support routines for SYSTEM tablesample method
       5                 :             :  *
       6                 :             :  * To ensure repeatability of samples, it is necessary that selection of a
       7                 :             :  * given tuple be history-independent; otherwise syncscanning would break
       8                 :             :  * repeatability, to say nothing of logically-irrelevant maintenance such
       9                 :             :  * as physical extension or shortening of the relation.
      10                 :             :  *
      11                 :             :  * To achieve that, we proceed by hashing each candidate block number together
      12                 :             :  * with the active seed, and then selecting it if the hash is less than the
      13                 :             :  * cutoff value computed from the selection probability by BeginSampleScan.
      14                 :             :  *
      15                 :             :  *
      16                 :             :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      17                 :             :  * Portions Copyright (c) 1994, Regents of the University of California
      18                 :             :  *
      19                 :             :  * IDENTIFICATION
      20                 :             :  *        src/backend/access/tablesample/system.c
      21                 :             :  *
      22                 :             :  *-------------------------------------------------------------------------
      23                 :             :  */
      24                 :             : 
      25                 :             : #include "postgres.h"
      26                 :             : 
      27                 :             : #include <math.h>
      28                 :             : 
      29                 :             : #include "access/tsmapi.h"
      30                 :             : #include "catalog/pg_type.h"
      31                 :             : #include "common/hashfn.h"
      32                 :             : #include "optimizer/optimizer.h"
      33                 :             : #include "utils/fmgrprotos.h"
      34                 :             : 
      35                 :             : 
      36                 :             : /*
                         :             :  * Private state.
                         :             :  *
                         :             :  * Allocated by system_initsamplescan() and reached through node->tsm_state;
                         :             :  * system_beginsamplescan() assigns every field before a scan starts.
                         :             :  */
      37                 :             : typedef struct
      38                 :             : {
      39                 :             :         uint64          cutoff;                 /* select blocks with hash less than this */
      40                 :             :         uint32          seed;                   /* random seed */
      41                 :             :         BlockNumber nextblock;          /* next block to consider sampling */
      42                 :             :         OffsetNumber lt;                        /* last tuple offset returned from current block, or InvalidOffsetNumber if none yet */
      43                 :             : } SystemSamplerData;
      44                 :             : 
      45                 :             : /*
                         :             :  * Forward declarations for the static callbacks that tsm_system_handler()
                         :             :  * stores in the TsmRoutine it builds.
                         :             :  */
      46                 :             : static void system_samplescangetsamplesize(PlannerInfo *root,
      47                 :             :                                                                                    RelOptInfo *baserel,
      48                 :             :                                                                                    List *paramexprs,
      49                 :             :                                                                                    BlockNumber *pages,
      50                 :             :                                                                                    double *tuples);
      51                 :             : static void system_initsamplescan(SampleScanState *node,
      52                 :             :                                                                   int eflags);
      53                 :             : static void system_beginsamplescan(SampleScanState *node,
      54                 :             :                                                                    Datum *params,
      55                 :             :                                                                    int nparams,
      56                 :             :                                                                    uint32 seed);
      57                 :             : static BlockNumber system_nextsampleblock(SampleScanState *node, BlockNumber nblocks);
      58                 :             : static OffsetNumber system_nextsampletuple(SampleScanState *node,
      59                 :             :                                                                                    BlockNumber blockno,
      60                 :             :                                                                                    OffsetNumber maxoffset);
      61                 :             : 
      62                 :             : 
      63                 :             : /*
      64                 :             :  * Create a TsmRoutine descriptor for the SYSTEM method.
                         :             :  *
                         :             :  * The descriptor declares a single FLOAT4OID parameter, marks the method as
                         :             :  * repeatable across both queries and scans, and points each callback slot
                         :             :  * at the matching static function in this file.  EndSampleScan is left
                         :             :  * NULL.  The node is handed back with PG_RETURN_POINTER.
      65                 :             :  */
      66                 :             : Datum
      67                 :         106 : tsm_system_handler(PG_FUNCTION_ARGS)
      68                 :             : {
      69                 :         106 :         TsmRoutine *tsm = makeNode(TsmRoutine);
      70                 :             : 
      71                 :         106 :         tsm->parameterTypes = list_make1_oid(FLOAT4OID);
      72                 :         106 :         tsm->repeatable_across_queries = true;
      73                 :         106 :         tsm->repeatable_across_scans = true;
      74                 :         106 :         tsm->SampleScanGetSampleSize = system_samplescangetsamplesize;
      75                 :         106 :         tsm->InitSampleScan = system_initsamplescan;
      76                 :         106 :         tsm->BeginSampleScan = system_beginsamplescan;
      77                 :         106 :         tsm->NextSampleBlock = system_nextsampleblock;
      78                 :         106 :         tsm->NextSampleTuple = system_nextsampletuple;
      79                 :         106 :         tsm->EndSampleScan = NULL;
      80                 :             : 
      81                 :         212 :         PG_RETURN_POINTER(tsm);
      82                 :         106 : }
      83                 :             : 
      84                 :             : /*
      85                 :             :  * Sample size estimation.
                         :             :  *
                         :             :  * The first entry of paramexprs is reduced with estimate_expression_value().
                         :             :  * If that yields a non-null Const whose float4 value lies in 0..100, the
                         :             :  * value divided by 100 is used as the sampling fraction.  In every other
                         :             :  * case (not a Const, null, out of range, or NaN) a default fraction of 0.1
                         :             :  * is used; no error is raised here.
                         :             :  *
                         :             :  * Outputs: *pages and *tuples receive baserel->pages and baserel->tuples
                         :             :  * scaled by that fraction and passed through clamp_row_est().
      86                 :             :  */
      87                 :             : static void
      88                 :          25 : system_samplescangetsamplesize(PlannerInfo *root,
      89                 :             :                                                            RelOptInfo *baserel,
      90                 :             :                                                            List *paramexprs,
      91                 :             :                                                            BlockNumber *pages,
      92                 :             :                                                            double *tuples)
      93                 :             : {
      94                 :          25 :         Node       *pctnode;
      95                 :          25 :         float4          samplefract;
      96                 :             : 
      97                 :             :         /* Try to extract an estimate for the sample percentage */
      98                 :          25 :         pctnode = (Node *) linitial(paramexprs);
      99                 :          25 :         pctnode = estimate_expression_value(root, pctnode);
     100                 :             : 
     101   [ +  +  +  + ]:          25 :         if (IsA(pctnode, Const) &&
     102                 :          14 :                 !((Const *) pctnode)->constisnull)
     103                 :             :         {
     104                 :          13 :                 samplefract = DatumGetFloat4(((Const *) pctnode)->constvalue);
     105   [ +  +  +  +  :          13 :                 if (samplefract >= 0 && samplefract <= 100 && !isnan(samplefract))
          +  -  -  +  #  
                      # ]
     106                 :          11 :                         samplefract /= 100.0f;
     107                 :             :                 else
     108                 :             :                 {
     109                 :             :                         /* Default samplefract if the value is bogus */
     110                 :           2 :                         samplefract = 0.1f;
     111                 :             :                 }
     112                 :          13 :         }
     113                 :             :         else
     114                 :             :         {
     115                 :             :                 /* Default samplefract if we didn't obtain a non-null Const */
     116                 :          12 :                 samplefract = 0.1f;
     117                 :             :         }
     118                 :             : 
     119                 :             :         /* We'll visit a sample of the pages ... */
     120                 :          25 :         *pages = clamp_row_est(baserel->pages * samplefract);
     121                 :             : 
     122                 :             :         /* ... and hopefully get a representative number of tuples from them */
     123                 :          25 :         *tuples = clamp_row_est(baserel->tuples * samplefract);
     124                 :          25 : }
     125                 :             : 
     126                 :             : /*
     127                 :             :  * Initialize during executor setup.
                         :             :  *
                         :             :  * Allocates a SystemSamplerData with palloc0_object() and stores it in
                         :             :  * node->tsm_state.  The eflags argument is not used.
     128                 :             :  */
     129                 :             : static void
     130                 :          25 : system_initsamplescan(SampleScanState *node, int eflags)
     131                 :             : {
     132                 :          25 :         node->tsm_state = palloc0_object(SystemSamplerData);
     133                 :          25 : }
     134                 :             : 
     135                 :             : /*
     136                 :             :  * Examine parameters and prepare for a sample scan.
                         :             :  *
                         :             :  * params[0] is read as a float4 percentage; nparams is not consulted.  A
                         :             :  * value below 0, above 100, or NaN is rejected with
                         :             :  * ERRCODE_INVALID_TABLESAMPLE_ARGUMENT.  Otherwise the sampler's cutoff,
                         :             :  * seed, nextblock and lt fields are all (re)assigned, and the node's
                         :             :  * use_bulkread and use_pagemode flags are set.
     137                 :             :  */
     138                 :             : static void
     139                 :          14 : system_beginsamplescan(SampleScanState *node,
     140                 :             :                                            Datum *params,
     141                 :             :                                            int nparams,
     142                 :             :                                            uint32 seed)
     143                 :             : {
     144                 :          14 :         SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
     145                 :          14 :         double          percent = DatumGetFloat4(params[0]);
     146                 :          14 :         double          dcutoff;
     147                 :             : 
     148   [ +  +  -  +  :          14 :         if (percent < 0 || percent > 100 || isnan(percent))
                    +  - ]
     149   [ +  -  +  - ]:           2 :                 ereport(ERROR,
     150                 :             :                                 (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
     151                 :             :                                  errmsg("sample percentage must be between 0 and 100")));
     152                 :             : 
     153                 :             :         /*
     154                 :             :          * The cutoff is sample probability times (PG_UINT32_MAX + 1); we have to
     155                 :             :          * store that as a uint64, of course.  Note that this gives strictly
     156                 :             :          * correct behavior at the limits of zero or one probability.
     157                 :             :          */
     158                 :          12 :         dcutoff = rint(((double) PG_UINT32_MAX + 1) * percent / 100);
     159                 :          12 :         sampler->cutoff = (uint64) dcutoff;
     160                 :          12 :         sampler->seed = seed;
     161                 :          12 :         sampler->nextblock = 0;
     162                 :          12 :         sampler->lt = InvalidOffsetNumber;
     163                 :             : 
     164                 :             :         /*
     165                 :             :          * Bulkread buffer access strategy probably makes sense unless we're
     166                 :             :          * scanning a very small fraction of the table.  The 1% cutoff here is a
     167                 :             :          * guess.  We should use pagemode visibility checking, since we scan all
     168                 :             :          * tuples on each selected page.
     169                 :             :          */
     170                 :          12 :         node->use_bulkread = (percent >= 1);
     171                 :          12 :         node->use_pagemode = true;
     172                 :          12 : }
     173                 :             : 
     174                 :             : /*
     175                 :             :  * Select next block to sample.
                         :             :  *
                         :             :  * Starting at sampler->nextblock, returns the first block number below
                         :             :  * nblocks whose hash of (block number, seed) is less than sampler->cutoff,
                         :             :  * and records the following block as the starting point for the next call.
                         :             :  * Returns InvalidBlockNumber, and resets sampler->nextblock to 0, once no
                         :             :  * such block remains.
     176                 :             :  */
     177                 :             : static BlockNumber
     178                 :         721 : system_nextsampleblock(SampleScanState *node, BlockNumber nblocks)
     179                 :             : {
     180                 :         721 :         SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
     181                 :         721 :         BlockNumber nextblock = sampler->nextblock;
     182                 :         721 :         uint32          hashinput[2];
     183                 :             : 
     184                 :             :         /*
     185                 :             :          * We compute the hash by applying hash_any to an array of 2 uint32's
     186                 :             :          * containing the block number and seed.  This is efficient to set up, and
     187                 :             :          * with the current implementation of hash_any, it gives
     188                 :             :          * machine-independent results, which is a nice property for regression
     189                 :             :          * testing.
     190                 :             :          *
     191                 :             :          * The seed word of the hash input is the same for every candidate block:
     192                 :             :          */
     193                 :         721 :         hashinput[1] = sampler->seed;
     194                 :             : 
     195                 :             :         /*
     196                 :             :          * Loop over block numbers until finding suitable block or reaching end of
     197                 :             :          * relation.
     198                 :             :          */
     199         [ +  + ]:        1422 :         for (; nextblock < nblocks; nextblock++)
     200                 :             :         {
     201                 :        1411 :                 uint32          hash;
     202                 :             : 
     203                 :        1411 :                 hashinput[0] = nextblock;
     204                 :             : 
     205                 :        1411 :                 hash = DatumGetUInt32(hash_any((const unsigned char *) hashinput,
     206                 :             :                                                                            (int) sizeof(hashinput)));
     207         [ +  + ]:        1411 :                 if (hash < sampler->cutoff)
     208                 :         710 :                         break;
     209      [ -  +  + ]:        1411 :         }
     210                 :             : 
     211         [ +  + ]:         721 :         if (nextblock < nblocks)
     212                 :             :         {
     213                 :             :                 /* Found a suitable block; remember where we should start next time */
     214                 :         710 :                 sampler->nextblock = nextblock + 1;
     215                 :         710 :                 return nextblock;
     216                 :             :         }
     217                 :             : 
     218                 :             :         /* Done, but let's reset nextblock to 0 for safety. */
     219                 :          11 :         sampler->nextblock = 0;
     220                 :          11 :         return InvalidBlockNumber;
     221                 :         721 : }
     222                 :             : 
     223                 :             : /*
     224                 :             :  * Select next sampled tuple in current block.
     225                 :             :  *
     226                 :             :  * In block sampling, we just want to sample all the tuples in each selected
     227                 :             :  * block.
     228                 :             :  *
     229                 :             :  * It is OK here to return an offset without knowing if the tuple is visible
     230                 :             :  * (or even exists); nodeSamplescan.c will deal with that.
     231                 :             :  *
     232                 :             :  * When we reach end of the block, return InvalidOffsetNumber which tells
     233                 :             :  * SampleScan to go to next block.
                         :             :  *
                         :             :  * The offset most recently handed out is kept in sampler->lt.  A value of
                         :             :  * InvalidOffsetNumber there makes the next call start again at
                         :             :  * FirstOffsetNumber; any other value is incremented.  The result is
                         :             :  * InvalidOffsetNumber once it would exceed maxoffset.  The blockno
                         :             :  * argument is not used.
     234                 :             :  */
     235                 :             : static OffsetNumber
     236                 :       20763 : system_nextsampletuple(SampleScanState *node,
     237                 :             :                                            BlockNumber blockno,
     238                 :             :                                            OffsetNumber maxoffset)
     239                 :             : {
     240                 :       20763 :         SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
     241                 :       20763 :         OffsetNumber tupoffset = sampler->lt;
     242                 :             : 
     243                 :             :         /* Advance to next possible offset on page */
     244         [ +  + ]:       20763 :         if (tupoffset == InvalidOffsetNumber)
     245                 :         710 :                 tupoffset = FirstOffsetNumber;
     246                 :             :         else
     247                 :       20053 :                 tupoffset++;
     248                 :             : 
     249                 :             :         /* Done? */
     250         [ +  + ]:       20763 :         if (tupoffset > maxoffset)
     251                 :         709 :                 tupoffset = InvalidOffsetNumber;
     252                 :             : 
     253                 :       20763 :         sampler->lt = tupoffset;
     254                 :             : 
     255                 :       41526 :         return tupoffset;
     256                 :       20763 : }
        

Generated by: LCOV version 2.3.2-1