LCOV - Code coverage - src/backend/commands/analyze.c

LCOV - code coverage report

Current view:	top level - src/backend/commands - analyze.c (source / functions)		Coverage	Total	Hit
Test:	Code coverage	Lines:	87.9 %	1336	1174
Test Date:	2026-01-26 10:56:24	Functions:	100.0 %	18	18
Legend:	Lines: hit not hit Branches: + taken - not taken # not executed	Branches:	68.4 %	662	453

             Branch data     Line data    Source code

       1                 :             : /*-------------------------------------------------------------------------
       2                 :             :  *
       3                 :             :  * analyze.c
       4                 :             :  *        the Postgres statistics generator
       5                 :             :  *
       6                 :             :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7                 :             :  * Portions Copyright (c) 1994, Regents of the University of California
       8                 :             :  *
       9                 :             :  *
      10                 :             :  * IDENTIFICATION
      11                 :             :  *        src/backend/commands/analyze.c
      12                 :             :  *
      13                 :             :  *-------------------------------------------------------------------------
      14                 :             :  */
      15                 :             : #include "postgres.h"
      16                 :             : 
      17                 :             : #include <math.h>
      18                 :             : 
      19                 :             : #include "access/detoast.h"
      20                 :             : #include "access/genam.h"
      21                 :             : #include "access/multixact.h"
      22                 :             : #include "access/relation.h"
      23                 :             : #include "access/table.h"
      24                 :             : #include "access/tableam.h"
      25                 :             : #include "access/transam.h"
      26                 :             : #include "access/tupconvert.h"
      27                 :             : #include "access/visibilitymap.h"
      28                 :             : #include "access/xact.h"
      29                 :             : #include "catalog/index.h"
      30                 :             : #include "catalog/indexing.h"
      31                 :             : #include "catalog/pg_inherits.h"
      32                 :             : #include "commands/progress.h"
      33                 :             : #include "commands/tablecmds.h"
      34                 :             : #include "commands/vacuum.h"
      35                 :             : #include "common/pg_prng.h"
      36                 :             : #include "executor/executor.h"
      37                 :             : #include "foreign/fdwapi.h"
      38                 :             : #include "miscadmin.h"
      39                 :             : #include "nodes/nodeFuncs.h"
      40                 :             : #include "parser/parse_oper.h"
      41                 :             : #include "parser/parse_relation.h"
      42                 :             : #include "pgstat.h"
      43                 :             : #include "statistics/extended_stats_internal.h"
      44                 :             : #include "statistics/statistics.h"
      45                 :             : #include "storage/bufmgr.h"
      46                 :             : #include "storage/procarray.h"
      47                 :             : #include "utils/attoptcache.h"
      48                 :             : #include "utils/datum.h"
      49                 :             : #include "utils/guc.h"
      50                 :             : #include "utils/lsyscache.h"
      51                 :             : #include "utils/memutils.h"
      52                 :             : #include "utils/pg_rusage.h"
      53                 :             : #include "utils/sampling.h"
      54                 :             : #include "utils/sortsupport.h"
      55                 :             : #include "utils/syscache.h"
      56                 :             : #include "utils/timestamp.h"
      57                 :             : 
      58                 :             : 
      59                 :             : /* Per-index data for ANALYZE */
      60                 :             : typedef struct AnlIndexData
      61                 :             : {
      62                 :             :         IndexInfo  *indexInfo;          /* BuildIndexInfo result */
      63                 :             :         double          tupleFract;             /* fraction of rows for partial index */
      64                 :             :         VacAttrStats **vacattrstats;    /* index attrs to analyze */
      65                 :             :         int                     attr_cnt;
      66                 :             : } AnlIndexData;
      67                 :             : 
      68                 :             : 
      69                 :             : /* Default statistics target (GUC parameter) */
      70                 :             : int                     default_statistics_target = 100;
      71                 :             : 
      72                 :             : /* A few variables that don't seem worth passing around as parameters */
      73                 :             : static MemoryContext anl_context = NULL;
      74                 :             : static BufferAccessStrategy vac_strategy;
      75                 :             : 
      76                 :             : 
      77                 :             : static void do_analyze_rel(Relation onerel,
      78                 :             :                                                    const VacuumParams params, List *va_cols,
      79                 :             :                                                    AcquireSampleRowsFunc acquirefunc, BlockNumber relpages,
      80                 :             :                                                    bool inh, bool in_outer_xact, int elevel);
      81                 :             : static void compute_index_stats(Relation onerel, double totalrows,
      82                 :             :                                                                 AnlIndexData *indexdata, int nindexes,
      83                 :             :                                                                 HeapTuple *rows, int numrows,
      84                 :             :                                                                 MemoryContext col_context);
      85                 :             : static VacAttrStats *examine_attribute(Relation onerel, int attnum,
      86                 :             :                                                                            Node *index_expr);
      87                 :             : static int      acquire_sample_rows(Relation onerel, int elevel,
      88                 :             :                                                                 HeapTuple *rows, int targrows,
      89                 :             :                                                                 double *totalrows, double *totaldeadrows);
      90                 :             : static int      compare_rows(const void *a, const void *b, void *arg);
      91                 :             : static int      acquire_inherited_sample_rows(Relation onerel, int elevel,
      92                 :             :                                                                                   HeapTuple *rows, int targrows,
      93                 :             :                                                                                   double *totalrows, double *totaldeadrows);
      94                 :             : static void update_attstats(Oid relid, bool inh,
      95                 :             :                                                         int natts, VacAttrStats **vacattrstats);
      96                 :             : static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
      97                 :             : static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
      98                 :             : 
      99                 :             : 
     100                 :             : /*
     101                 :             :  *      analyze_rel() -- analyze one relation
     102                 :             :  *
     103                 :             :  * relid identifies the relation to analyze.  If relation is supplied, use
     104                 :             :  * the name therein for reporting any failure to open/lock the rel; do not
     105                 :             :  * use it once we've successfully opened the rel, since it might be stale.
     106                 :             :  */
     107                 :             : void
     108                 :         791 : analyze_rel(Oid relid, RangeVar *relation,
     109                 :             :                         const VacuumParams params, List *va_cols, bool in_outer_xact,
     110                 :             :                         BufferAccessStrategy bstrategy)
     111                 :             : {
     112                 :         791 :         Relation        onerel;
     113                 :         791 :         int                     elevel;
     114                 :         791 :         AcquireSampleRowsFunc acquirefunc = NULL;
     115                 :         791 :         BlockNumber relpages = 0;
     116                 :             : 
     117                 :             :         /* Select logging level */
     118         [ -  + ]:         791 :         if (params.options & VACOPT_VERBOSE)
     119                 :           0 :                 elevel = INFO;
     120                 :             :         else
     121                 :         791 :                 elevel = DEBUG2;
     122                 :             : 
     123                 :             :         /* Set up static variables */
     124                 :         791 :         vac_strategy = bstrategy;
     125                 :             : 
     126                 :             :         /*
     127                 :             :          * Check for user-requested abort.
     128                 :             :          */
     129         [ +  - ]:         791 :         CHECK_FOR_INTERRUPTS();
     130                 :             : 
     131                 :             :         /*
     132                 :             :          * Open the relation, getting ShareUpdateExclusiveLock to ensure that two
     133                 :             :          * ANALYZEs don't run on it concurrently.  (This also locks out a
     134                 :             :          * concurrent VACUUM, which doesn't matter much at the moment but might
     135                 :             :          * matter if we ever try to accumulate stats on dead tuples.) If the rel
     136                 :             :          * has been dropped since we last saw it, we don't need to process it.
     137                 :             :          *
     138                 :             :          * Make sure to generate only logs for ANALYZE in this case.
     139                 :             :          */
     140                 :        1582 :         onerel = vacuum_open_relation(relid, relation, params.options & ~(VACOPT_VACUUM),
     141                 :         791 :                                                                   params.log_analyze_min_duration >= 0,
     142                 :             :                                                                   ShareUpdateExclusiveLock);
     143                 :             : 
     144                 :             :         /* leave if relation could not be opened or locked */
     145         [ +  - ]:         791 :         if (!onerel)
     146                 :           0 :                 return;
     147                 :             : 
     148                 :             :         /*
     149                 :             :          * Check if relation needs to be skipped based on privileges.  This check
     150                 :             :          * happens also when building the relation list to analyze for a manual
     151                 :             :          * operation, and needs to be done additionally here as ANALYZE could
     152                 :             :          * happen across multiple transactions where privileges could have changed
     153                 :             :          * in-between.  Make sure to generate only logs for ANALYZE in this case.
     154                 :             :          */
     155   [ +  +  +  + ]:        1582 :         if (!vacuum_is_permitted_for_relation(RelationGetRelid(onerel),
     156                 :         791 :                                                                                   onerel->rd_rel,
     157                 :         791 :                                                                                   params.options & ~VACOPT_VACUUM))
     158                 :             :         {
     159                 :           6 :                 relation_close(onerel, ShareUpdateExclusiveLock);
     160                 :           6 :                 return;
     161                 :             :         }
     162                 :             : 
     163                 :             :         /*
     164                 :             :          * Silently ignore tables that are temp tables of other backends ---
     165                 :             :          * trying to analyze these is rather pointless, since their contents are
     166                 :             :          * probably not up-to-date on disk.  (We don't throw a warning here; it
     167                 :             :          * would just lead to chatter during a database-wide ANALYZE.)
     168                 :             :          */
     169   [ +  +  +  - ]:         785 :         if (RELATION_IS_OTHER_TEMP(onerel))
     170                 :             :         {
     171                 :           0 :                 relation_close(onerel, ShareUpdateExclusiveLock);
     172                 :           0 :                 return;
     173                 :             :         }
     174                 :             : 
     175                 :             :         /*
     176                 :             :          * We can ANALYZE any table except pg_statistic. See update_attstats
     177                 :             :          */
     178         [ +  + ]:         785 :         if (RelationGetRelid(onerel) == StatisticRelationId)
     179                 :             :         {
     180                 :           1 :                 relation_close(onerel, ShareUpdateExclusiveLock);
     181                 :           1 :                 return;
     182                 :             :         }
     183                 :             : 
     184                 :             :         /*
     185                 :             :          * Check that it's of an analyzable relkind, and set up appropriately.
     186                 :             :          */
     187   [ +  +  -  + ]:         784 :         if (onerel->rd_rel->relkind == RELKIND_RELATION ||
     188                 :         126 :                 onerel->rd_rel->relkind == RELKIND_MATVIEW)
     189                 :             :         {
     190                 :             :                 /* Regular table, so we'll use the regular row acquisition function */
     191                 :         658 :                 acquirefunc = acquire_sample_rows;
     192                 :             :                 /* Also get regular table's size */
     193                 :         658 :                 relpages = RelationGetNumberOfBlocks(onerel);
     194                 :         658 :         }
     195         [ -  + ]:         126 :         else if (onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
     196                 :             :         {
     197                 :             :                 /*
     198                 :             :                  * For a foreign table, call the FDW's hook function to see whether it
     199                 :             :                  * supports analysis.
     200                 :             :                  */
     201                 :           0 :                 FdwRoutine *fdwroutine;
     202                 :           0 :                 bool            ok = false;
     203                 :             : 
     204                 :           0 :                 fdwroutine = GetFdwRoutineForRelation(onerel, false);
     205                 :             : 
     206         [ #  # ]:           0 :                 if (fdwroutine->AnalyzeForeignTable != NULL)
     207                 :           0 :                         ok = fdwroutine->AnalyzeForeignTable(onerel,
     208                 :             :                                                                                                  &acquirefunc,
     209                 :             :                                                                                                  &relpages);
     210                 :             : 
     211         [ #  # ]:           0 :                 if (!ok)
     212                 :             :                 {
     213   [ #  #  #  # ]:           0 :                         ereport(WARNING,
     214                 :             :                                         (errmsg("skipping \"%s\" --- cannot analyze this foreign table",
     215                 :             :                                                         RelationGetRelationName(onerel))));
     216                 :           0 :                         relation_close(onerel, ShareUpdateExclusiveLock);
     217                 :           0 :                         return;
     218                 :             :                 }
     219         [ #  # ]:           0 :         }
     220         [ +  - ]:         126 :         else if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
     221                 :             :         {
     222                 :             :                 /*
     223                 :             :                  * For partitioned tables, we want to do the recursive ANALYZE below.
     224                 :             :                  */
     225                 :         126 :         }
     226                 :             :         else
     227                 :             :         {
     228                 :             :                 /* No need for a WARNING if we already complained during VACUUM */
     229         [ #  # ]:           0 :                 if (!(params.options & VACOPT_VACUUM))
     230   [ #  #  #  # ]:           0 :                         ereport(WARNING,
     231                 :             :                                         (errmsg("skipping \"%s\" --- cannot analyze non-tables or special system tables",
     232                 :             :                                                         RelationGetRelationName(onerel))));
     233                 :           0 :                 relation_close(onerel, ShareUpdateExclusiveLock);
     234                 :           0 :                 return;
     235                 :             :         }
     236                 :             : 
     237                 :             :         /*
     238                 :             :          * OK, let's do it.  First, initialize progress reporting.
     239                 :             :          */
     240                 :         784 :         pgstat_progress_start_command(PROGRESS_COMMAND_ANALYZE,
     241                 :         784 :                                                                   RelationGetRelid(onerel));
     242         [ -  + ]:         784 :         if (AmAutoVacuumWorkerProcess())
     243                 :           0 :                 pgstat_progress_update_param(PROGRESS_ANALYZE_STARTED_BY,
     244                 :             :                                                                          PROGRESS_ANALYZE_STARTED_BY_AUTOVACUUM);
     245                 :             :         else
     246                 :         784 :                 pgstat_progress_update_param(PROGRESS_ANALYZE_STARTED_BY,
     247                 :             :                                                                          PROGRESS_ANALYZE_STARTED_BY_MANUAL);
     248                 :             : 
     249                 :             :         /*
     250                 :             :          * Do the normal non-recursive ANALYZE.  We can skip this for partitioned
     251                 :             :          * tables, which don't contain any rows.
     252                 :             :          */
     253         [ +  + ]:         784 :         if (onerel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
     254                 :        1316 :                 do_analyze_rel(onerel, params, va_cols, acquirefunc,
     255                 :         658 :                                            relpages, false, in_outer_xact, elevel);
     256                 :             : 
     257                 :             :         /*
     258                 :             :          * If there are child tables, do recursive ANALYZE.
     259                 :             :          */
     260         [ +  + ]:         784 :         if (onerel->rd_rel->relhassubclass)
     261                 :         290 :                 do_analyze_rel(onerel, params, va_cols, acquirefunc, relpages,
     262                 :         145 :                                            true, in_outer_xact, elevel);
     263                 :             : 
     264                 :             :         /*
     265                 :             :          * Close source relation now, but keep lock so that no one deletes it
     266                 :             :          * before we commit.  (If someone did, they'd fail to clean up the entries
     267                 :             :          * we made in pg_statistic.  Also, releasing the lock before commit would
     268                 :             :          * expose us to concurrent-update failures in update_attstats.)
     269                 :             :          */
     270                 :         784 :         relation_close(onerel, NoLock);
     271                 :             : 
     272                 :         784 :         pgstat_progress_end_command();
     273         [ -  + ]:         791 : }
     274                 :             : 
     275                 :             : /*
     276                 :             :  *      do_analyze_rel() -- analyze one relation, recursively or not
     277                 :             :  *
     278                 :             :  * Note that "acquirefunc" is only relevant for the non-inherited case.
     279                 :             :  * For the inherited case, acquire_inherited_sample_rows() determines the
     280                 :             :  * appropriate acquirefunc for each child table.
     281                 :             :  */
     282                 :             : static void
     283                 :         804 : do_analyze_rel(Relation onerel, const VacuumParams params,
     284                 :             :                            List *va_cols, AcquireSampleRowsFunc acquirefunc,
     285                 :             :                            BlockNumber relpages, bool inh, bool in_outer_xact,
     286                 :             :                            int elevel)
     287                 :             : {
     288                 :         804 :         int                     attr_cnt,
     289                 :             :                                 tcnt,
     290                 :             :                                 i,
     291                 :             :                                 ind;
     292                 :         804 :         Relation   *Irel;
     293                 :         804 :         int                     nindexes;
     294                 :         804 :         bool            verbose,
     295                 :             :                                 instrument,
     296                 :             :                                 hasindex;
     297                 :         804 :         VacAttrStats **vacattrstats;
     298                 :         804 :         AnlIndexData *indexdata;
     299                 :         804 :         int                     targrows,
     300                 :             :                                 numrows,
     301                 :             :                                 minrows;
     302                 :         804 :         double          totalrows,
     303                 :             :                                 totaldeadrows;
     304                 :         804 :         HeapTuple  *rows;
     305                 :         804 :         PGRUsage        ru0;
     306                 :         804 :         TimestampTz starttime = 0;
     307                 :         804 :         MemoryContext caller_context;
     308                 :         804 :         Oid                     save_userid;
     309                 :         804 :         int                     save_sec_context;
     310                 :         804 :         int                     save_nestlevel;
     311                 :         804 :         WalUsage        startwalusage = pgWalUsage;
     312                 :         804 :         BufferUsage startbufferusage = pgBufferUsage;
     313                 :         804 :         BufferUsage bufferusage;
     314                 :         804 :         PgStat_Counter startreadtime = 0;
     315                 :         804 :         PgStat_Counter startwritetime = 0;
     316                 :             : 
     317                 :         804 :         verbose = (params.options & VACOPT_VERBOSE) != 0;
     318   [ +  +  +  - ]:         804 :         instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
     319                 :           0 :                                                           params.log_analyze_min_duration >= 0));
     320         [ +  + ]:         804 :         if (inh)
     321   [ -  +  #  #  :         144 :                 ereport(elevel,
          -  +  -  +  #  
                      # ]
     322                 :             :                                 (errmsg("analyzing \"%s.%s\" inheritance tree",
     323                 :             :                                                 get_namespace_name(RelationGetNamespace(onerel)),
     324                 :             :                                                 RelationGetRelationName(onerel))));
     325                 :             :         else
     326   [ -  +  #  #  :         658 :                 ereport(elevel,
          -  +  -  +  #  
                      # ]
     327                 :             :                                 (errmsg("analyzing \"%s.%s\"",
     328                 :             :                                                 get_namespace_name(RelationGetNamespace(onerel)),
     329                 :             :                                                 RelationGetRelationName(onerel))));
     330                 :             : 
     331                 :             :         /*
     332                 :             :          * Set up a working context so that we can easily free whatever junk gets
     333                 :             :          * created.
     334                 :             :          */
     335                 :         802 :         anl_context = AllocSetContextCreate(CurrentMemoryContext,
     336                 :             :                                                                                 "Analyze",
     337                 :             :                                                                                 ALLOCSET_DEFAULT_SIZES);
     338                 :         802 :         caller_context = MemoryContextSwitchTo(anl_context);
     339                 :             : 
     340                 :             :         /*
     341                 :             :          * Switch to the table owner's userid, so that any index functions are run
     342                 :             :          * as that user.  Also lock down security-restricted operations and
     343                 :             :          * arrange to make GUC variable changes local to this command.
     344                 :             :          */
     345                 :         802 :         GetUserIdAndSecContext(&save_userid, &save_sec_context);
     346                 :        1604 :         SetUserIdAndSecContext(onerel->rd_rel->relowner,
     347                 :         802 :                                                    save_sec_context | SECURITY_RESTRICTED_OPERATION);
     348                 :         802 :         save_nestlevel = NewGUCNestLevel();
     349                 :         802 :         RestrictSearchPath();
     350                 :             : 
     351                 :             :         /*
     352                 :             :          * When verbose or autovacuum logging is used, initialize a resource usage
     353                 :             :          * snapshot and optionally track I/O timing.
     354                 :             :          */
     355         [ +  - ]:         802 :         if (instrument)
     356                 :             :         {
     357         [ #  # ]:           0 :                 if (track_io_timing)
     358                 :             :                 {
     359                 :           0 :                         startreadtime = pgStatBlockReadTime;
     360                 :           0 :                         startwritetime = pgStatBlockWriteTime;
     361                 :           0 :                 }
     362                 :             : 
     363                 :           0 :                 pg_rusage_init(&ru0);
     364                 :           0 :         }
     365                 :             : 
     366                 :             :         /* Used for instrumentation and stats report */
     367                 :         802 :         starttime = GetCurrentTimestamp();
     368                 :             : 
     369                 :             :         /*
     370                 :             :          * Determine which columns to analyze
     371                 :             :          *
     372                 :             :          * Note that system attributes are never analyzed, so we just reject them
     373                 :             :          * at the lookup stage.  We also reject duplicate column mentions.  (We
     374                 :             :          * could alternatively ignore duplicates, but analyzing a column twice
     375                 :             :          * won't work; we'd end up making a conflicting update in pg_statistic.)
     376                 :             :          */
     377         [ +  + ]:         802 :         if (va_cols != NIL)
     378                 :             :         {
     379                 :          14 :                 Bitmapset  *unique_cols = NULL;
     380                 :          14 :                 ListCell   *le;
     381                 :             : 
     382                 :          14 :                 vacattrstats = (VacAttrStats **) palloc(list_length(va_cols) *
     383                 :             :                                                                                                 sizeof(VacAttrStats *));
     384                 :          14 :                 tcnt = 0;
     385   [ +  -  +  +  :          27 :                 foreach(le, va_cols)
                   +  + ]
     386                 :             :                 {
     387                 :          21 :                         char       *col = strVal(lfirst(le));
     388                 :             : 
     389                 :          21 :                         i = attnameAttNum(onerel, col, false);
     390         [ +  + ]:          21 :                         if (i == InvalidAttrNumber)
     391   [ +  -  +  - ]:           6 :                                 ereport(ERROR,
     392                 :             :                                                 (errcode(ERRCODE_UNDEFINED_COLUMN),
     393                 :             :                                                  errmsg("column \"%s\" of relation \"%s\" does not exist",
     394                 :             :                                                                 col, RelationGetRelationName(onerel))));
     395         [ +  + ]:          15 :                         if (bms_is_member(i, unique_cols))
     396   [ +  -  +  - ]:           2 :                                 ereport(ERROR,
     397                 :             :                                                 (errcode(ERRCODE_DUPLICATE_COLUMN),
     398                 :             :                                                  errmsg("column \"%s\" of relation \"%s\" appears more than once",
     399                 :             :                                                                 col, RelationGetRelationName(onerel))));
     400                 :          13 :                         unique_cols = bms_add_member(unique_cols, i);
     401                 :             : 
     402                 :          13 :                         vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
     403         [ +  + ]:          13 :                         if (vacattrstats[tcnt] != NULL)
     404                 :          12 :                                 tcnt++;
     405                 :          13 :                 }
     406                 :           6 :                 attr_cnt = tcnt;
     407                 :           6 :         }
     408                 :             :         else
     409                 :             :         {
     410                 :         788 :                 attr_cnt = onerel->rd_att->natts;
     411                 :         788 :                 vacattrstats = (VacAttrStats **)
     412                 :         788 :                         palloc(attr_cnt * sizeof(VacAttrStats *));
     413                 :         788 :                 tcnt = 0;
     414         [ +  + ]:        3454 :                 for (i = 1; i <= attr_cnt; i++)
     415                 :             :                 {
     416                 :        2666 :                         vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
     417         [ +  + ]:        2666 :                         if (vacattrstats[tcnt] != NULL)
     418                 :        2662 :                                 tcnt++;
     419                 :        2666 :                 }
     420                 :         788 :                 attr_cnt = tcnt;
     421                 :             :         }
     422                 :             : 
     423                 :             :         /*
     424                 :             :          * Open all indexes of the relation, and see if there are any analyzable
     425                 :             :          * columns in the indexes.  We do not analyze index columns if there was
     426                 :             :          * an explicit column list in the ANALYZE command, however.
     427                 :             :          *
     428                 :             :          * If we are doing a recursive scan, we don't want to touch the parent's
     429                 :             :          * indexes at all.  If we're processing a partitioned table, we need to
     430                 :             :          * know if there are any indexes, but we don't want to process them.
     431                 :             :          */
     432         [ +  + ]:         794 :         if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
     433                 :             :         {
     434                 :         122 :                 List       *idxs = RelationGetIndexList(onerel);
     435                 :             : 
     436                 :         122 :                 Irel = NULL;
     437                 :         122 :                 nindexes = 0;
     438                 :         122 :                 hasindex = idxs != NIL;
     439                 :         122 :                 list_free(idxs);
     440                 :         122 :         }
     441         [ +  + ]:         672 :         else if (!inh)
     442                 :             :         {
     443                 :         653 :                 vac_open_indexes(onerel, AccessShareLock, &nindexes, &Irel);
     444                 :         653 :                 hasindex = nindexes > 0;
     445                 :         653 :         }
     446                 :             :         else
     447                 :             :         {
     448                 :          19 :                 Irel = NULL;
     449                 :          19 :                 nindexes = 0;
     450                 :          19 :                 hasindex = false;
     451                 :             :         }
     452                 :         794 :         indexdata = NULL;
     453         [ +  + ]:         794 :         if (nindexes > 0)
     454                 :             :         {
     455                 :         250 :                 indexdata = (AnlIndexData *) palloc0(nindexes * sizeof(AnlIndexData));
     456         [ +  + ]:         599 :                 for (ind = 0; ind < nindexes; ind++)
     457                 :             :                 {
     458                 :         349 :                         AnlIndexData *thisdata = &indexdata[ind];
     459                 :         349 :                         IndexInfo  *indexInfo;
     460                 :             : 
     461                 :         349 :                         thisdata->indexInfo = indexInfo = BuildIndexInfo(Irel[ind]);
     462                 :         349 :                         thisdata->tupleFract = 1.0; /* fix later if partial */
     463   [ +  +  -  + ]:         349 :                         if (indexInfo->ii_Expressions != NIL && va_cols == NIL)
     464                 :             :                         {
     465                 :          15 :                                 ListCell   *indexpr_item = list_head(indexInfo->ii_Expressions);
     466                 :             : 
     467                 :          15 :                                 thisdata->vacattrstats = (VacAttrStats **)
     468                 :          15 :                                         palloc(indexInfo->ii_NumIndexAttrs * sizeof(VacAttrStats *));
     469                 :          15 :                                 tcnt = 0;
     470         [ +  + ]:          30 :                                 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
     471                 :             :                                 {
     472                 :          15 :                                         int                     keycol = indexInfo->ii_IndexAttrNumbers[i];
     473                 :             : 
     474         [ -  + ]:          15 :                                         if (keycol == 0)
     475                 :             :                                         {
     476                 :             :                                                 /* Found an index expression */
     477                 :          15 :                                                 Node       *indexkey;
     478                 :             : 
     479         [ +  - ]:          15 :                                                 if (indexpr_item == NULL)       /* shouldn't happen */
     480   [ #  #  #  # ]:           0 :                                                         elog(ERROR, "too few entries in indexprs list");
     481                 :          15 :                                                 indexkey = (Node *) lfirst(indexpr_item);
     482                 :          30 :                                                 indexpr_item = lnext(indexInfo->ii_Expressions,
     483                 :          15 :                                                                                          indexpr_item);
     484                 :          15 :                                                 thisdata->vacattrstats[tcnt] =
     485                 :          15 :                                                         examine_attribute(Irel[ind], i + 1, indexkey);
     486         [ +  - ]:          15 :                                                 if (thisdata->vacattrstats[tcnt] != NULL)
     487                 :          15 :                                                         tcnt++;
     488                 :          15 :                                         }
     489                 :          15 :                                 }
     490                 :          15 :                                 thisdata->attr_cnt = tcnt;
     491                 :          15 :                         }
     492                 :         349 :                 }
     493                 :         250 :         }
     494                 :             : 
     495                 :             :         /*
     496                 :             :          * Determine how many rows we need to sample, using the worst case from
     497                 :             :          * all analyzable columns.  We use a lower bound of 100 rows to avoid
     498                 :             :          * possible overflow in Vitter's algorithm.  (Note: that will also be the
     499                 :             :          * target in the corner case where there are no analyzable columns.)
     500                 :             :          */
     501                 :         794 :         targrows = 100;
     502         [ +  + ]:        3464 :         for (i = 0; i < attr_cnt; i++)
     503                 :             :         {
     504         [ +  + ]:        2670 :                 if (targrows < vacattrstats[i]->minrows)
     505                 :         783 :                         targrows = vacattrstats[i]->minrows;
     506                 :        2670 :         }
     507         [ +  + ]:        1143 :         for (ind = 0; ind < nindexes; ind++)
     508                 :             :         {
     509                 :         349 :                 AnlIndexData *thisdata = &indexdata[ind];
     510                 :             : 
     511         [ +  + ]:         364 :                 for (i = 0; i < thisdata->attr_cnt; i++)
     512                 :             :                 {
     513         [ +  + ]:          15 :                         if (targrows < thisdata->vacattrstats[i]->minrows)
     514                 :           2 :                                 targrows = thisdata->vacattrstats[i]->minrows;
     515                 :          15 :                 }
     516                 :         349 :         }
     517                 :             : 
     518                 :             :         /*
     519                 :             :          * Look at extended statistics objects too, as those may define custom
     520                 :             :          * statistics target. So we may need to sample more rows and then build
     521                 :             :          * the statistics with enough detail.
     522                 :             :          */
     523                 :         794 :         minrows = ComputeExtStatisticsRows(onerel, attr_cnt, vacattrstats);
     524                 :             : 
     525         [ +  - ]:         794 :         if (targrows < minrows)
     526                 :           0 :                 targrows = minrows;
     527                 :             : 
     528                 :             :         /*
     529                 :             :          * Acquire the sample rows
     530                 :             :          */
     531                 :         794 :         rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple));
     532                 :         794 :         pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE,
     533                 :         794 :                                                                  inh ? PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS_INH :
     534                 :             :                                                                  PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS);
     535         [ +  + ]:         794 :         if (inh)
     536                 :         282 :                 numrows = acquire_inherited_sample_rows(onerel, elevel,
     537                 :         141 :                                                                                                 rows, targrows,
     538                 :             :                                                                                                 &totalrows, &totaldeadrows);
     539                 :             :         else
     540                 :        1306 :                 numrows = (*acquirefunc) (onerel, elevel,
     541                 :         653 :                                                                   rows, targrows,
     542                 :             :                                                                   &totalrows, &totaldeadrows);
     543                 :             : 
     544                 :             :         /*
     545                 :             :          * Compute the statistics.  Temporary results during the calculations for
     546                 :             :          * each column are stored in a child context.  The calc routines are
     547                 :             :          * responsible to make sure that whatever they store into the VacAttrStats
     548                 :             :          * structure is allocated in anl_context.
     549                 :             :          */
     550         [ +  + ]:         794 :         if (numrows > 0)
     551                 :             :         {
     552                 :         716 :                 MemoryContext col_context,
     553                 :             :                                         old_context;
     554                 :             : 
     555                 :         716 :                 pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE,
     556                 :             :                                                                          PROGRESS_ANALYZE_PHASE_COMPUTE_STATS);
     557                 :             : 
     558                 :         716 :                 col_context = AllocSetContextCreate(anl_context,
     559                 :             :                                                                                         "Analyze Column",
     560                 :             :                                                                                         ALLOCSET_DEFAULT_SIZES);
     561                 :         716 :                 old_context = MemoryContextSwitchTo(col_context);
     562                 :             : 
     563         [ +  + ]:        3133 :                 for (i = 0; i < attr_cnt; i++)
     564                 :             :                 {
     565                 :        2417 :                         VacAttrStats *stats = vacattrstats[i];
     566                 :        2417 :                         AttributeOpts *aopt;
     567                 :             : 
     568                 :        2417 :                         stats->rows = rows;
     569                 :        2417 :                         stats->tupDesc = onerel->rd_att;
     570                 :        4834 :                         stats->compute_stats(stats,
     571                 :             :                                                                  std_fetch_func,
     572                 :        2417 :                                                                  numrows,
     573                 :        2417 :                                                                  totalrows);
     574                 :             : 
     575                 :             :                         /*
     576                 :             :                          * If the appropriate flavor of the n_distinct option is
     577                 :             :                          * specified, override with the corresponding value.
     578                 :             :                          */
     579                 :        2417 :                         aopt = get_attribute_options(onerel->rd_id, stats->tupattnum);
     580         [ +  + ]:        2417 :                         if (aopt != NULL)
     581                 :             :                         {
     582                 :           1 :                                 float8          n_distinct;
     583                 :             : 
     584         [ -  + ]:           1 :                                 n_distinct = inh ? aopt->n_distinct_inherited : aopt->n_distinct;
     585         [ +  - ]:           1 :                                 if (n_distinct != 0.0)
     586                 :           1 :                                         stats->stadistinct = n_distinct;
     587                 :           1 :                         }
     588                 :             : 
     589                 :        2417 :                         MemoryContextReset(col_context);
     590                 :        2417 :                 }
     591                 :             : 
     592         [ +  + ]:         716 :                 if (nindexes > 0)
     593                 :         420 :                         compute_index_stats(onerel, totalrows,
     594                 :         210 :                                                                 indexdata, nindexes,
     595                 :         210 :                                                                 rows, numrows,
     596                 :         210 :                                                                 col_context);
     597                 :             : 
     598                 :         716 :                 MemoryContextSwitchTo(old_context);
     599                 :         716 :                 MemoryContextDelete(col_context);
     600                 :             : 
     601                 :             :                 /*
     602                 :             :                  * Emit the completed stats rows into pg_statistic, replacing any
     603                 :             :                  * previous statistics for the target columns.  (If there are stats in
     604                 :             :                  * pg_statistic for columns we didn't process, we leave them alone.)
     605                 :             :                  */
     606                 :        1432 :                 update_attstats(RelationGetRelid(onerel), inh,
     607                 :         716 :                                                 attr_cnt, vacattrstats);
     608                 :             : 
     609         [ +  + ]:        1000 :                 for (ind = 0; ind < nindexes; ind++)
     610                 :             :                 {
     611                 :         284 :                         AnlIndexData *thisdata = &indexdata[ind];
     612                 :             : 
     613                 :         568 :                         update_attstats(RelationGetRelid(Irel[ind]), false,
     614                 :         284 :                                                         thisdata->attr_cnt, thisdata->vacattrstats);
     615                 :         284 :                 }
     616                 :             : 
     617                 :             :                 /* Build extended statistics (if there are any). */
     618                 :        1432 :                 BuildRelationExtStatistics(onerel, inh, totalrows, numrows, rows,
     619                 :         716 :                                                                    attr_cnt, vacattrstats);
     620                 :         716 :         }
     621                 :             : 
     622                 :         794 :         pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE,
     623                 :             :                                                                  PROGRESS_ANALYZE_PHASE_FINALIZE_ANALYZE);
     624                 :             : 
     625                 :             :         /*
     626                 :             :          * Update pages/tuples stats in pg_class ... but not if we're doing
     627                 :             :          * inherited stats.
     628                 :             :          *
     629                 :             :          * We assume that VACUUM hasn't set pg_class.reltuples already, even
     630                 :             :          * during a VACUUM ANALYZE.  Although VACUUM often updates pg_class,
     631                 :             :          * exceptions exist.  A "VACUUM (ANALYZE, INDEX_CLEANUP OFF)" command will
     632                 :             :          * never update pg_class entries for index relations.  It's also possible
     633                 :             :          * that an individual index's pg_class entry won't be updated during
     634                 :             :          * VACUUM if the index AM returns NULL from its amvacuumcleanup() routine.
     635                 :             :          */
     636         [ +  + ]:         794 :         if (!inh)
     637                 :             :         {
     638                 :         652 :                 BlockNumber relallvisible = 0;
     639                 :         652 :                 BlockNumber relallfrozen = 0;
     640                 :             : 
     641   [ -  +  #  #  :         652 :                 if (RELKIND_HAS_STORAGE(onerel->rd_rel->relkind))
          #  #  #  #  #  
                      # ]
     642                 :         652 :                         visibilitymap_count(onerel, &relallvisible, &relallfrozen);
     643                 :             : 
     644                 :             :                 /*
     645                 :             :                  * Update pg_class for table relation.  CCI first, in case acquirefunc
     646                 :             :                  * updated pg_class.
     647                 :             :                  */
     648                 :         652 :                 CommandCounterIncrement();
     649                 :        1304 :                 vac_update_relstats(onerel,
     650                 :         652 :                                                         relpages,
     651                 :         652 :                                                         totalrows,
     652                 :         652 :                                                         relallvisible,
     653                 :         652 :                                                         relallfrozen,
     654                 :         652 :                                                         hasindex,
     655                 :             :                                                         InvalidTransactionId,
     656                 :             :                                                         InvalidMultiXactId,
     657                 :             :                                                         NULL, NULL,
     658                 :         652 :                                                         in_outer_xact);
     659                 :             : 
     660                 :             :                 /* Same for indexes */
     661         [ +  + ]:         999 :                 for (ind = 0; ind < nindexes; ind++)
     662                 :             :                 {
     663                 :         347 :                         AnlIndexData *thisdata = &indexdata[ind];
     664                 :         347 :                         double          totalindexrows;
     665                 :             : 
     666                 :         347 :                         totalindexrows = ceil(thisdata->tupleFract * totalrows);
     667                 :         694 :                         vac_update_relstats(Irel[ind],
     668                 :         347 :                                                                 RelationGetNumberOfBlocks(Irel[ind]),
     669                 :         347 :                                                                 totalindexrows,
     670                 :             :                                                                 0, 0,
     671                 :             :                                                                 false,
     672                 :             :                                                                 InvalidTransactionId,
     673                 :             :                                                                 InvalidMultiXactId,
     674                 :             :                                                                 NULL, NULL,
     675                 :         347 :                                                                 in_outer_xact);
     676                 :         347 :                 }
     677                 :         652 :         }
     678         [ +  + ]:         142 :         else if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
     679                 :             :         {
     680                 :             :                 /*
     681                 :             :                  * Partitioned tables don't have storage, so we don't set any fields
     682                 :             :                  * in their pg_class entries except for reltuples and relhasindex.
     683                 :             :                  */
     684                 :         123 :                 CommandCounterIncrement();
     685                 :         246 :                 vac_update_relstats(onerel, -1, totalrows,
     686                 :         123 :                                                         0, 0, hasindex, InvalidTransactionId,
     687                 :             :                                                         InvalidMultiXactId,
     688                 :             :                                                         NULL, NULL,
     689                 :         123 :                                                         in_outer_xact);
     690                 :         123 :         }
     691                 :             : 
     692                 :             :         /*
     693                 :             :          * Now report ANALYZE to the cumulative stats system.  For regular tables,
     694                 :             :          * we do it only if not doing inherited stats.  For partitioned tables, we
     695                 :             :          * only do it for inherited stats. (We're never called for not-inherited
     696                 :             :          * stats on partitioned tables anyway.)
     697                 :             :          *
     698                 :             :          * Reset the mod_since_analyze counter only if we analyzed all columns;
     699                 :             :          * otherwise, there is still work for auto-analyze to do.
     700                 :             :          */
     701         [ +  + ]:         794 :         if (!inh)
     702                 :        1304 :                 pgstat_report_analyze(onerel, totalrows, totaldeadrows,
     703                 :         652 :                                                           (va_cols == NIL), starttime);
     704         [ +  + ]:         142 :         else if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
     705                 :         123 :                 pgstat_report_analyze(onerel, 0, 0, (va_cols == NIL), starttime);
     706                 :             : 
     707                 :             :         /*
     708                 :             :          * If this isn't part of VACUUM ANALYZE, let index AMs do cleanup.
     709                 :             :          *
     710                 :             :          * Note that most index AMs perform a no-op as a matter of policy for
     711                 :             :          * amvacuumcleanup() when called in ANALYZE-only mode.  The only exception
     712                 :             :          * among core index AMs is GIN/ginvacuumcleanup().
     713                 :             :          */
     714         [ +  + ]:         794 :         if (!(params.options & VACOPT_VACUUM))
     715                 :             :         {
     716         [ +  + ]:        1002 :                 for (ind = 0; ind < nindexes; ind++)
     717                 :             :                 {
     718                 :         297 :                         IndexBulkDeleteResult *stats;
     719                 :         297 :                         IndexVacuumInfo ivinfo;
     720                 :             : 
     721                 :         297 :                         ivinfo.index = Irel[ind];
     722                 :         297 :                         ivinfo.heaprel = onerel;
     723                 :         297 :                         ivinfo.analyze_only = true;
     724                 :         297 :                         ivinfo.estimated_count = true;
     725                 :         297 :                         ivinfo.message_level = elevel;
     726                 :         297 :                         ivinfo.num_heap_tuples = onerel->rd_rel->reltuples;
     727                 :         297 :                         ivinfo.strategy = vac_strategy;
     728                 :             : 
     729                 :         297 :                         stats = index_vacuum_cleanup(&ivinfo, NULL);
     730                 :             : 
     731         [ -  + ]:         297 :                         if (stats)
     732                 :           0 :                                 pfree(stats);
     733                 :         297 :                 }
     734                 :         705 :         }
     735                 :             : 
     736                 :             :         /* Done with indexes */
     737                 :         794 :         vac_close_indexes(nindexes, Irel, NoLock);
     738                 :             : 
     739                 :             :         /* Log the action if appropriate */
     740         [ +  - ]:         794 :         if (instrument)
     741                 :             :         {
     742                 :           0 :                 TimestampTz endtime = GetCurrentTimestamp();
     743                 :             : 
     744   [ #  #  #  #  :           0 :                 if (verbose || params.log_analyze_min_duration == 0 ||
                   #  # ]
     745                 :           0 :                         TimestampDifferenceExceeds(starttime, endtime,
     746                 :           0 :                                                                            params.log_analyze_min_duration))
     747                 :             :                 {
     748                 :           0 :                         long            delay_in_ms;
     749                 :           0 :                         WalUsage        walusage;
     750                 :           0 :                         double          read_rate = 0;
     751                 :           0 :                         double          write_rate = 0;
     752                 :           0 :                         char       *msgfmt;
     753                 :           0 :                         StringInfoData buf;
     754                 :           0 :                         int64           total_blks_hit;
     755                 :           0 :                         int64           total_blks_read;
     756                 :           0 :                         int64           total_blks_dirtied;
     757                 :             : 
     758                 :           0 :                         memset(&bufferusage, 0, sizeof(BufferUsage));
     759                 :           0 :                         BufferUsageAccumDiff(&bufferusage, &pgBufferUsage, &startbufferusage);
     760                 :           0 :                         memset(&walusage, 0, sizeof(WalUsage));
     761                 :           0 :                         WalUsageAccumDiff(&walusage, &pgWalUsage, &startwalusage);
     762                 :             : 
     763                 :           0 :                         total_blks_hit = bufferusage.shared_blks_hit +
     764                 :           0 :                                 bufferusage.local_blks_hit;
     765                 :           0 :                         total_blks_read = bufferusage.shared_blks_read +
     766                 :           0 :                                 bufferusage.local_blks_read;
     767                 :           0 :                         total_blks_dirtied = bufferusage.shared_blks_dirtied +
     768                 :           0 :                                 bufferusage.local_blks_dirtied;
     769                 :             : 
     770                 :             :                         /*
     771                 :             :                          * We do not expect an analyze to take > 25 days and it simplifies
     772                 :             :                          * things a bit to use TimestampDifferenceMilliseconds.
     773                 :             :                          */
     774                 :           0 :                         delay_in_ms = TimestampDifferenceMilliseconds(starttime, endtime);
     775                 :             : 
     776                 :             :                         /*
     777                 :             :                          * Note that we are reporting these read/write rates in the same
     778                 :             :                          * manner as VACUUM does, which means that while the 'average read
     779                 :             :                          * rate' here actually corresponds to page misses and resulting
     780                 :             :                          * reads which are also picked up by track_io_timing, if enabled,
     781                 :             :                          * the 'average write rate' is actually talking about the rate of
     782                 :             :                          * pages being dirtied, not being written out, so it's typical to
     783                 :             :                          * have a non-zero 'avg write rate' while I/O timings only reports
     784                 :             :                          * reads.
     785                 :             :                          *
     786                 :             :                          * It's not clear that an ANALYZE will ever result in
     787                 :             :                          * FlushBuffer() being called, but we track and support reporting
     788                 :             :                          * on I/O write time in case that changes as it's practically free
     789                 :             :                          * to do so anyway.
     790                 :             :                          */
     791                 :             : 
     792         [ #  # ]:           0 :                         if (delay_in_ms > 0)
     793                 :             :                         {
     794                 :           0 :                                 read_rate = (double) BLCKSZ * total_blks_read /
     795                 :           0 :                                         (1024 * 1024) / (delay_in_ms / 1000.0);
     796                 :           0 :                                 write_rate = (double) BLCKSZ * total_blks_dirtied /
     797                 :           0 :                                         (1024 * 1024) / (delay_in_ms / 1000.0);
     798                 :           0 :                         }
     799                 :             : 
     800                 :             :                         /*
     801                 :             :                          * We split this up so we don't emit empty I/O timing values when
     802                 :             :                          * track_io_timing isn't enabled.
     803                 :             :                          */
     804                 :             : 
     805                 :           0 :                         initStringInfo(&buf);
     806                 :             : 
     807         [ #  # ]:           0 :                         if (AmAutoVacuumWorkerProcess())
     808                 :           0 :                                 msgfmt = _("automatic analyze of table \"%s.%s.%s\"\n");
     809                 :             :                         else
     810                 :           0 :                                 msgfmt = _("finished analyzing table \"%s.%s.%s\"\n");
     811                 :             : 
     812                 :           0 :                         appendStringInfo(&buf, msgfmt,
     813                 :           0 :                                                          get_database_name(MyDatabaseId),
     814                 :           0 :                                                          get_namespace_name(RelationGetNamespace(onerel)),
     815                 :           0 :                                                          RelationGetRelationName(onerel));
     816         [ #  # ]:           0 :                         if (track_cost_delay_timing)
     817                 :             :                         {
     818                 :             :                                 /*
     819                 :             :                                  * We bypass the changecount mechanism because this value is
     820                 :             :                                  * only updated by the calling process.
     821                 :             :                                  */
     822                 :           0 :                                 appendStringInfo(&buf, _("delay time: %.3f ms\n"),
     823                 :           0 :                                                                  (double) MyBEEntry->st_progress_param[PROGRESS_ANALYZE_DELAY_TIME] / 1000000.0);
     824                 :           0 :                         }
     825         [ #  # ]:           0 :                         if (track_io_timing)
     826                 :             :                         {
     827                 :           0 :                                 double          read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
     828                 :           0 :                                 double          write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
     829                 :             : 
     830                 :           0 :                                 appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
     831                 :           0 :                                                                  read_ms, write_ms);
     832                 :           0 :                         }
     833                 :           0 :                         appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
     834                 :           0 :                                                          read_rate, write_rate);
     835                 :           0 :                         appendStringInfo(&buf, _("buffer usage: %" PRId64 " hits, %" PRId64 " reads, %" PRId64 " dirtied\n"),
     836                 :           0 :                                                          total_blks_hit,
     837                 :           0 :                                                          total_blks_read,
     838                 :           0 :                                                          total_blks_dirtied);
     839                 :           0 :                         appendStringInfo(&buf,
     840                 :           0 :                                                          _("WAL usage: %" PRId64 " records, %" PRId64 " full page images, %" PRIu64 " bytes, %" PRIu64 " full page image bytes, %" PRId64 " buffers full\n"),
     841                 :           0 :                                                          walusage.wal_records,
     842                 :           0 :                                                          walusage.wal_fpi,
     843                 :           0 :                                                          walusage.wal_bytes,
     844                 :           0 :                                                          walusage.wal_fpi_bytes,
     845                 :           0 :                                                          walusage.wal_buffers_full);
     846                 :           0 :                         appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
     847                 :             : 
     848   [ #  #  #  #  :           0 :                         ereport(verbose ? INFO : LOG,
          #  #  #  #  #  
                      # ]
     849                 :             :                                         (errmsg_internal("%s", buf.data)));
     850                 :             : 
     851                 :           0 :                         pfree(buf.data);
     852                 :           0 :                 }
     853                 :           0 :         }
     854                 :             : 
     855                 :             :         /* Roll back any GUC changes executed by index functions */
     856                 :         794 :         AtEOXact_GUC(false, save_nestlevel);
     857                 :             : 
     858                 :             :         /* Restore userid and security context */
     859                 :         794 :         SetUserIdAndSecContext(save_userid, save_sec_context);
     860                 :             : 
     861                 :             :         /* Restore current context and release memory */
     862                 :         794 :         MemoryContextSwitchTo(caller_context);
     863                 :         794 :         MemoryContextDelete(anl_context);
     864                 :         794 :         anl_context = NULL;
     865                 :         794 : }
     866                 :             : 
     867                 :             : /*
     868                 :             :  * Compute statistics about indexes of a relation
     869                 :             :  */
     870                 :             : static void
     871                 :         210 : compute_index_stats(Relation onerel, double totalrows,
     872                 :             :                                         AnlIndexData *indexdata, int nindexes,
     873                 :             :                                         HeapTuple *rows, int numrows,
     874                 :             :                                         MemoryContext col_context)
     875                 :             : {
     876                 :         210 :         MemoryContext ind_context,
     877                 :             :                                 old_context;
     878                 :         210 :         Datum           values[INDEX_MAX_KEYS];
     879                 :         210 :         bool            isnull[INDEX_MAX_KEYS];
     880                 :         210 :         int                     ind,
     881                 :             :                                 i;
     882                 :             : 
     883                 :         210 :         ind_context = AllocSetContextCreate(anl_context,
     884                 :             :                                                                                 "Analyze Index",
     885                 :             :                                                                                 ALLOCSET_DEFAULT_SIZES);
     886                 :         210 :         old_context = MemoryContextSwitchTo(ind_context);
     887                 :             : 
     888         [ +  + ]:         496 :         for (ind = 0; ind < nindexes; ind++)
     889                 :             :         {
     890                 :         286 :                 AnlIndexData *thisdata = &indexdata[ind];
     891                 :         286 :                 IndexInfo  *indexInfo = thisdata->indexInfo;
     892                 :         286 :                 int                     attr_cnt = thisdata->attr_cnt;
     893                 :         286 :                 TupleTableSlot *slot;
     894                 :         286 :                 EState     *estate;
     895                 :         286 :                 ExprContext *econtext;
     896                 :         286 :                 ExprState  *predicate;
     897                 :         286 :                 Datum      *exprvals;
     898                 :         286 :                 bool       *exprnulls;
     899                 :         286 :                 int                     numindexrows,
     900                 :             :                                         tcnt,
     901                 :             :                                         rowno;
     902                 :         286 :                 double          totalindexrows;
     903                 :             : 
     904                 :             :                 /* Ignore index if no columns to analyze and not partial */
     905   [ +  +  +  + ]:         286 :                 if (attr_cnt == 0 && indexInfo->ii_Predicate == NIL)
     906                 :         266 :                         continue;
     907                 :             : 
     908                 :             :                 /*
     909                 :             :                  * Need an EState for evaluation of index expressions and
     910                 :             :                  * partial-index predicates.  Create it in the per-index context to be
     911                 :             :                  * sure it gets cleaned up at the bottom of the loop.
     912                 :             :                  */
     913                 :          20 :                 estate = CreateExecutorState();
     914         [ -  + ]:          20 :                 econtext = GetPerTupleExprContext(estate);
     915                 :             :                 /* Need a slot to hold the current heap tuple, too */
     916                 :          20 :                 slot = MakeSingleTupleTableSlot(RelationGetDescr(onerel),
     917                 :             :                                                                                 &TTSOpsHeapTuple);
     918                 :             : 
     919                 :             :                 /* Arrange for econtext's scan tuple to be the tuple under test */
     920                 :          20 :                 econtext->ecxt_scantuple = slot;
     921                 :             : 
     922                 :             :                 /* Set up execution state for predicate. */
     923                 :          20 :                 predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
     924                 :             : 
     925                 :             :                 /* Compute and save index expression values */
     926                 :          20 :                 exprvals = (Datum *) palloc(numrows * attr_cnt * sizeof(Datum));
     927                 :          20 :                 exprnulls = (bool *) palloc(numrows * attr_cnt * sizeof(bool));
     928                 :          20 :                 numindexrows = 0;
     929                 :          20 :                 tcnt = 0;
     930         [ +  + ]:       17445 :                 for (rowno = 0; rowno < numrows; rowno++)
     931                 :             :                 {
     932                 :       17425 :                         HeapTuple       heapTuple = rows[rowno];
     933                 :             : 
     934                 :       17425 :                         vacuum_delay_point(true);
     935                 :             : 
     936                 :             :                         /*
     937                 :             :                          * Reset the per-tuple context each time, to reclaim any cruft
     938                 :             :                          * left behind by evaluating the predicate or index expressions.
     939                 :             :                          */
     940                 :       17425 :                         ResetExprContext(econtext);
     941                 :             : 
     942                 :             :                         /* Set up for predicate or expression evaluation */
     943                 :       17425 :                         ExecStoreHeapTuple(heapTuple, slot, false);
     944                 :             : 
     945                 :             :                         /* If index is partial, check predicate */
     946         [ +  + ]:       17425 :                         if (predicate != NULL)
     947                 :             :                         {
     948         [ +  + ]:        4011 :                                 if (!ExecQual(predicate, econtext))
     949                 :        2888 :                                         continue;
     950                 :        1123 :                         }
     951                 :       14537 :                         numindexrows++;
     952                 :             : 
     953         [ +  + ]:       14537 :                         if (attr_cnt > 0)
     954                 :             :                         {
     955                 :             :                                 /*
     956                 :             :                                  * Evaluate the index row to compute expression values. We
     957                 :             :                                  * could do this by hand, but FormIndexDatum is convenient.
     958                 :             :                                  */
     959                 :       26830 :                                 FormIndexDatum(indexInfo,
     960                 :       13415 :                                                            slot,
     961                 :       13415 :                                                            estate,
     962                 :       13415 :                                                            values,
     963                 :       13415 :                                                            isnull);
     964                 :             : 
     965                 :             :                                 /*
     966                 :             :                                  * Save just the columns we care about.  We copy the values
     967                 :             :                                  * into ind_context from the estate's per-tuple context.
     968                 :             :                                  */
     969         [ +  + ]:       26829 :                                 for (i = 0; i < attr_cnt; i++)
     970                 :             :                                 {
     971                 :       13414 :                                         VacAttrStats *stats = thisdata->vacattrstats[i];
     972                 :       13414 :                                         int                     attnum = stats->tupattnum;
     973                 :             : 
     974         [ +  + ]:       13414 :                                         if (isnull[attnum - 1])
     975                 :             :                                         {
     976                 :           1 :                                                 exprvals[tcnt] = (Datum) 0;
     977                 :           1 :                                                 exprnulls[tcnt] = true;
     978                 :           1 :                                         }
     979                 :             :                                         else
     980                 :             :                                         {
     981                 :       26826 :                                                 exprvals[tcnt] = datumCopy(values[attnum - 1],
     982                 :       13413 :                                                                                                    stats->attrtype->typbyval,
     983                 :       13413 :                                                                                                    stats->attrtype->typlen);
     984                 :       13413 :                                                 exprnulls[tcnt] = false;
     985                 :             :                                         }
     986                 :       13414 :                                         tcnt++;
     987                 :       13414 :                                 }
     988                 :       13415 :                         }
     989         [ +  + ]:       17425 :                 }
     990                 :             : 
     991                 :             :                 /*
     992                 :             :                  * Having counted the number of rows that pass the predicate in the
     993                 :             :                  * sample, we can estimate the total number of rows in the index.
     994                 :             :                  */
     995                 :          20 :                 thisdata->tupleFract = (double) numindexrows / (double) numrows;
     996                 :          20 :                 totalindexrows = ceil(thisdata->tupleFract * totalrows);
     997                 :             : 
     998                 :             :                 /*
     999                 :             :                  * Now we can compute the statistics for the expression columns.
    1000                 :             :                  */
    1001         [ +  + ]:          20 :                 if (numindexrows > 0)
    1002                 :             :                 {
    1003                 :          18 :                         MemoryContextSwitchTo(col_context);
    1004         [ +  + ]:          29 :                         for (i = 0; i < attr_cnt; i++)
    1005                 :             :                         {
    1006                 :          11 :                                 VacAttrStats *stats = thisdata->vacattrstats[i];
    1007                 :             : 
    1008                 :          11 :                                 stats->exprvals = exprvals + i;
    1009                 :          11 :                                 stats->exprnulls = exprnulls + i;
    1010                 :          11 :                                 stats->rowstride = attr_cnt;
    1011                 :          22 :                                 stats->compute_stats(stats,
    1012                 :             :                                                                          ind_fetch_func,
    1013                 :          11 :                                                                          numindexrows,
    1014                 :          11 :                                                                          totalindexrows);
    1015                 :             : 
    1016                 :          11 :                                 MemoryContextReset(col_context);
    1017                 :          11 :                         }
    1018                 :          18 :                 }
    1019                 :             : 
    1020                 :             :                 /* And clean up */
    1021                 :          20 :                 MemoryContextSwitchTo(ind_context);
    1022                 :             : 
    1023                 :          20 :                 ExecDropSingleTupleTableSlot(slot);
    1024                 :          20 :                 FreeExecutorState(estate);
    1025                 :          20 :                 MemoryContextReset(ind_context);
    1026         [ +  + ]:         286 :         }
    1027                 :             : 
    1028                 :         210 :         MemoryContextSwitchTo(old_context);
    1029                 :         210 :         MemoryContextDelete(ind_context);
    1030                 :         210 : }
    1031                 :             : 
    1032                 :             : /*
    1033                 :             :  * examine_attribute -- pre-analysis of a single column
    1034                 :             :  *
    1035                 :             :  * Determine whether the column is analyzable; if so, create and initialize
    1036                 :             :  * a VacAttrStats struct for it.  If not, return NULL.
    1037                 :             :  *
    1038                 :             :  * If index_expr isn't NULL, then we're trying to analyze an expression index,
    1039                 :             :  * and index_expr is the expression tree representing the column's data.
    1040                 :             :  */
    1041                 :             : static VacAttrStats *
    1042                 :        2693 : examine_attribute(Relation onerel, int attnum, Node *index_expr)
    1043                 :             : {
    1044                 :        2693 :         Form_pg_attribute attr = TupleDescAttr(onerel->rd_att, attnum - 1);
    1045                 :        2693 :         int                     attstattarget;
    1046                 :        2693 :         HeapTuple       atttuple;
    1047                 :        2693 :         Datum           dat;
    1048                 :        2693 :         bool            isnull;
    1049                 :        2693 :         HeapTuple       typtuple;
    1050                 :        2693 :         VacAttrStats *stats;
    1051                 :        2693 :         int                     i;
    1052                 :        2693 :         bool            ok;
    1053                 :             : 
    1054                 :             :         /* Never analyze dropped columns */
    1055         [ -  + ]:        2693 :         if (attr->attisdropped)
    1056                 :           0 :                 return NULL;
    1057                 :             : 
    1058                 :             :         /* Don't analyze virtual generated columns */
    1059         [ +  + ]:        2693 :         if (attr->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
    1060                 :           3 :                 return NULL;
    1061                 :             : 
    1062                 :             :         /*
    1063                 :             :          * Get attstattarget value.  Set to -1 if null.  (Analyze functions expect
    1064                 :             :          * -1 to mean use default_statistics_target; see for example
    1065                 :             :          * std_typanalyze.)
    1066                 :             :          */
    1067                 :        2690 :         atttuple = SearchSysCache2(ATTNUM, ObjectIdGetDatum(RelationGetRelid(onerel)), Int16GetDatum(attnum));
    1068         [ +  - ]:        2690 :         if (!HeapTupleIsValid(atttuple))
    1069   [ #  #  #  # ]:           0 :                 elog(ERROR, "cache lookup failed for attribute %d of relation %u",
    1070                 :             :                          attnum, RelationGetRelid(onerel));
    1071                 :        2690 :         dat = SysCacheGetAttr(ATTNUM, atttuple, Anum_pg_attribute_attstattarget, &isnull);
    1072         [ +  + ]:        2690 :         attstattarget = isnull ? -1 : DatumGetInt16(dat);
    1073                 :        2690 :         ReleaseSysCache(atttuple);
    1074                 :             : 
    1075                 :             :         /* Don't analyze column if user has specified not to */
    1076         [ +  + ]:        2690 :         if (attstattarget == 0)
    1077                 :           1 :                 return NULL;
    1078                 :             : 
    1079                 :             :         /*
    1080                 :             :          * Create the VacAttrStats struct.
    1081                 :             :          */
    1082                 :        2689 :         stats = palloc0_object(VacAttrStats);
    1083                 :        2689 :         stats->attstattarget = attstattarget;
    1084                 :             : 
    1085                 :             :         /*
    1086                 :             :          * When analyzing an expression index, believe the expression tree's type
    1087                 :             :          * not the column datatype --- the latter might be the opckeytype storage
    1088                 :             :          * type of the opclass, which is not interesting for our purposes.  (Note:
    1089                 :             :          * if we did anything with non-expression index columns, we'd need to
    1090                 :             :          * figure out where to get the correct type info from, but for now that's
    1091                 :             :          * not a problem.)      It's not clear whether anyone will care about the
    1092                 :             :          * typmod, but we store that too just in case.
    1093                 :             :          */
    1094         [ +  + ]:        2689 :         if (index_expr)
    1095                 :             :         {
    1096                 :          15 :                 stats->attrtypid = exprType(index_expr);
    1097                 :          15 :                 stats->attrtypmod = exprTypmod(index_expr);
    1098                 :             : 
    1099                 :             :                 /*
    1100                 :             :                  * If a collation has been specified for the index column, use that in
    1101                 :             :                  * preference to anything else; but if not, fall back to whatever we
    1102                 :             :                  * can get from the expression.
    1103                 :             :                  */
    1104         [ +  + ]:          15 :                 if (OidIsValid(onerel->rd_indcollation[attnum - 1]))
    1105                 :           2 :                         stats->attrcollid = onerel->rd_indcollation[attnum - 1];
    1106                 :             :                 else
    1107                 :          13 :                         stats->attrcollid = exprCollation(index_expr);
    1108                 :          15 :         }
    1109                 :             :         else
    1110                 :             :         {
    1111                 :        2674 :                 stats->attrtypid = attr->atttypid;
    1112                 :        2674 :                 stats->attrtypmod = attr->atttypmod;
    1113                 :        2674 :                 stats->attrcollid = attr->attcollation;
    1114                 :             :         }
    1115                 :             : 
    1116                 :        2689 :         typtuple = SearchSysCacheCopy1(TYPEOID,
    1117                 :             :                                                                    ObjectIdGetDatum(stats->attrtypid));
    1118         [ +  - ]:        2689 :         if (!HeapTupleIsValid(typtuple))
    1119   [ #  #  #  # ]:           0 :                 elog(ERROR, "cache lookup failed for type %u", stats->attrtypid);
    1120                 :        2689 :         stats->attrtype = (Form_pg_type) GETSTRUCT(typtuple);
    1121                 :        2689 :         stats->anl_context = anl_context;
    1122                 :        2689 :         stats->tupattnum = attnum;
    1123                 :             : 
    1124                 :             :         /*
    1125                 :             :          * The fields describing the stats->stavalues[n] element types default to
    1126                 :             :          * the type of the data being analyzed, but the type-specific typanalyze
    1127                 :             :          * function can change them if it wants to store something else.
    1128                 :             :          */
    1129         [ +  + ]:       16134 :         for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
    1130                 :             :         {
    1131                 :       13445 :                 stats->statypid[i] = stats->attrtypid;
    1132                 :       13445 :                 stats->statyplen[i] = stats->attrtype->typlen;
    1133                 :       13445 :                 stats->statypbyval[i] = stats->attrtype->typbyval;
    1134                 :       13445 :                 stats->statypalign[i] = stats->attrtype->typalign;
    1135                 :       13445 :         }
    1136                 :             : 
    1137                 :             :         /*
    1138                 :             :          * Call the type-specific typanalyze function.  If none is specified, use
    1139                 :             :          * std_typanalyze().
    1140                 :             :          */
    1141         [ +  + ]:        2689 :         if (OidIsValid(stats->attrtype->typanalyze))
    1142                 :          75 :                 ok = DatumGetBool(OidFunctionCall1(stats->attrtype->typanalyze,
    1143                 :             :                                                                                    PointerGetDatum(stats)));
    1144                 :             :         else
    1145                 :        2614 :                 ok = std_typanalyze(stats);
    1146                 :             : 
    1147   [ +  -  +  -  :        2689 :         if (!ok || stats->compute_stats == NULL || stats->minrows <= 0)
                   -  + ]
    1148                 :             :         {
    1149                 :           0 :                 heap_freetuple(typtuple);
    1150                 :           0 :                 pfree(stats);
    1151                 :           0 :                 return NULL;
    1152                 :             :         }
    1153                 :             : 
    1154                 :        2689 :         return stats;
    1155                 :        2693 : }
    1156                 :             : 
    1157                 :             : /*
    1158                 :             :  * Read stream callback returning the next BlockNumber as chosen by the
    1159                 :             :  * BlockSampling algorithm.
    1160                 :             :  */
    1161                 :             : static BlockNumber
    1162                 :       11697 : block_sampling_read_stream_next(ReadStream *stream,
    1163                 :             :                                                                 void *callback_private_data,
    1164                 :             :                                                                 void *per_buffer_data)
    1165                 :             : {
    1166                 :       11697 :         BlockSamplerData *bs = callback_private_data;
    1167                 :             : 
    1168         [ +  + ]:       11697 :         return BlockSampler_HasMore(bs) ? BlockSampler_Next(bs) : InvalidBlockNumber;
    1169                 :       11697 : }
    1170                 :             : 
    1171                 :             : /*
    1172                 :             :  * acquire_sample_rows -- acquire a random sample of rows from the table
    1173                 :             :  *
    1174                 :             :  * Selected rows are returned in the caller-allocated array rows[], which
    1175                 :             :  * must have at least targrows entries.
    1176                 :             :  * The actual number of rows selected is returned as the function result.
    1177                 :             :  * We also estimate the total numbers of live and dead rows in the table,
    1178                 :             :  * and return them into *totalrows and *totaldeadrows, respectively.
    1179                 :             :  *
    1180                 :             :  * The returned list of tuples is in order by physical position in the table.
    1181                 :             :  * (We will rely on this later to derive correlation estimates.)
    1182                 :             :  *
    1183                 :             :  * As of May 2004 we use a new two-stage method:  Stage one selects up
    1184                 :             :  * to targrows random blocks (or all blocks, if there aren't so many).
    1185                 :             :  * Stage two scans these blocks and uses the Vitter algorithm to create
    1186                 :             :  * a random sample of targrows rows (or less, if there are less in the
    1187                 :             :  * sample of blocks).  The two stages are executed simultaneously: each
    1188                 :             :  * block is processed as soon as stage one returns its number and while
    1189                 :             :  * the rows are read stage two controls which ones are to be inserted
    1190                 :             :  * into the sample.
    1191                 :             :  *
    1192                 :             :  * Although every row has an equal chance of ending up in the final
    1193                 :             :  * sample, this sampling method is not perfect: not every possible
    1194                 :             :  * sample has an equal chance of being selected.  For large relations
    1195                 :             :  * the number of different blocks represented by the sample tends to be
    1196                 :             :  * too small.  We can live with that for now.  Improvements are welcome.
    1197                 :             :  *
    1198                 :             :  * An important property of this sampling method is that because we do
    1199                 :             :  * look at a statistically unbiased set of blocks, we should get
    1200                 :             :  * unbiased estimates of the average numbers of live and dead rows per
    1201                 :             :  * block.  The previous sampling method put too much credence in the row
    1202                 :             :  * density near the start of the table.
    1203                 :             :  */
    1204                 :             : static int
    1205                 :        1000 : acquire_sample_rows(Relation onerel, int elevel,
    1206                 :             :                                         HeapTuple *rows, int targrows,
    1207                 :             :                                         double *totalrows, double *totaldeadrows)
    1208                 :             : {
    1209                 :        1000 :         int                     numrows = 0;    /* # rows now in reservoir */
    1210                 :        1000 :         double          samplerows = 0; /* total # rows collected */
    1211                 :        1000 :         double          liverows = 0;   /* # live rows seen */
    1212                 :        1000 :         double          deadrows = 0;   /* # dead rows seen */
    1213                 :        1000 :         double          rowstoskip = -1;        /* -1 means not set yet */
    1214                 :        1000 :         uint32          randseed;               /* Seed for block sampler(s) */
    1215                 :        1000 :         BlockNumber totalblocks;
    1216                 :        1000 :         TransactionId OldestXmin;
    1217                 :        1000 :         BlockSamplerData bs;
    1218                 :        1000 :         ReservoirStateData rstate;
    1219                 :        1000 :         TupleTableSlot *slot;
    1220                 :        1000 :         TableScanDesc scan;
    1221                 :        1000 :         BlockNumber nblocks;
    1222                 :        1000 :         BlockNumber blksdone = 0;
    1223                 :        1000 :         ReadStream *stream;
    1224                 :             : 
    1225         [ +  - ]:        1000 :         Assert(targrows > 0);
    1226                 :             : 
    1227                 :        1000 :         totalblocks = RelationGetNumberOfBlocks(onerel);
    1228                 :             : 
    1229                 :             :         /* Need a cutoff xmin for HeapTupleSatisfiesVacuum */
    1230                 :        1000 :         OldestXmin = GetOldestNonRemovableTransactionId(onerel);
    1231                 :             : 
    1232                 :             :         /* Prepare for sampling block numbers */
    1233                 :        1000 :         randseed = pg_prng_uint32(&pg_global_prng_state);
    1234                 :        1000 :         nblocks = BlockSampler_Init(&bs, totalblocks, targrows, randseed);
    1235                 :             : 
    1236                 :             :         /* Report sampling block numbers */
    1237                 :        1000 :         pgstat_progress_update_param(PROGRESS_ANALYZE_BLOCKS_TOTAL,
    1238                 :        1000 :                                                                  nblocks);
    1239                 :             : 
    1240                 :             :         /* Prepare for sampling rows */
    1241                 :        1000 :         reservoir_init_selection_state(&rstate, targrows);
    1242                 :             : 
    1243                 :        1000 :         scan = table_beginscan_analyze(onerel);
    1244                 :        1000 :         slot = table_slot_create(onerel, NULL);
    1245                 :             : 
    1246                 :             :         /*
    1247                 :             :          * It is safe to use batching, as block_sampling_read_stream_next never
    1248                 :             :          * blocks.
    1249                 :             :          */
    1250                 :        1000 :         stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE |
    1251                 :             :                                                                                 READ_STREAM_USE_BATCHING,
    1252                 :        1000 :                                                                                 vac_strategy,
    1253                 :        1000 :                                                                                 scan->rs_rd,
    1254                 :             :                                                                                 MAIN_FORKNUM,
    1255                 :             :                                                                                 block_sampling_read_stream_next,
    1256                 :             :                                                                                 &bs,
    1257                 :             :                                                                                 0);
    1258                 :             : 
    1259                 :             :         /* Outer loop over blocks to sample */
    1260         [ +  + ]:       11697 :         while (table_scan_analyze_next_block(scan, stream))
    1261                 :             :         {
    1262                 :       10697 :                 vacuum_delay_point(true);
    1263                 :             : 
    1264         [ +  + ]:     1374945 :                 while (table_scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot))
    1265                 :             :                 {
    1266                 :             :                         /*
    1267                 :             :                          * The first targrows sample rows are simply copied into the
    1268                 :             :                          * reservoir. Then we start replacing tuples in the sample until
    1269                 :             :                          * we reach the end of the relation.  This algorithm is from Jeff
    1270                 :             :                          * Vitter's paper (see full citation in utils/misc/sampling.c). It
    1271                 :             :                          * works by repeatedly computing the number of tuples to skip
    1272                 :             :                          * before selecting a tuple, which replaces a randomly chosen
    1273                 :             :                          * element of the reservoir (current set of tuples).  At all times
    1274                 :             :                          * the reservoir is a true random sample of the tuples we've
    1275                 :             :                          * passed over so far, so when we fall off the end of the relation
    1276                 :             :                          * we're done.
    1277                 :             :                          */
    1278         [ +  + ]:     1364248 :                         if (numrows < targrows)
    1279                 :     1084089 :                                 rows[numrows++] = ExecCopySlotHeapTuple(slot);
    1280                 :             :                         else
    1281                 :             :                         {
    1282                 :             :                                 /*
    1283                 :             :                                  * t in Vitter's paper is the number of records already
    1284                 :             :                                  * processed.  If we need to compute a new S value, we must
    1285                 :             :                                  * use the not-yet-incremented value of samplerows as t.
    1286                 :             :                                  */
    1287         [ +  + ]:      280159 :                                 if (rowstoskip < 0)
    1288                 :      144492 :                                         rowstoskip = reservoir_get_next_S(&rstate, samplerows, targrows);
    1289                 :             : 
    1290         [ +  + ]:      280159 :                                 if (rowstoskip <= 0)
    1291                 :             :                                 {
    1292                 :             :                                         /*
    1293                 :             :                                          * Found a suitable tuple, so save it, replacing one old
    1294                 :             :                                          * tuple at random
    1295                 :             :                                          */
    1296                 :      144490 :                                         int                     k = (int) (targrows * sampler_random_fract(&rstate.randstate));
    1297                 :             : 
    1298         [ +  - ]:      144490 :                                         Assert(k >= 0 && k < targrows);
    1299                 :      144490 :                                         heap_freetuple(rows[k]);
    1300                 :      144490 :                                         rows[k] = ExecCopySlotHeapTuple(slot);
    1301                 :      144490 :                                 }
    1302                 :             : 
    1303                 :      280159 :                                 rowstoskip -= 1;
    1304                 :             :                         }
    1305                 :             : 
    1306                 :     1364248 :                         samplerows += 1;
    1307                 :             :                 }
    1308                 :             : 
    1309                 :       10697 :                 pgstat_progress_update_param(PROGRESS_ANALYZE_BLOCKS_DONE,
    1310                 :       10697 :                                                                          ++blksdone);
    1311                 :             :         }
    1312                 :             : 
    1313                 :        1000 :         read_stream_end(stream);
    1314                 :             : 
    1315                 :        1000 :         ExecDropSingleTupleTableSlot(slot);
    1316                 :        1000 :         table_endscan(scan);
    1317                 :             : 
    1318                 :             :         /*
    1319                 :             :          * If we didn't find as many tuples as we wanted then we're done. No sort
    1320                 :             :          * is needed, since they're already in order.
    1321                 :             :          *
    1322                 :             :          * Otherwise we need to sort the collected tuples by position
    1323                 :             :          * (itempointer). It's not worth worrying about corner cases where the
    1324                 :             :          * tuples are already sorted.
    1325                 :             :          */
    1326         [ +  + ]:        1000 :         if (numrows == targrows)
    1327                 :          15 :                 qsort_interruptible(rows, numrows, sizeof(HeapTuple),
    1328                 :             :                                                         compare_rows, NULL);
    1329                 :             : 
    1330                 :             :         /*
    1331                 :             :          * Estimate total numbers of live and dead rows in relation, extrapolating
    1332                 :             :          * on the assumption that the average tuple density in pages we didn't
    1333                 :             :          * scan is the same as in the pages we did scan.  Since what we scanned is
    1334                 :             :          * a random sample of the pages in the relation, this should be a good
    1335                 :             :          * assumption.
    1336                 :             :          */
    1337         [ +  + ]:        1000 :         if (bs.m > 0)
    1338                 :             :         {
    1339                 :         932 :                 *totalrows = floor((liverows / bs.m) * totalblocks + 0.5);
    1340                 :         932 :                 *totaldeadrows = floor((deadrows / bs.m) * totalblocks + 0.5);
    1341                 :         932 :         }
    1342                 :             :         else
    1343                 :             :         {
    1344                 :          68 :                 *totalrows = 0.0;
    1345                 :          68 :                 *totaldeadrows = 0.0;
    1346                 :             :         }
    1347                 :             : 
    1348                 :             :         /*
    1349                 :             :          * Emit some interesting relation info
    1350                 :             :          */
    1351   [ -  +  #  #  :        1000 :         ereport(elevel,
          -  +  -  +  #  
                      # ]
    1352                 :             :                         (errmsg("\"%s\": scanned %d of %u pages, "
    1353                 :             :                                         "containing %.0f live rows and %.0f dead rows; "
    1354                 :             :                                         "%d rows in sample, %.0f estimated total rows",
    1355                 :             :                                         RelationGetRelationName(onerel),
    1356                 :             :                                         bs.m, totalblocks,
    1357                 :             :                                         liverows, deadrows,
    1358                 :             :                                         numrows, *totalrows)));
    1359                 :             : 
    1360                 :        2000 :         return numrows;
    1361                 :        1000 : }
    1362                 :             : 
    1363                 :             : /*
    1364                 :             :  * Comparator for sorting rows[] array
    1365                 :             :  */
    1366                 :             : static int
    1367                 :     2209962 : compare_rows(const void *a, const void *b, void *arg)
    1368                 :             : {
    1369                 :     2209962 :         HeapTuple       ha = *(const HeapTuple *) a;
    1370                 :     2209962 :         HeapTuple       hb = *(const HeapTuple *) b;
    1371                 :     2209962 :         BlockNumber ba = ItemPointerGetBlockNumber(&ha->t_self);
    1372                 :     2209962 :         OffsetNumber oa = ItemPointerGetOffsetNumber(&ha->t_self);
    1373                 :     2209962 :         BlockNumber bb = ItemPointerGetBlockNumber(&hb->t_self);
    1374                 :     2209962 :         OffsetNumber ob = ItemPointerGetOffsetNumber(&hb->t_self);
    1375                 :             : 
    1376         [ +  + ]:     2209962 :         if (ba < bb)
    1377                 :      602743 :                 return -1;
    1378         [ +  + ]:     1607219 :         if (ba > bb)
    1379                 :      644298 :                 return 1;
    1380         [ +  + ]:      962921 :         if (oa < ob)
    1381                 :      527290 :                 return -1;
    1382         [ +  - ]:      435631 :         if (oa > ob)
    1383                 :      435631 :                 return 1;
    1384                 :           0 :         return 0;
    1385                 :     2209962 : }
    1386                 :             : 
    1387                 :             : 
    1388                 :             : /*
    1389                 :             :  * acquire_inherited_sample_rows -- acquire sample rows from inheritance tree
    1390                 :             :  *
    1391                 :             :  * This has the same API as acquire_sample_rows, except that rows are
    1392                 :             :  * collected from all inheritance children as well as the specified table.
    1393                 :             :  * We fail and return zero if there are no inheritance children, or if all
    1394                 :             :  * children are foreign tables that don't support ANALYZE.
    1395                 :             :  */
    1396                 :             : static int
    1397                 :         142 : acquire_inherited_sample_rows(Relation onerel, int elevel,
    1398                 :             :                                                           HeapTuple *rows, int targrows,
    1399                 :             :                                                           double *totalrows, double *totaldeadrows)
    1400                 :             : {
    1401                 :         142 :         List       *tableOIDs;
    1402                 :         142 :         Relation   *rels;
    1403                 :         142 :         AcquireSampleRowsFunc *acquirefuncs;
    1404                 :         142 :         double     *relblocks;
    1405                 :         142 :         double          totalblocks;
    1406                 :         142 :         int                     numrows,
    1407                 :             :                                 nrels,
    1408                 :             :                                 i;
    1409                 :         142 :         ListCell   *lc;
    1410                 :         142 :         bool            has_child;
    1411                 :             : 
    1412                 :             :         /* Initialize output parameters to zero now, in case we exit early */
    1413                 :         142 :         *totalrows = 0;
    1414                 :         142 :         *totaldeadrows = 0;
    1415                 :             : 
    1416                 :             :         /*
    1417                 :             :          * Find all members of inheritance set.  We only need AccessShareLock on
    1418                 :             :          * the children.
    1419                 :             :          */
    1420                 :         142 :         tableOIDs =
    1421                 :         142 :                 find_all_inheritors(RelationGetRelid(onerel), AccessShareLock, NULL);
    1422                 :             : 
    1423                 :             :         /*
    1424                 :             :          * Check that there's at least one descendant, else fail.  This could
    1425                 :             :          * happen despite analyze_rel's relhassubclass check, if table once had a
    1426                 :             :          * child but no longer does.  In that case, we can clear the
    1427                 :             :          * relhassubclass field so as not to make the same mistake again later.
    1428                 :             :          * (This is safe because we hold ShareUpdateExclusiveLock.)
    1429                 :             :          */
    1430         [ +  + ]:         142 :         if (list_length(tableOIDs) < 2)
    1431                 :             :         {
    1432                 :             :                 /* CCI because we already updated the pg_class row in this command */
    1433                 :           3 :                 CommandCounterIncrement();
    1434                 :           3 :                 SetRelationHasSubclass(RelationGetRelid(onerel), false);
    1435   [ -  +  #  #  :           3 :                 ereport(elevel,
          -  +  -  +  #  
                      # ]
    1436                 :             :                                 (errmsg("skipping analyze of \"%s.%s\" inheritance tree --- this inheritance tree contains no child tables",
    1437                 :             :                                                 get_namespace_name(RelationGetNamespace(onerel)),
    1438                 :             :                                                 RelationGetRelationName(onerel))));
    1439                 :           3 :                 return 0;
    1440                 :             :         }
    1441                 :             : 
    1442                 :             :         /*
    1443                 :             :          * Identify acquirefuncs to use, and count blocks in all the relations.
    1444                 :             :          * The result could overflow BlockNumber, so we use double arithmetic.
    1445                 :             :          */
    1446                 :         139 :         rels = (Relation *) palloc(list_length(tableOIDs) * sizeof(Relation));
    1447                 :         139 :         acquirefuncs = (AcquireSampleRowsFunc *)
    1448                 :         139 :                 palloc(list_length(tableOIDs) * sizeof(AcquireSampleRowsFunc));
    1449                 :         139 :         relblocks = (double *) palloc(list_length(tableOIDs) * sizeof(double));
    1450                 :         139 :         totalblocks = 0;
    1451                 :         139 :         nrels = 0;
    1452                 :         139 :         has_child = false;
    1453   [ +  -  +  +  :         643 :         foreach(lc, tableOIDs)
                   +  + ]
    1454                 :             :         {
    1455                 :         504 :                 Oid                     childOID = lfirst_oid(lc);
    1456                 :         504 :                 Relation        childrel;
    1457                 :         504 :                 AcquireSampleRowsFunc acquirefunc = NULL;
    1458                 :         504 :                 BlockNumber relpages = 0;
    1459                 :             : 
    1460                 :             :                 /* We already got the needed lock */
    1461                 :         504 :                 childrel = table_open(childOID, NoLock);
    1462                 :             : 
    1463                 :             :                 /* Ignore if temp table of another backend */
    1464   [ +  +  +  - ]:         504 :                 if (RELATION_IS_OTHER_TEMP(childrel))
    1465                 :             :                 {
    1466                 :             :                         /* ... but release the lock on it */
    1467         [ #  # ]:           0 :                         Assert(childrel != onerel);
    1468                 :           0 :                         table_close(childrel, AccessShareLock);
    1469                 :           0 :                         continue;
    1470                 :             :                 }
    1471                 :             : 
    1472                 :             :                 /* Check table type (MATVIEW can't happen, but might as well allow) */
    1473   [ +  +  -  + ]:         504 :                 if (childrel->rd_rel->relkind == RELKIND_RELATION ||
    1474                 :         135 :                         childrel->rd_rel->relkind == RELKIND_MATVIEW)
    1475                 :             :                 {
    1476                 :             :                         /* Regular table, so use the regular row acquisition function */
    1477                 :         369 :                         acquirefunc = acquire_sample_rows;
    1478                 :         369 :                         relpages = RelationGetNumberOfBlocks(childrel);
    1479                 :         369 :                 }
    1480         [ -  + ]:         135 :                 else if (childrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
    1481                 :             :                 {
    1482                 :             :                         /*
    1483                 :             :                          * For a foreign table, call the FDW's hook function to see
    1484                 :             :                          * whether it supports analysis.
    1485                 :             :                          */
    1486                 :           0 :                         FdwRoutine *fdwroutine;
    1487                 :           0 :                         bool            ok = false;
    1488                 :             : 
    1489                 :           0 :                         fdwroutine = GetFdwRoutineForRelation(childrel, false);
    1490                 :             : 
    1491         [ #  # ]:           0 :                         if (fdwroutine->AnalyzeForeignTable != NULL)
    1492                 :           0 :                                 ok = fdwroutine->AnalyzeForeignTable(childrel,
    1493                 :             :                                                                                                          &acquirefunc,
    1494                 :             :                                                                                                          &relpages);
    1495                 :             : 
    1496         [ #  # ]:           0 :                         if (!ok)
    1497                 :             :                         {
    1498                 :             :                                 /* ignore, but release the lock on it */
    1499         [ #  # ]:           0 :                                 Assert(childrel != onerel);
    1500                 :           0 :                                 table_close(childrel, AccessShareLock);
    1501                 :           0 :                                 continue;
    1502                 :             :                         }
    1503         [ #  # ]:           0 :                 }
    1504                 :             :                 else
    1505                 :             :                 {
    1506                 :             :                         /*
    1507                 :             :                          * ignore, but release the lock on it.  don't try to unlock the
    1508                 :             :                          * passed-in relation
    1509                 :             :                          */
    1510         [ -  + ]:         135 :                         Assert(childrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
    1511         [ +  + ]:         135 :                         if (childrel != onerel)
    1512                 :          13 :                                 table_close(childrel, AccessShareLock);
    1513                 :             :                         else
    1514                 :         122 :                                 table_close(childrel, NoLock);
    1515                 :         135 :                         continue;
    1516                 :             :                 }
    1517                 :             : 
    1518                 :             :                 /* OK, we'll process this child */
    1519                 :         369 :                 has_child = true;
    1520                 :         369 :                 rels[nrels] = childrel;
    1521                 :         369 :                 acquirefuncs[nrels] = acquirefunc;
    1522                 :         369 :                 relblocks[nrels] = (double) relpages;
    1523                 :         369 :                 totalblocks += (double) relpages;
    1524                 :         369 :                 nrels++;
    1525      [ -  +  + ]:         504 :         }
    1526                 :             : 
    1527                 :             :         /*
    1528                 :             :          * If we don't have at least one child table to consider, fail.  If the
    1529                 :             :          * relation is a partitioned table, it's not counted as a child table.
    1530                 :             :          */
    1531         [ +  - ]:         139 :         if (!has_child)
    1532                 :             :         {
    1533   [ #  #  #  #  :           0 :                 ereport(elevel,
          #  #  #  #  #  
                      # ]
    1534                 :             :                                 (errmsg("skipping analyze of \"%s.%s\" inheritance tree --- this inheritance tree contains no analyzable child tables",
    1535                 :             :                                                 get_namespace_name(RelationGetNamespace(onerel)),
    1536                 :             :                                                 RelationGetRelationName(onerel))));
    1537                 :           0 :                 return 0;
    1538                 :             :         }
    1539                 :             : 
    1540                 :             :         /*
    1541                 :             :          * Now sample rows from each relation, proportionally to its fraction of
    1542                 :             :          * the total block count.  (This might be less than desirable if the child
    1543                 :             :          * rels have radically different free-space percentages, but it's not
    1544                 :             :          * clear that it's worth working harder.)
    1545                 :             :          */
    1546                 :         139 :         pgstat_progress_update_param(PROGRESS_ANALYZE_CHILD_TABLES_TOTAL,
    1547                 :         139 :                                                                  nrels);
    1548                 :         139 :         numrows = 0;
    1549         [ +  + ]:         508 :         for (i = 0; i < nrels; i++)
    1550                 :             :         {
    1551                 :         369 :                 Relation        childrel = rels[i];
    1552                 :         369 :                 AcquireSampleRowsFunc acquirefunc = acquirefuncs[i];
    1553                 :         369 :                 double          childblocks = relblocks[i];
    1554                 :             : 
    1555                 :             :                 /*
    1556                 :             :                  * Report progress.  The sampling function will normally report blocks
    1557                 :             :                  * done/total, but we need to reset them to 0 here, so that they don't
    1558                 :             :                  * show an old value until that.
    1559                 :             :                  */
    1560                 :             :                 {
    1561                 :         369 :                         const int       progress_index[] = {
    1562                 :             :                                 PROGRESS_ANALYZE_CURRENT_CHILD_TABLE_RELID,
    1563                 :             :                                 PROGRESS_ANALYZE_BLOCKS_DONE,
    1564                 :             :                                 PROGRESS_ANALYZE_BLOCKS_TOTAL
    1565                 :             :                         };
    1566                 :         738 :                         const int64 progress_vals[] = {
    1567                 :         369 :                                 RelationGetRelid(childrel),
    1568                 :             :                                 0,
    1569                 :             :                                 0,
    1570                 :             :                         };
    1571                 :             : 
    1572                 :         369 :                         pgstat_progress_update_multi_param(3, progress_index, progress_vals);
    1573                 :         369 :                 }
    1574                 :             : 
    1575         [ +  + ]:         369 :                 if (childblocks > 0)
    1576                 :             :                 {
    1577                 :         347 :                         int                     childtargrows;
    1578                 :             : 
    1579                 :         347 :                         childtargrows = (int) rint(targrows * childblocks / totalblocks);
    1580                 :             :                         /* Make sure we don't overrun due to roundoff error */
    1581         [ +  + ]:         347 :                         childtargrows = Min(childtargrows, targrows - numrows);
    1582         [ -  + ]:         347 :                         if (childtargrows > 0)
    1583                 :             :                         {
    1584                 :         347 :                                 int                     childrows;
    1585                 :         347 :                                 double          trows,
    1586                 :             :                                                         tdrows;
    1587                 :             : 
    1588                 :             :                                 /* Fetch a random sample of the child's rows */
    1589                 :         694 :                                 childrows = (*acquirefunc) (childrel, elevel,
    1590                 :         347 :                                                                                         rows + numrows, childtargrows,
    1591                 :             :                                                                                         &trows, &tdrows);
    1592                 :             : 
    1593                 :             :                                 /* We may need to convert from child's rowtype to parent's */
    1594   [ +  -  +  + ]:         347 :                                 if (childrows > 0 &&
    1595                 :         694 :                                         !equalRowTypes(RelationGetDescr(childrel),
    1596                 :         347 :                                                                    RelationGetDescr(onerel)))
    1597                 :             :                                 {
    1598                 :         334 :                                         TupleConversionMap *map;
    1599                 :             : 
    1600                 :         668 :                                         map = convert_tuples_by_name(RelationGetDescr(childrel),
    1601                 :         334 :                                                                                                  RelationGetDescr(onerel));
    1602         [ +  + ]:         334 :                                         if (map != NULL)
    1603                 :             :                                         {
    1604                 :          21 :                                                 int                     j;
    1605                 :             : 
    1606         [ +  + ]:       17379 :                                                 for (j = 0; j < childrows; j++)
    1607                 :             :                                                 {
    1608                 :       17358 :                                                         HeapTuple       newtup;
    1609                 :             : 
    1610                 :       17358 :                                                         newtup = execute_attr_map_tuple(rows[numrows + j], map);
    1611                 :       17358 :                                                         heap_freetuple(rows[numrows + j]);
    1612                 :       17358 :                                                         rows[numrows + j] = newtup;
    1613                 :       17358 :                                                 }
    1614                 :          21 :                                                 free_conversion_map(map);
    1615                 :          21 :                                         }
    1616                 :         334 :                                 }
    1617                 :             : 
    1618                 :             :                                 /* And add to counts */
    1619                 :         347 :                                 numrows += childrows;
    1620                 :         347 :                                 *totalrows += trows;
    1621                 :         347 :                                 *totaldeadrows += tdrows;
    1622                 :         347 :                         }
    1623                 :         347 :                 }
    1624                 :             : 
    1625                 :             :                 /*
    1626                 :             :                  * Note: we cannot release the child-table locks, since we may have
    1627                 :             :                  * pointers to their TOAST tables in the sampled rows.
    1628                 :             :                  */
    1629                 :         369 :                 table_close(childrel, NoLock);
    1630                 :         369 :                 pgstat_progress_update_param(PROGRESS_ANALYZE_CHILD_TABLES_DONE,
    1631                 :         369 :                                                                          i + 1);
    1632                 :         369 :         }
    1633                 :             : 
    1634                 :         139 :         return numrows;
    1635                 :         142 : }
    1636                 :             : 
    1637                 :             : 
    1638                 :             : /*
    1639                 :             :  *      update_attstats() -- update attribute statistics for one relation
    1640                 :             :  *
    1641                 :             :  *              Statistics are stored in several places: the pg_class row for the
    1642                 :             :  *              relation has stats about the whole relation, and there is a
    1643                 :             :  *              pg_statistic row for each (non-system) attribute that has ever
    1644                 :             :  *              been analyzed.  The pg_class values are updated by VACUUM, not here.
    1645                 :             :  *
    1646                 :             :  *              pg_statistic rows are just added or updated normally.  This means
    1647                 :             :  *              that pg_statistic will probably contain some deleted rows at the
    1648                 :             :  *              completion of a vacuum cycle, unless it happens to get vacuumed last.
    1649                 :             :  *
    1650                 :             :  *              To keep things simple, we punt for pg_statistic, and don't try
    1651                 :             :  *              to compute or store rows for pg_statistic itself in pg_statistic.
    1652                 :             :  *              This could possibly be made to work, but it's not worth the trouble.
    1653                 :             :  *              Note analyze_rel() has seen to it that we won't come here when
    1654                 :             :  *              vacuuming pg_statistic itself.
    1655                 :             :  *
    1656                 :             :  *              Note: there would be a race condition here if two backends could
    1657                 :             :  *              ANALYZE the same table concurrently.  Presently, we lock that out
    1658                 :             :  *              by taking a self-exclusive lock on the relation in analyze_rel().
    1659                 :             :  */
    1660                 :             : static void
    1661                 :        1000 : update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats)
    1662                 :             : {
    1663                 :        1000 :         Relation        sd;
    1664                 :        1000 :         int                     attno;
    1665                 :        1000 :         CatalogIndexState indstate = NULL;
    1666                 :             : 
    1667         [ +  + ]:        1000 :         if (natts <= 0)
    1668                 :         275 :                 return;                                 /* nothing to do */
    1669                 :             : 
    1670                 :         725 :         sd = table_open(StatisticRelationId, RowExclusiveLock);
    1671                 :             : 
    1672         [ +  + ]:        3153 :         for (attno = 0; attno < natts; attno++)
    1673                 :             :         {
    1674                 :        2428 :                 VacAttrStats *stats = vacattrstats[attno];
    1675                 :        2428 :                 HeapTuple       stup,
    1676                 :             :                                         oldtup;
    1677                 :        2428 :                 int                     i,
    1678                 :             :                                         k,
    1679                 :             :                                         n;
    1680                 :        2428 :                 Datum           values[Natts_pg_statistic];
    1681                 :        2428 :                 bool            nulls[Natts_pg_statistic];
    1682                 :        2428 :                 bool            replaces[Natts_pg_statistic];
    1683                 :             : 
    1684                 :             :                 /* Ignore attr if we weren't able to collect stats */
    1685         [ +  + ]:        2428 :                 if (!stats->stats_valid)
    1686                 :           1 :                         continue;
    1687                 :             : 
    1688                 :             :                 /*
    1689                 :             :                  * Construct a new pg_statistic tuple
    1690                 :             :                  */
    1691         [ +  + ]:       77664 :                 for (i = 0; i < Natts_pg_statistic; ++i)
    1692                 :             :                 {
    1693                 :       75237 :                         nulls[i] = false;
    1694                 :       75237 :                         replaces[i] = true;
    1695                 :       75237 :                 }
    1696                 :             : 
    1697                 :        2427 :                 values[Anum_pg_statistic_starelid - 1] = ObjectIdGetDatum(relid);
    1698                 :        2427 :                 values[Anum_pg_statistic_staattnum - 1] = Int16GetDatum(stats->tupattnum);
    1699                 :        2427 :                 values[Anum_pg_statistic_stainherit - 1] = BoolGetDatum(inh);
    1700                 :        2427 :                 values[Anum_pg_statistic_stanullfrac - 1] = Float4GetDatum(stats->stanullfrac);
    1701                 :        2427 :                 values[Anum_pg_statistic_stawidth - 1] = Int32GetDatum(stats->stawidth);
    1702                 :        2427 :                 values[Anum_pg_statistic_stadistinct - 1] = Float4GetDatum(stats->stadistinct);
    1703                 :        2427 :                 i = Anum_pg_statistic_stakind1 - 1;
    1704         [ +  + ]:       14562 :                 for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
    1705                 :             :                 {
    1706                 :       12135 :                         values[i++] = Int16GetDatum(stats->stakind[k]); /* stakindN */
    1707                 :       12135 :                 }
    1708                 :        2427 :                 i = Anum_pg_statistic_staop1 - 1;
    1709         [ +  + ]:       14562 :                 for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
    1710                 :             :                 {
    1711                 :       12135 :                         values[i++] = ObjectIdGetDatum(stats->staop[k]);     /* staopN */
    1712                 :       12135 :                 }
    1713                 :        2427 :                 i = Anum_pg_statistic_stacoll1 - 1;
    1714         [ +  + ]:       14562 :                 for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
    1715                 :             :                 {
    1716                 :       12135 :                         values[i++] = ObjectIdGetDatum(stats->stacoll[k]);   /* stacollN */
    1717                 :       12135 :                 }
    1718                 :        2427 :                 i = Anum_pg_statistic_stanumbers1 - 1;
    1719         [ +  + ]:       14562 :                 for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
    1720                 :             :                 {
    1721         [ +  + ]:       12135 :                         if (stats->stanumbers[k] != NULL)
    1722                 :             :                         {
    1723                 :        3396 :                                 int                     nnum = stats->numnumbers[k];
    1724                 :        3396 :                                 Datum      *numdatums = (Datum *) palloc(nnum * sizeof(Datum));
    1725                 :        3396 :                                 ArrayType  *arry;
    1726                 :             : 
    1727         [ +  + ]:       35520 :                                 for (n = 0; n < nnum; n++)
    1728                 :       32124 :                                         numdatums[n] = Float4GetDatum(stats->stanumbers[k][n]);
    1729                 :        3396 :                                 arry = construct_array_builtin(numdatums, nnum, FLOAT4OID);
    1730                 :        3396 :                                 values[i++] = PointerGetDatum(arry);    /* stanumbersN */
    1731                 :        3396 :                         }
    1732                 :             :                         else
    1733                 :             :                         {
    1734                 :        8739 :                                 nulls[i] = true;
    1735                 :        8739 :                                 values[i++] = (Datum) 0;
    1736                 :             :                         }
    1737                 :       12135 :                 }
    1738                 :        2427 :                 i = Anum_pg_statistic_stavalues1 - 1;
    1739         [ +  + ]:       14562 :                 for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
    1740                 :             :                 {
    1741         [ +  + ]:       12135 :                         if (stats->stavalues[k] != NULL)
    1742                 :             :                         {
    1743                 :        2365 :                                 ArrayType  *arry;
    1744                 :             : 
    1745                 :        4730 :                                 arry = construct_array(stats->stavalues[k],
    1746                 :        2365 :                                                                            stats->numvalues[k],
    1747                 :        2365 :                                                                            stats->statypid[k],
    1748                 :        2365 :                                                                            stats->statyplen[k],
    1749                 :        2365 :                                                                            stats->statypbyval[k],
    1750                 :        2365 :                                                                            stats->statypalign[k]);
    1751                 :        2365 :                                 values[i++] = PointerGetDatum(arry);    /* stavaluesN */
    1752                 :        2365 :                         }
    1753                 :             :                         else
    1754                 :             :                         {
    1755                 :        9770 :                                 nulls[i] = true;
    1756                 :        9770 :                                 values[i++] = (Datum) 0;
    1757                 :             :                         }
    1758                 :       12135 :                 }
    1759                 :             : 
    1760                 :             :                 /* Is there already a pg_statistic tuple for this attribute? */
    1761                 :        2427 :                 oldtup = SearchSysCache3(STATRELATTINH,
    1762                 :        2427 :                                                                  ObjectIdGetDatum(relid),
    1763                 :        2427 :                                                                  Int16GetDatum(stats->tupattnum),
    1764                 :        2427 :                                                                  BoolGetDatum(inh));
    1765                 :             : 
    1766                 :             :                 /* Open index information when we know we need it */
    1767         [ +  + ]:        2427 :                 if (indstate == NULL)
    1768                 :         724 :                         indstate = CatalogOpenIndexes(sd);
    1769                 :             : 
    1770         [ +  + ]:        2427 :                 if (HeapTupleIsValid(oldtup))
    1771                 :             :                 {
    1772                 :             :                         /* Yes, replace it */
    1773                 :        1502 :                         stup = heap_modify_tuple(oldtup,
    1774                 :         751 :                                                                          RelationGetDescr(sd),
    1775                 :         751 :                                                                          values,
    1776                 :         751 :                                                                          nulls,
    1777                 :         751 :                                                                          replaces);
    1778                 :         751 :                         ReleaseSysCache(oldtup);
    1779                 :         751 :                         CatalogTupleUpdateWithInfo(sd, &stup->t_self, stup, indstate);
    1780                 :         751 :                 }
    1781                 :             :                 else
    1782                 :             :                 {
    1783                 :             :                         /* No, insert new tuple */
    1784                 :        1676 :                         stup = heap_form_tuple(RelationGetDescr(sd), values, nulls);
    1785                 :        1676 :                         CatalogTupleInsertWithInfo(sd, stup, indstate);
    1786                 :             :                 }
    1787                 :             : 
    1788                 :        2427 :                 heap_freetuple(stup);
    1789         [ +  + ]:        2428 :         }
    1790                 :             : 
    1791         [ +  + ]:         725 :         if (indstate != NULL)
    1792                 :         724 :                 CatalogCloseIndexes(indstate);
    1793                 :         725 :         table_close(sd, RowExclusiveLock);
    1794                 :        1000 : }
    1795                 :             : 
    1796                 :             : /*
    1797                 :             :  * Standard fetch function for use by compute_stats subroutines.
    1798                 :             :  *
    1799                 :             :  * This exists to provide some insulation between compute_stats routines
    1800                 :             :  * and the actual storage of the sample data.
    1801                 :             :  */
    1802                 :             : static Datum
    1803                 :     3776059 : std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
    1804                 :             : {
    1805                 :     3776059 :         int                     attnum = stats->tupattnum;
    1806                 :     3776059 :         HeapTuple       tuple = stats->rows[rownum];
    1807                 :     3776059 :         TupleDesc       tupDesc = stats->tupDesc;
    1808                 :             : 
    1809                 :     7552118 :         return heap_getattr(tuple, attnum, tupDesc, isNull);
    1810                 :     3776059 : }
    1811                 :             : 
    1812                 :             : /*
    1813                 :             :  * Fetch function for analyzing index expressions.
    1814                 :             :  *
    1815                 :             :  * We have not bothered to construct index tuples, instead the data is
    1816                 :             :  * just in Datum arrays.
    1817                 :             :  */
    1818                 :             : static Datum
    1819                 :       13414 : ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
    1820                 :             : {
    1821                 :       13414 :         int                     i;
    1822                 :             : 
    1823                 :             :         /* exprvals and exprnulls are already offset for proper column */
    1824                 :       13414 :         i = rownum * stats->rowstride;
    1825                 :       13414 :         *isNull = stats->exprnulls[i];
    1826                 :       26828 :         return stats->exprvals[i];
    1827                 :       13414 : }
    1828                 :             : 
    1829                 :             : 
    1830                 :             : /*==========================================================================
    1831                 :             :  *
    1832                 :             :  * Code below this point represents the "standard" type-specific statistics
    1833                 :             :  * analysis algorithms.  This code can be replaced on a per-data-type basis
    1834                 :             :  * by setting a nonzero value in pg_type.typanalyze.
    1835                 :             :  *
    1836                 :             :  *==========================================================================
    1837                 :             :  */
    1838                 :             : 
    1839                 :             : 
    1840                 :             : /*
    1841                 :             :  * To avoid consuming too much memory during analysis and/or too much space
    1842                 :             :  * in the resulting pg_statistic rows, we ignore varlena datums that are wider
    1843                 :             :  * than WIDTH_THRESHOLD (after detoasting!).  This is legitimate for MCV
    1844                 :             :  * and distinct-value calculations since a wide value is unlikely to be
    1845                 :             :  * duplicated at all, much less be a most-common value.  For the same reason,
    1846                 :             :  * ignoring wide values will not affect our estimates of histogram bin
    1847                 :             :  * boundaries very much.
    1848                 :             :  */
    1849                 :             : #define WIDTH_THRESHOLD  1024
    1850                 :             : 
    1851                 :             : #define swapInt(a,b)    do {int _tmp; _tmp=a; a=b; b=_tmp;} while(0)
    1852                 :             : #define swapDatum(a,b)  do {Datum _tmp; _tmp=a; a=b; b=_tmp;} while(0)
    1853                 :             : 
    1854                 :             : /*
    1855                 :             :  * Extra information used by the default analysis routines
    1856                 :             :  */
    1857                 :             : typedef struct
    1858                 :             : {
    1859                 :             :         int                     count;                  /* # of duplicates */
    1860                 :             :         int                     first;                  /* values[] index of first occurrence */
    1861                 :             : } ScalarMCVItem;
    1862                 :             : 
    1863                 :             : typedef struct
    1864                 :             : {
    1865                 :             :         SortSupport ssup;
    1866                 :             :         int                *tupnoLink;
    1867                 :             : } CompareScalarsContext;
    1868                 :             : 
    1869                 :             : 
    1870                 :             : static void compute_trivial_stats(VacAttrStatsP stats,
    1871                 :             :                                                                   AnalyzeAttrFetchFunc fetchfunc,
    1872                 :             :                                                                   int samplerows,
    1873                 :             :                                                                   double totalrows);
    1874                 :             : static void compute_distinct_stats(VacAttrStatsP stats,
    1875                 :             :                                                                    AnalyzeAttrFetchFunc fetchfunc,
    1876                 :             :                                                                    int samplerows,
    1877                 :             :                                                                    double totalrows);
    1878                 :             : static void compute_scalar_stats(VacAttrStatsP stats,
    1879                 :             :                                                                  AnalyzeAttrFetchFunc fetchfunc,
    1880                 :             :                                                                  int samplerows,
    1881                 :             :                                                                  double totalrows);
    1882                 :             : static int      compare_scalars(const void *a, const void *b, void *arg);
    1883                 :             : static int      compare_mcvs(const void *a, const void *b, void *arg);
    1884                 :             : static int      analyze_mcv_list(int *mcv_counts,
    1885                 :             :                                                          int num_mcv,
    1886                 :             :                                                          double stadistinct,
    1887                 :             :                                                          double stanullfrac,
    1888                 :             :                                                          int samplerows,
    1889                 :             :                                                          double totalrows);
    1890                 :             : 
    1891                 :             : 
    1892                 :             : /*
    1893                 :             :  * std_typanalyze -- the default type-specific typanalyze function
    1894                 :             :  */
    1895                 :             : bool
    1896                 :        2899 : std_typanalyze(VacAttrStats *stats)
    1897                 :             : {
    1898                 :        2899 :         Oid                     ltopr;
    1899                 :        2899 :         Oid                     eqopr;
    1900                 :        2899 :         StdAnalyzeData *mystats;
    1901                 :             : 
    1902                 :             :         /* If the attstattarget column is negative, use the default value */
    1903         [ +  + ]:        2899 :         if (stats->attstattarget < 0)
    1904                 :        2790 :                 stats->attstattarget = default_statistics_target;
    1905                 :             : 
    1906                 :             :         /* Look for default "<" and "=" operators for column's type */
    1907                 :        2899 :         get_sort_group_operators(stats->attrtypid,
    1908                 :             :                                                          false, false, false,
    1909                 :             :                                                          &ltopr, &eqopr, NULL,
    1910                 :             :                                                          NULL);
    1911                 :             : 
    1912                 :             :         /* Save the operator info for compute_stats routines */
    1913                 :        2899 :         mystats = palloc_object(StdAnalyzeData);
    1914                 :        2899 :         mystats->eqopr = eqopr;
    1915         [ +  + ]:        2899 :         mystats->eqfunc = OidIsValid(eqopr) ? get_opcode(eqopr) : InvalidOid;
    1916                 :        2899 :         mystats->ltopr = ltopr;
    1917                 :        2899 :         stats->extra_data = mystats;
    1918                 :             : 
    1919                 :             :         /*
    1920                 :             :          * Determine which standard statistics algorithm to use
    1921                 :             :          */
    1922   [ +  +  +  + ]:        2899 :         if (OidIsValid(eqopr) && OidIsValid(ltopr))
    1923                 :             :         {
    1924                 :             :                 /* Seems to be a scalar datatype */
    1925                 :        2859 :                 stats->compute_stats = compute_scalar_stats;
    1926                 :             :                 /*--------------------
    1927                 :             :                  * The following choice of minrows is based on the paper
    1928                 :             :                  * "Random sampling for histogram construction: how much is enough?"
    1929                 :             :                  * by Surajit Chaudhuri, Rajeev Motwani and Vivek Narasayya, in
    1930                 :             :                  * Proceedings of ACM SIGMOD International Conference on Management
    1931                 :             :                  * of Data, 1998, Pages 436-447.  Their Corollary 1 to Theorem 5
    1932                 :             :                  * says that for table size n, histogram size k, maximum relative
    1933                 :             :                  * error in bin size f, and error probability gamma, the minimum
    1934                 :             :                  * random sample size is
    1935                 :             :                  *              r = 4 * k * ln(2*n/gamma) / f^2
    1936                 :             :                  * Taking f = 0.5, gamma = 0.01, n = 10^6 rows, we obtain
    1937                 :             :                  *              r = 305.82 * k
    1938                 :             :                  * Note that because of the log function, the dependence on n is
    1939                 :             :                  * quite weak; even at n = 10^12, a 300*k sample gives <= 0.66
    1940                 :             :                  * bin size error with probability 0.99.  So there's no real need to
    1941                 :             :                  * scale for n, which is a good thing because we don't necessarily
    1942                 :             :                  * know it at this point.
    1943                 :             :                  *--------------------
    1944                 :             :                  */
    1945                 :        2859 :                 stats->minrows = 300 * stats->attstattarget;
    1946                 :        2859 :         }
    1947         [ +  + ]:          40 :         else if (OidIsValid(eqopr))
    1948                 :             :         {
    1949                 :             :                 /* We can still recognize distinct values */
    1950                 :          18 :                 stats->compute_stats = compute_distinct_stats;
    1951                 :             :                 /* Might as well use the same minrows as above */
    1952                 :          18 :                 stats->minrows = 300 * stats->attstattarget;
    1953                 :          18 :         }
    1954                 :             :         else
    1955                 :             :         {
    1956                 :             :                 /* Can't do much but the trivial stuff */
    1957                 :          22 :                 stats->compute_stats = compute_trivial_stats;
    1958                 :             :                 /* Might as well use the same minrows as above */
    1959                 :          22 :                 stats->minrows = 300 * stats->attstattarget;
    1960                 :             :         }
    1961                 :             : 
    1962                 :        2899 :         return true;
    1963                 :        2899 : }
    1964                 :             : 
    1965                 :             : 
    1966                 :             : /*
    1967                 :             :  *      compute_trivial_stats() -- compute very basic column statistics
    1968                 :             :  *
    1969                 :             :  *      We use this when we cannot find a hash "=" operator for the datatype.
    1970                 :             :  *
    1971                 :             :  *      We determine the fraction of non-null rows and the average datum width.
    1972                 :             :  */
    1973                 :             : static void
    1974                 :          21 : compute_trivial_stats(VacAttrStatsP stats,
    1975                 :             :                                           AnalyzeAttrFetchFunc fetchfunc,
    1976                 :             :                                           int samplerows,
    1977                 :             :                                           double totalrows)
    1978                 :             : {
    1979                 :          21 :         int                     i;
    1980                 :          21 :         int                     null_cnt = 0;
    1981                 :          21 :         int                     nonnull_cnt = 0;
    1982                 :          21 :         double          total_width = 0;
    1983         [ -  + ]:          42 :         bool            is_varlena = (!stats->attrtype->typbyval &&
    1984                 :          21 :                                                           stats->attrtype->typlen == -1);
    1985         [ -  + ]:          42 :         bool            is_varwidth = (!stats->attrtype->typbyval &&
    1986                 :          21 :                                                            stats->attrtype->typlen < 0);
    1987                 :             : 
    1988         [ +  + ]:       48294 :         for (i = 0; i < samplerows; i++)
    1989                 :             :         {
    1990                 :       48273 :                 Datum           value;
    1991                 :       48273 :                 bool            isnull;
    1992                 :             : 
    1993                 :       48273 :                 vacuum_delay_point(true);
    1994                 :             : 
    1995                 :       48273 :                 value = fetchfunc(stats, i, &isnull);
    1996                 :             : 
    1997                 :             :                 /* Check for null/nonnull */
    1998         [ +  + ]:       48273 :                 if (isnull)
    1999                 :             :                 {
    2000                 :        3791 :                         null_cnt++;
    2001                 :        3791 :                         continue;
    2002                 :             :                 }
    2003                 :       44482 :                 nonnull_cnt++;
    2004                 :             : 
    2005                 :             :                 /*
    2006                 :             :                  * If it's a variable-width field, add up widths for average width
    2007                 :             :                  * calculation.  Note that if the value is toasted, we use the toasted
    2008                 :             :                  * width.  We don't bother with this calculation if it's a fixed-width
    2009                 :             :                  * type.
    2010                 :             :                  */
    2011         [ +  + ]:       44482 :                 if (is_varlena)
    2012                 :             :                 {
    2013                 :        5718 :                         total_width += VARSIZE_ANY(DatumGetPointer(value));
    2014                 :        5718 :                 }
    2015         [ +  - ]:       38764 :                 else if (is_varwidth)
    2016                 :             :                 {
    2017                 :             :                         /* must be cstring */
    2018                 :           0 :                         total_width += strlen(DatumGetCString(value)) + 1;
    2019                 :           0 :                 }
    2020      [ -  +  + ]:       48273 :         }
    2021                 :             : 
    2022                 :             :         /* We can only compute average width if we found some non-null values. */
    2023         [ +  + ]:          21 :         if (nonnull_cnt > 0)
    2024                 :             :         {
    2025                 :          20 :                 stats->stats_valid = true;
    2026                 :             :                 /* Do the simple null-frac and width stats */
    2027                 :          20 :                 stats->stanullfrac = (double) null_cnt / (double) samplerows;
    2028         [ +  + ]:          20 :                 if (is_varwidth)
    2029                 :           9 :                         stats->stawidth = total_width / (double) nonnull_cnt;
    2030                 :             :                 else
    2031                 :          11 :                         stats->stawidth = stats->attrtype->typlen;
    2032                 :          20 :                 stats->stadistinct = 0.0;    /* "unknown" */
    2033                 :          20 :         }
    2034         [ -  + ]:           1 :         else if (null_cnt > 0)
    2035                 :             :         {
    2036                 :             :                 /* We found only nulls; assume the column is entirely null */
    2037                 :           1 :                 stats->stats_valid = true;
    2038                 :           1 :                 stats->stanullfrac = 1.0;
    2039         [ +  - ]:           1 :                 if (is_varwidth)
    2040                 :           1 :                         stats->stawidth = 0; /* "unknown" */
    2041                 :             :                 else
    2042                 :           0 :                         stats->stawidth = stats->attrtype->typlen;
    2043                 :           1 :                 stats->stadistinct = 0.0;    /* "unknown" */
    2044                 :           1 :         }
    2045                 :          21 : }
    2046                 :             : 
    2047                 :             : 
    2048                 :             : /*
    2049                 :             :  *      compute_distinct_stats() -- compute column statistics including ndistinct
    2050                 :             :  *
    2051                 :             :  *      We use this when we can find only an "=" operator for the datatype.
    2052                 :             :  *
    2053                 :             :  *      We determine the fraction of non-null rows, the average width, the
    2054                 :             :  *      most common values, and the (estimated) number of distinct values.
    2055                 :             :  *
    2056                 :             :  *      The most common values are determined by brute force: we keep a list
    2057                 :             :  *      of previously seen values, ordered by number of times seen, as we scan
    2058                 :             :  *      the samples.  A newly seen value is inserted just after the last
    2059                 :             :  *      multiply-seen value, causing the bottommost (oldest) singly-seen value
    2060                 :             :  *      to drop off the list.  The accuracy of this method, and also its cost,
    2061                 :             :  *      depend mainly on the length of the list we are willing to keep.
    2062                 :             :  */
    2063                 :             : static void
    2064                 :          13 : compute_distinct_stats(VacAttrStatsP stats,
    2065                 :             :                                            AnalyzeAttrFetchFunc fetchfunc,
    2066                 :             :                                            int samplerows,
    2067                 :             :                                            double totalrows)
    2068                 :             : {
    2069                 :          13 :         int                     i;
    2070                 :          13 :         int                     null_cnt = 0;
    2071                 :          13 :         int                     nonnull_cnt = 0;
    2072                 :          13 :         int                     toowide_cnt = 0;
    2073                 :          13 :         double          total_width = 0;
    2074         [ +  + ]:          22 :         bool            is_varlena = (!stats->attrtype->typbyval &&
    2075                 :           9 :                                                           stats->attrtype->typlen == -1);
    2076         [ +  + ]:          22 :         bool            is_varwidth = (!stats->attrtype->typbyval &&
    2077                 :           9 :                                                            stats->attrtype->typlen < 0);
    2078                 :          13 :         FmgrInfo        f_cmpeq;
    2079                 :             :         typedef struct
    2080                 :             :         {
    2081                 :             :                 Datum           value;
    2082                 :             :                 int                     count;
    2083                 :             :         } TrackItem;
    2084                 :          13 :         TrackItem  *track;
    2085                 :          13 :         int                     track_cnt,
    2086                 :             :                                 track_max;
    2087                 :          13 :         int                     num_mcv = stats->attstattarget;
    2088                 :          13 :         StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;
    2089                 :             : 
    2090                 :             :         /*
    2091                 :             :          * We track up to 2*n values for an n-element MCV list; but at least 10
    2092                 :             :          */
    2093                 :          13 :         track_max = 2 * num_mcv;
    2094         [ +  - ]:          13 :         if (track_max < 10)
    2095                 :           0 :                 track_max = 10;
    2096                 :          13 :         track = (TrackItem *) palloc(track_max * sizeof(TrackItem));
    2097                 :          13 :         track_cnt = 0;
    2098                 :             : 
    2099                 :          13 :         fmgr_info(mystats->eqfunc, &f_cmpeq);
    2100                 :             : 
    2101         [ +  + ]:        8795 :         for (i = 0; i < samplerows; i++)
    2102                 :             :         {
    2103                 :        8782 :                 Datum           value;
    2104                 :        8782 :                 bool            isnull;
    2105                 :        8782 :                 bool            match;
    2106                 :        8782 :                 int                     firstcount1,
    2107                 :             :                                         j;
    2108                 :             : 
    2109                 :        8782 :                 vacuum_delay_point(true);
    2110                 :             : 
    2111                 :        8782 :                 value = fetchfunc(stats, i, &isnull);
    2112                 :             : 
    2113                 :             :                 /* Check for null/nonnull */
    2114         [ +  + ]:        8782 :                 if (isnull)
    2115                 :             :                 {
    2116                 :        7413 :                         null_cnt++;
    2117                 :        7413 :                         continue;
    2118                 :             :                 }
    2119                 :        1369 :                 nonnull_cnt++;
    2120                 :             : 
    2121                 :             :                 /*
    2122                 :             :                  * If it's a variable-width field, add up widths for average width
    2123                 :             :                  * calculation.  Note that if the value is toasted, we use the toasted
    2124                 :             :                  * width.  We don't bother with this calculation if it's a fixed-width
    2125                 :             :                  * type.
    2126                 :             :                  */
    2127         [ +  + ]:        1369 :                 if (is_varlena)
    2128                 :             :                 {
    2129                 :         533 :                         total_width += VARSIZE_ANY(DatumGetPointer(value));
    2130                 :             : 
    2131                 :             :                         /*
    2132                 :             :                          * If the value is toasted, we want to detoast it just once to
    2133                 :             :                          * avoid repeated detoastings and resultant excess memory usage
    2134                 :             :                          * during the comparisons.  Also, check to see if the value is
    2135                 :             :                          * excessively wide, and if so don't detoast at all --- just
    2136                 :             :                          * ignore the value.
    2137                 :             :                          */
    2138         [ -  + ]:         533 :                         if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
    2139                 :             :                         {
    2140                 :           0 :                                 toowide_cnt++;
    2141                 :           0 :                                 continue;
    2142                 :             :                         }
    2143                 :         533 :                         value = PointerGetDatum(PG_DETOAST_DATUM(value));
    2144                 :         533 :                 }
    2145         [ +  - ]:         836 :                 else if (is_varwidth)
    2146                 :             :                 {
    2147                 :             :                         /* must be cstring */
    2148                 :           0 :                         total_width += strlen(DatumGetCString(value)) + 1;
    2149                 :           0 :                 }
    2150                 :             : 
    2151                 :             :                 /*
    2152                 :             :                  * See if the value matches anything we're already tracking.
    2153                 :             :                  */
    2154                 :        1369 :                 match = false;
    2155                 :        1369 :                 firstcount1 = track_cnt;
    2156         [ +  + ]:        1920 :                 for (j = 0; j < track_cnt; j++)
    2157                 :             :                 {
    2158         [ +  + ]:        1890 :                         if (DatumGetBool(FunctionCall2Coll(&f_cmpeq,
    2159                 :        1890 :                                                                                            stats->attrcollid,
    2160                 :        1890 :                                                                                            value, track[j].value)))
    2161                 :             :                         {
    2162                 :        1339 :                                 match = true;
    2163                 :        1339 :                                 break;
    2164                 :             :                         }
    2165   [ +  +  +  + ]:         551 :                         if (j < firstcount1 && track[j].count == 1)
    2166                 :          19 :                                 firstcount1 = j;
    2167                 :         551 :                 }
    2168                 :             : 
    2169         [ +  + ]:        1369 :                 if (match)
    2170                 :             :                 {
    2171                 :             :                         /* Found a match */
    2172                 :        1339 :                         track[j].count++;
    2173                 :             :                         /* This value may now need to "bubble up" in the track list */
    2174   [ +  +  +  + ]:        1359 :                         while (j > 0 && track[j].count > track[j - 1].count)
    2175                 :             :                         {
    2176                 :          20 :                                 swapDatum(track[j].value, track[j - 1].value);
    2177                 :          20 :                                 swapInt(track[j].count, track[j - 1].count);
    2178                 :          20 :                                 j--;
    2179                 :             :                         }
    2180                 :        1339 :                 }
    2181                 :             :                 else
    2182                 :             :                 {
    2183                 :             :                         /* No match.  Insert at head of count-1 list */
    2184         [ -  + ]:          30 :                         if (track_cnt < track_max)
    2185                 :          30 :                                 track_cnt++;
    2186         [ +  + ]:          48 :                         for (j = track_cnt - 1; j > firstcount1; j--)
    2187                 :             :                         {
    2188                 :          18 :                                 track[j].value = track[j - 1].value;
    2189                 :          18 :                                 track[j].count = track[j - 1].count;
    2190                 :          18 :                         }
    2191         [ -  + ]:          30 :                         if (firstcount1 < track_cnt)
    2192                 :             :                         {
    2193                 :          30 :                                 track[firstcount1].value = value;
    2194                 :          30 :                                 track[firstcount1].count = 1;
    2195                 :          30 :                         }
    2196                 :             :                 }
    2197      [ -  +  + ]:        8782 :         }
    2198                 :             : 
    2199                 :             :         /* We can only compute real stats if we found some non-null values. */
    2200         [ +  + ]:          13 :         if (nonnull_cnt > 0)
    2201                 :             :         {
    2202                 :           9 :                 int                     nmultiple,
    2203                 :             :                                         summultiple;
    2204                 :             : 
    2205                 :           9 :                 stats->stats_valid = true;
    2206                 :             :                 /* Do the simple null-frac and width stats */
    2207                 :           9 :                 stats->stanullfrac = (double) null_cnt / (double) samplerows;
    2208         [ +  + ]:           9 :                 if (is_varwidth)
    2209                 :           5 :                         stats->stawidth = total_width / (double) nonnull_cnt;
    2210                 :             :                 else
    2211                 :           4 :                         stats->stawidth = stats->attrtype->typlen;
    2212                 :             : 
    2213                 :             :                 /* Count the number of values we found multiple times */
    2214                 :           9 :                 summultiple = 0;
    2215         [ +  + ]:          32 :                 for (nmultiple = 0; nmultiple < track_cnt; nmultiple++)
    2216                 :             :                 {
    2217         [ +  + ]:          28 :                         if (track[nmultiple].count == 1)
    2218                 :           5 :                                 break;
    2219                 :          23 :                         summultiple += track[nmultiple].count;
    2220                 :          23 :                 }
    2221                 :             : 
    2222         [ +  + ]:           9 :                 if (nmultiple == 0)
    2223                 :             :                 {
    2224                 :             :                         /*
    2225                 :             :                          * If we found no repeated non-null values, assume it's a unique
    2226                 :             :                          * column; but be sure to discount for any nulls we found.
    2227                 :             :                          */
    2228                 :           2 :                         stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
    2229                 :           2 :                 }
    2230   [ +  -  +  -  :           7 :                 else if (track_cnt < track_max && toowide_cnt == 0 &&
                   +  + ]
    2231                 :           7 :                                  nmultiple == track_cnt)
    2232                 :             :                 {
    2233                 :             :                         /*
    2234                 :             :                          * Our track list includes every value in the sample, and every
    2235                 :             :                          * value appeared more than once.  Assume the column has just
    2236                 :             :                          * these values.  (This case is meant to address columns with
    2237                 :             :                          * small, fixed sets of possible values, such as boolean or enum
    2238                 :             :                          * columns.  If there are any values that appear just once in the
    2239                 :             :                          * sample, including too-wide values, we should assume that that's
    2240                 :             :                          * not what we're dealing with.)
    2241                 :             :                          */
    2242                 :           4 :                         stats->stadistinct = track_cnt;
    2243                 :           4 :                 }
    2244                 :             :                 else
    2245                 :             :                 {
    2246                 :             :                         /*----------
    2247                 :             :                          * Estimate the number of distinct values using the estimator
    2248                 :             :                          * proposed by Haas and Stokes in IBM Research Report RJ 10025:
    2249                 :             :                          *              n*d / (n - f1 + f1*n/N)
    2250                 :             :                          * where f1 is the number of distinct values that occurred
    2251                 :             :                          * exactly once in our sample of n rows (from a total of N),
    2252                 :             :                          * and d is the total number of distinct values in the sample.
    2253                 :             :                          * This is their Duj1 estimator; the other estimators they
    2254                 :             :                          * recommend are considerably more complex, and are numerically
    2255                 :             :                          * very unstable when n is much smaller than N.
    2256                 :             :                          *
    2257                 :             :                          * In this calculation, we consider only non-nulls.  We used to
    2258                 :             :                          * include rows with null values in the n and N counts, but that
    2259                 :             :                          * leads to inaccurate answers in columns with many nulls, and
    2260                 :             :                          * it's intuitively bogus anyway considering the desired result is
    2261                 :             :                          * the number of distinct non-null values.
    2262                 :             :                          *
    2263                 :             :                          * We assume (not very reliably!) that all the multiply-occurring
    2264                 :             :                          * values are reflected in the final track[] list, and the other
    2265                 :             :                          * nonnull values all appeared but once.  (XXX this usually
    2266                 :             :                          * results in a drastic overestimate of ndistinct.  Can we do
    2267                 :             :                          * any better?)
    2268                 :             :                          *----------
    2269                 :             :                          */
    2270                 :           3 :                         int                     f1 = nonnull_cnt - summultiple;
    2271                 :           3 :                         int                     d = f1 + nmultiple;
    2272                 :           3 :                         double          n = samplerows - null_cnt;
    2273                 :           3 :                         double          N = totalrows * (1.0 - stats->stanullfrac);
    2274                 :           3 :                         double          stadistinct;
    2275                 :             : 
    2276                 :             :                         /* N == 0 shouldn't happen, but just in case ... */
    2277         [ +  - ]:           3 :                         if (N > 0)
    2278                 :           3 :                                 stadistinct = (n * d) / ((n - f1) + f1 * n / N);
    2279                 :             :                         else
    2280                 :           0 :                                 stadistinct = 0;
    2281                 :             : 
    2282                 :             :                         /* Clamp to sane range in case of roundoff error */
    2283         [ +  + ]:           3 :                         if (stadistinct < d)
    2284                 :           1 :                                 stadistinct = d;
    2285         [ +  - ]:           3 :                         if (stadistinct > N)
    2286                 :           0 :                                 stadistinct = N;
    2287                 :             :                         /* And round to integer */
    2288                 :           3 :                         stats->stadistinct = floor(stadistinct + 0.5);
    2289                 :           3 :                 }
    2290                 :             : 
    2291                 :             :                 /*
    2292                 :             :                  * If we estimated the number of distinct values at more than 10% of
    2293                 :             :                  * the total row count (a very arbitrary limit), then assume that
    2294                 :             :                  * stadistinct should scale with the row count rather than be a fixed
    2295                 :             :                  * value.
    2296                 :             :                  */
    2297         [ +  + ]:           9 :                 if (stats->stadistinct > 0.1 * totalrows)
    2298                 :           1 :                         stats->stadistinct = -(stats->stadistinct / totalrows);
    2299                 :             : 
    2300                 :             :                 /*
    2301                 :             :                  * Decide how many values are worth storing as most-common values. If
    2302                 :             :                  * we are able to generate a complete MCV list (all the values in the
    2303                 :             :                  * sample will fit, and we think these are all the ones in the table),
    2304                 :             :                  * then do so.  Otherwise, store only those values that are
    2305                 :             :                  * significantly more common than the values not in the list.
    2306                 :             :                  *
    2307                 :             :                  * Note: the first of these cases is meant to address columns with
    2308                 :             :                  * small, fixed sets of possible values, such as boolean or enum
    2309                 :             :                  * columns.  If we can *completely* represent the column population by
    2310                 :             :                  * an MCV list that will fit into the stats target, then we should do
    2311                 :             :                  * so and thus provide the planner with complete information.  But if
    2312                 :             :                  * the MCV list is not complete, it's generally worth being more
    2313                 :             :                  * selective, and not just filling it all the way up to the stats
    2314                 :             :                  * target.
    2315                 :             :                  */
    2316   [ +  -  +  - ]:           9 :                 if (track_cnt < track_max && toowide_cnt == 0 &&
    2317   [ +  +  -  + ]:           9 :                         stats->stadistinct > 0 &&
    2318                 :           6 :                         track_cnt <= num_mcv)
    2319                 :             :                 {
    2320                 :             :                         /* Track list includes all values seen, and all will fit */
    2321                 :           6 :                         num_mcv = track_cnt;
    2322                 :           6 :                 }
    2323                 :             :                 else
    2324                 :             :                 {
    2325                 :           3 :                         int                *mcv_counts;
    2326                 :             : 
    2327                 :             :                         /* Incomplete list; decide how many values are worth keeping */
    2328         [ -  + ]:           3 :                         if (num_mcv > track_cnt)
    2329                 :           3 :                                 num_mcv = track_cnt;
    2330                 :             : 
    2331         [ +  - ]:           3 :                         if (num_mcv > 0)
    2332                 :             :                         {
    2333                 :           3 :                                 mcv_counts = (int *) palloc(num_mcv * sizeof(int));
    2334         [ +  + ]:           7 :                                 for (i = 0; i < num_mcv; i++)
    2335                 :           4 :                                         mcv_counts[i] = track[i].count;
    2336                 :             : 
    2337                 :           6 :                                 num_mcv = analyze_mcv_list(mcv_counts, num_mcv,
    2338                 :           3 :                                                                                    stats->stadistinct,
    2339                 :           3 :                                                                                    stats->stanullfrac,
    2340                 :           3 :                                                                                    samplerows, totalrows);
    2341                 :           3 :                         }
    2342                 :           3 :                 }
    2343                 :             : 
    2344                 :             :                 /* Generate MCV slot entry */
    2345         [ -  + ]:           9 :                 if (num_mcv > 0)
    2346                 :             :                 {
    2347                 :           9 :                         MemoryContext old_context;
    2348                 :           9 :                         Datum      *mcv_values;
    2349                 :           9 :                         float4     *mcv_freqs;
    2350                 :             : 
    2351                 :             :                         /* Must copy the target values into anl_context */
    2352                 :           9 :                         old_context = MemoryContextSwitchTo(stats->anl_context);
    2353                 :           9 :                         mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
    2354                 :           9 :                         mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
    2355         [ +  + ]:          39 :                         for (i = 0; i < num_mcv; i++)
    2356                 :             :                         {
    2357                 :          60 :                                 mcv_values[i] = datumCopy(track[i].value,
    2358                 :          30 :                                                                                   stats->attrtype->typbyval,
    2359                 :          30 :                                                                                   stats->attrtype->typlen);
    2360                 :          30 :                                 mcv_freqs[i] = (double) track[i].count / (double) samplerows;
    2361                 :          30 :                         }
    2362                 :           9 :                         MemoryContextSwitchTo(old_context);
    2363                 :             : 
    2364                 :           9 :                         stats->stakind[0] = STATISTIC_KIND_MCV;
    2365                 :           9 :                         stats->staop[0] = mystats->eqopr;
    2366                 :           9 :                         stats->stacoll[0] = stats->attrcollid;
    2367                 :           9 :                         stats->stanumbers[0] = mcv_freqs;
    2368                 :           9 :                         stats->numnumbers[0] = num_mcv;
    2369                 :           9 :                         stats->stavalues[0] = mcv_values;
    2370                 :           9 :                         stats->numvalues[0] = num_mcv;
    2371                 :             : 
    2372                 :             :                         /*
    2373                 :             :                          * Accept the defaults for stats->statypid and others. They have
    2374                 :             :                          * been set before we were called (see vacuum.h)
    2375                 :             :                          */
    2376                 :           9 :                 }
    2377                 :           9 :         }
    2378         [ -  + ]:           4 :         else if (null_cnt > 0)
    2379                 :             :         {
    2380                 :             :                 /* We found only nulls; assume the column is entirely null */
    2381                 :           4 :                 stats->stats_valid = true;
    2382                 :           4 :                 stats->stanullfrac = 1.0;
    2383         [ +  - ]:           4 :                 if (is_varwidth)
    2384                 :           4 :                         stats->stawidth = 0; /* "unknown" */
    2385                 :             :                 else
    2386                 :           0 :                         stats->stawidth = stats->attrtype->typlen;
    2387                 :           4 :                 stats->stadistinct = 0.0;    /* "unknown" */
    2388                 :           4 :         }
    2389                 :             : 
    2390                 :             :         /* We don't need to bother cleaning up any of our temporary palloc's */
    2391                 :          13 : }
    2392                 :             : 
    2393                 :             : 
    2394                 :             : /*
    2395                 :             :  *      compute_scalar_stats() -- compute column statistics
    2396                 :             :  *
    2397                 :             :  *      We use this when we can find "=" and "<" operators for the datatype.
    2398                 :             :  *
    2399                 :             :  *      We determine the fraction of non-null rows, the average width, the
    2400                 :             :  *      most common values, the (estimated) number of distinct values, the
    2401                 :             :  *      distribution histogram, and the correlation of physical to logical order.
    2402                 :             :  *
    2403                 :             :  *      The desired stats can be determined fairly easily after sorting the
    2404                 :             :  *      data values into order.
    2405                 :             :  */
    2406                 :             : static void
    2407                 :        2442 : compute_scalar_stats(VacAttrStatsP stats,
    2408                 :             :                                          AnalyzeAttrFetchFunc fetchfunc,
    2409                 :             :                                          int samplerows,
    2410                 :             :                                          double totalrows)
    2411                 :             : {
    2412                 :        2442 :         int                     i;
    2413                 :        2442 :         int                     null_cnt = 0;
    2414                 :        2442 :         int                     nonnull_cnt = 0;
    2415                 :        2442 :         int                     toowide_cnt = 0;
    2416                 :        2442 :         double          total_width = 0;
    2417         [ +  + ]:        3176 :         bool            is_varlena = (!stats->attrtype->typbyval &&
    2418                 :         734 :                                                           stats->attrtype->typlen == -1);
    2419         [ +  + ]:        3176 :         bool            is_varwidth = (!stats->attrtype->typbyval &&
    2420                 :         734 :                                                            stats->attrtype->typlen < 0);
    2421                 :        2442 :         double          corr_xysum;
    2422                 :        2442 :         SortSupportData ssup;
    2423                 :        2442 :         ScalarItem *values;
    2424                 :        2442 :         int                     values_cnt = 0;
    2425                 :        2442 :         int                *tupnoLink;
    2426                 :        2442 :         ScalarMCVItem *track;
    2427                 :        2442 :         int                     track_cnt = 0;
    2428                 :        2442 :         int                     num_mcv = stats->attstattarget;
    2429                 :        2442 :         int                     num_bins = stats->attstattarget;
    2430                 :        2442 :         StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;
    2431                 :             : 
    2432                 :        2442 :         values = (ScalarItem *) palloc(samplerows * sizeof(ScalarItem));
    2433                 :        2442 :         tupnoLink = (int *) palloc(samplerows * sizeof(int));
    2434                 :        2442 :         track = (ScalarMCVItem *) palloc(num_mcv * sizeof(ScalarMCVItem));
    2435                 :             : 
    2436                 :        2442 :         memset(&ssup, 0, sizeof(ssup));
    2437                 :        2442 :         ssup.ssup_cxt = CurrentMemoryContext;
    2438                 :        2442 :         ssup.ssup_collation = stats->attrcollid;
    2439                 :        2442 :         ssup.ssup_nulls_first = false;
    2440                 :             : 
    2441                 :             :         /*
    2442                 :             :          * For now, don't perform abbreviated key conversion, because full values
    2443                 :             :          * are required for MCV slot generation.  Supporting that optimization
    2444                 :             :          * would necessitate teaching compare_scalars() to call a tie-breaker.
    2445                 :             :          */
    2446                 :        2442 :         ssup.abbreviate = false;
    2447                 :             : 
    2448                 :        2442 :         PrepareSortSupportFromOrderingOp(mystats->ltopr, &ssup);
    2449                 :             : 
    2450                 :             :         /* Initial scan to find sortable values */
    2451         [ +  + ]:     3669155 :         for (i = 0; i < samplerows; i++)
    2452                 :             :         {
    2453                 :     3666713 :                 Datum           value;
    2454                 :     3666713 :                 bool            isnull;
    2455                 :             : 
    2456                 :     3666713 :                 vacuum_delay_point(true);
    2457                 :             : 
    2458                 :     3666713 :                 value = fetchfunc(stats, i, &isnull);
    2459                 :             : 
    2460                 :             :                 /* Check for null/nonnull */
    2461         [ +  + ]:     3666713 :                 if (isnull)
    2462                 :             :                 {
    2463                 :      401316 :                         null_cnt++;
    2464                 :      401316 :                         continue;
    2465                 :             :                 }
    2466                 :     3265397 :                 nonnull_cnt++;
    2467                 :             : 
    2468                 :             :                 /*
    2469                 :             :                  * If it's a variable-width field, add up widths for average width
    2470                 :             :                  * calculation.  Note that if the value is toasted, we use the toasted
    2471                 :             :                  * width.  We don't bother with this calculation if it's a fixed-width
    2472                 :             :                  * type.
    2473                 :             :                  */
    2474         [ +  + ]:     3265397 :                 if (is_varlena)
    2475                 :             :                 {
    2476                 :      708657 :                         total_width += VARSIZE_ANY(DatumGetPointer(value));
    2477                 :             : 
    2478                 :             :                         /*
    2479                 :             :                          * If the value is toasted, we want to detoast it just once to
    2480                 :             :                          * avoid repeated detoastings and resultant excess memory usage
    2481                 :             :                          * during the comparisons.  Also, check to see if the value is
    2482                 :             :                          * excessively wide, and if so don't detoast at all --- just
    2483                 :             :                          * ignore the value.
    2484                 :             :                          */
    2485         [ +  + ]:      708657 :                         if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
    2486                 :             :                         {
    2487                 :         208 :                                 toowide_cnt++;
    2488                 :         208 :                                 continue;
    2489                 :             :                         }
    2490                 :      708449 :                         value = PointerGetDatum(PG_DETOAST_DATUM(value));
    2491                 :      708449 :                 }
    2492         [ +  - ]:     2556740 :                 else if (is_varwidth)
    2493                 :             :                 {
    2494                 :             :                         /* must be cstring */
    2495                 :           0 :                         total_width += strlen(DatumGetCString(value)) + 1;
    2496                 :           0 :                 }
    2497                 :             : 
    2498                 :             :                 /* Add it to the list to be sorted */
    2499                 :     3265189 :                 values[values_cnt].value = value;
    2500                 :     3265189 :                 values[values_cnt].tupno = values_cnt;
    2501                 :     3265189 :                 tupnoLink[values_cnt] = values_cnt;
    2502                 :     3265189 :                 values_cnt++;
    2503         [ +  + ]:     3666713 :         }
    2504                 :             : 
    2505                 :             :         /* We can only compute real stats if we found some sortable values. */
    2506         [ +  + ]:        2442 :         if (values_cnt > 0)
    2507                 :             :         {
    2508                 :        2288 :                 int                     ndistinct,      /* # distinct values in sample */
    2509                 :             :                                         nmultiple,      /* # that appear multiple times */
    2510                 :             :                                         num_hist,
    2511                 :             :                                         dups_cnt;
    2512                 :        2288 :                 int                     slot_idx = 0;
    2513                 :        2288 :                 CompareScalarsContext cxt;
    2514                 :             : 
    2515                 :             :                 /* Sort the collected values */
    2516                 :        2288 :                 cxt.ssup = &ssup;
    2517                 :        2288 :                 cxt.tupnoLink = tupnoLink;
    2518                 :        2288 :                 qsort_interruptible(values, values_cnt, sizeof(ScalarItem),
    2519                 :             :                                                         compare_scalars, &cxt);
    2520                 :             : 
    2521                 :             :                 /*
    2522                 :             :                  * Now scan the values in order, find the most common ones, and also
    2523                 :             :                  * accumulate ordering-correlation statistics.
    2524                 :             :                  *
    2525                 :             :                  * To determine which are most common, we first have to count the
    2526                 :             :                  * number of duplicates of each value.  The duplicates are adjacent in
    2527                 :             :                  * the sorted list, so a brute-force approach is to compare successive
    2528                 :             :                  * datum values until we find two that are not equal. However, that
    2529                 :             :                  * requires N-1 invocations of the datum comparison routine, which are
    2530                 :             :                  * completely redundant with work that was done during the sort.  (The
    2531                 :             :                  * sort algorithm must at some point have compared each pair of items
    2532                 :             :                  * that are adjacent in the sorted order; otherwise it could not know
    2533                 :             :                  * that it's ordered the pair correctly.) We exploit this by having
    2534                 :             :                  * compare_scalars remember the highest tupno index that each
    2535                 :             :                  * ScalarItem has been found equal to.  At the end of the sort, a
    2536                 :             :                  * ScalarItem's tupnoLink will still point to itself if and only if it
    2537                 :             :                  * is the last item of its group of duplicates (since the group will
    2538                 :             :                  * be ordered by tupno).
    2539                 :             :                  */
    2540                 :        2288 :                 corr_xysum = 0;
    2541                 :        2288 :                 ndistinct = 0;
    2542                 :        2288 :                 nmultiple = 0;
    2543                 :        2288 :                 dups_cnt = 0;
    2544         [ +  + ]:     3267477 :                 for (i = 0; i < values_cnt; i++)
    2545                 :             :                 {
    2546                 :     3265189 :                         int                     tupno = values[i].tupno;
    2547                 :             : 
    2548                 :     3265189 :                         corr_xysum += ((double) i) * ((double) tupno);
    2549                 :     3265189 :                         dups_cnt++;
    2550         [ +  + ]:     3265189 :                         if (tupnoLink[tupno] == tupno)
    2551                 :             :                         {
    2552                 :             :                                 /* Reached end of duplicates of this value */
    2553                 :     1012794 :                                 ndistinct++;
    2554         [ +  + ]:     1012794 :                                 if (dups_cnt > 1)
    2555                 :             :                                 {
    2556                 :       50428 :                                         nmultiple++;
    2557   [ +  +  +  + ]:       50428 :                                         if (track_cnt < num_mcv ||
    2558                 :       26274 :                                                 dups_cnt > track[track_cnt - 1].count)
    2559                 :             :                                         {
    2560                 :             :                                                 /*
    2561                 :             :                                                  * Found a new item for the mcv list; find its
    2562                 :             :                                                  * position, bubbling down old items if needed. Loop
    2563                 :             :                                                  * invariant is that j points at an empty/ replaceable
    2564                 :             :                                                  * slot.
    2565                 :             :                                                  */
    2566                 :       24907 :                                                 int                     j;
    2567                 :             : 
    2568         [ +  + ]:       24907 :                                                 if (track_cnt < num_mcv)
    2569                 :       24154 :                                                         track_cnt++;
    2570         [ +  + ]:      102611 :                                                 for (j = track_cnt - 1; j > 0; j--)
    2571                 :             :                                                 {
    2572         [ +  + ]:      101004 :                                                         if (dups_cnt <= track[j - 1].count)
    2573                 :       23300 :                                                                 break;
    2574                 :       77704 :                                                         track[j].count = track[j - 1].count;
    2575                 :       77704 :                                                         track[j].first = track[j - 1].first;
    2576                 :       77704 :                                                 }
    2577                 :       24907 :                                                 track[j].count = dups_cnt;
    2578                 :       24907 :                                                 track[j].first = i + 1 - dups_cnt;
    2579                 :       24907 :                                         }
    2580                 :       50428 :                                 }
    2581                 :     1012794 :                                 dups_cnt = 0;
    2582                 :     1012794 :                         }
    2583                 :     3265189 :                 }
    2584                 :             : 
    2585                 :        2288 :                 stats->stats_valid = true;
    2586                 :             :                 /* Do the simple null-frac and width stats */
    2587                 :        2288 :                 stats->stanullfrac = (double) null_cnt / (double) samplerows;
    2588         [ +  + ]:        2288 :                 if (is_varwidth)
    2589                 :         552 :                         stats->stawidth = total_width / (double) nonnull_cnt;
    2590                 :             :                 else
    2591                 :        1736 :                         stats->stawidth = stats->attrtype->typlen;
    2592                 :             : 
    2593         [ +  + ]:        2288 :                 if (nmultiple == 0)
    2594                 :             :                 {
    2595                 :             :                         /*
    2596                 :             :                          * If we found no repeated non-null values, assume it's a unique
    2597                 :             :                          * column; but be sure to discount for any nulls we found.
    2598                 :             :                          */
    2599                 :        1039 :                         stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
    2600                 :        1039 :                 }
    2601   [ +  +  +  + ]:        1249 :                 else if (toowide_cnt == 0 && nmultiple == ndistinct)
    2602                 :             :                 {
    2603                 :             :                         /*
    2604                 :             :                          * Every value in the sample appeared more than once.  Assume the
    2605                 :             :                          * column has just these values.  (This case is meant to address
    2606                 :             :                          * columns with small, fixed sets of possible values, such as
    2607                 :             :                          * boolean or enum columns.  If there are any values that appear
    2608                 :             :                          * just once in the sample, including too-wide values, we should
    2609                 :             :                          * assume that that's not what we're dealing with.)
    2610                 :             :                          */
    2611                 :        1039 :                         stats->stadistinct = ndistinct;
    2612                 :        1039 :                 }
    2613                 :             :                 else
    2614                 :             :                 {
    2615                 :             :                         /*----------
    2616                 :             :                          * Estimate the number of distinct values using the estimator
    2617                 :             :                          * proposed by Haas and Stokes in IBM Research Report RJ 10025:
    2618                 :             :                          *              n*d / (n - f1 + f1*n/N)
    2619                 :             :                          * where f1 is the number of distinct values that occurred
    2620                 :             :                          * exactly once in our sample of n rows (from a total of N),
    2621                 :             :                          * and d is the total number of distinct values in the sample.
    2622                 :             :                          * This is their Duj1 estimator; the other estimators they
    2623                 :             :                          * recommend are considerably more complex, and are numerically
    2624                 :             :                          * very unstable when n is much smaller than N.
    2625                 :             :                          *
    2626                 :             :                          * In this calculation, we consider only non-nulls.  We used to
    2627                 :             :                          * include rows with null values in the n and N counts, but that
    2628                 :             :                          * leads to inaccurate answers in columns with many nulls, and
    2629                 :             :                          * it's intuitively bogus anyway considering the desired result is
    2630                 :             :                          * the number of distinct non-null values.
    2631                 :             :                          *
    2632                 :             :                          * Overwidth values are assumed to have been distinct.
    2633                 :             :                          *----------
    2634                 :             :                          */
    2635                 :         210 :                         int                     f1 = ndistinct - nmultiple + toowide_cnt;
    2636                 :         210 :                         int                     d = f1 + nmultiple;
    2637                 :         210 :                         double          n = samplerows - null_cnt;
    2638                 :         210 :                         double          N = totalrows * (1.0 - stats->stanullfrac);
    2639                 :         210 :                         double          stadistinct;
    2640                 :             : 
    2641                 :             :                         /* N == 0 shouldn't happen, but just in case ... */
    2642         [ +  - ]:         210 :                         if (N > 0)
    2643                 :         210 :                                 stadistinct = (n * d) / ((n - f1) + f1 * n / N);
    2644                 :             :                         else
    2645                 :           0 :                                 stadistinct = 0;
    2646                 :             : 
    2647                 :             :                         /* Clamp to sane range in case of roundoff error */
    2648         [ +  + ]:         210 :                         if (stadistinct < d)
    2649                 :           6 :                                 stadistinct = d;
    2650         [ +  - ]:         210 :                         if (stadistinct > N)
    2651                 :           0 :                                 stadistinct = N;
    2652                 :             :                         /* And round to integer */
    2653                 :         210 :                         stats->stadistinct = floor(stadistinct + 0.5);
    2654                 :         210 :                 }
    2655                 :             : 
    2656                 :             :                 /*
    2657                 :             :                  * If we estimated the number of distinct values at more than 10% of
    2658                 :             :                  * the total row count (a very arbitrary limit), then assume that
    2659                 :             :                  * stadistinct should scale with the row count rather than be a fixed
    2660                 :             :                  * value.
    2661                 :             :                  */
    2662         [ +  + ]:        2288 :                 if (stats->stadistinct > 0.1 * totalrows)
    2663                 :         333 :                         stats->stadistinct = -(stats->stadistinct / totalrows);
    2664                 :             : 
    2665                 :             :                 /*
    2666                 :             :                  * Decide how many values are worth storing as most-common values. If
    2667                 :             :                  * we are able to generate a complete MCV list (all the values in the
    2668                 :             :                  * sample will fit, and we think these are all the ones in the table),
    2669                 :             :                  * then do so.  Otherwise, store only those values that are
    2670                 :             :                  * significantly more common than the values not in the list.
    2671                 :             :                  *
    2672                 :             :                  * Note: the first of these cases is meant to address columns with
    2673                 :             :                  * small, fixed sets of possible values, such as boolean or enum
    2674                 :             :                  * columns.  If we can *completely* represent the column population by
    2675                 :             :                  * an MCV list that will fit into the stats target, then we should do
    2676                 :             :                  * so and thus provide the planner with complete information.  But if
    2677                 :             :                  * the MCV list is not complete, it's generally worth being more
    2678                 :             :                  * selective, and not just filling it all the way up to the stats
    2679                 :             :                  * target.
    2680                 :             :                  */
    2681   [ +  +  +  + ]:        2288 :                 if (track_cnt == ndistinct && toowide_cnt == 0 &&
    2682   [ +  +  -  + ]:        1005 :                         stats->stadistinct > 0 &&
    2683                 :         836 :                         track_cnt <= num_mcv)
    2684                 :             :                 {
    2685                 :             :                         /* Track list includes all values seen, and all will fit */
    2686                 :         836 :                         num_mcv = track_cnt;
    2687                 :         836 :                 }
    2688                 :             :                 else
    2689                 :             :                 {
    2690                 :        1452 :                         int                *mcv_counts;
    2691                 :             : 
    2692                 :             :                         /* Incomplete list; decide how many values are worth keeping */
    2693         [ +  + ]:        1452 :                         if (num_mcv > track_cnt)
    2694                 :        1381 :                                 num_mcv = track_cnt;
    2695                 :             : 
    2696         [ +  + ]:        1452 :                         if (num_mcv > 0)
    2697                 :             :                         {
    2698                 :         413 :                                 mcv_counts = (int *) palloc(num_mcv * sizeof(int));
    2699         [ +  + ]:       12247 :                                 for (i = 0; i < num_mcv; i++)
    2700                 :       11834 :                                         mcv_counts[i] = track[i].count;
    2701                 :             : 
    2702                 :         826 :                                 num_mcv = analyze_mcv_list(mcv_counts, num_mcv,
    2703                 :         413 :                                                                                    stats->stadistinct,
    2704                 :         413 :                                                                                    stats->stanullfrac,
    2705                 :         413 :                                                                                    samplerows, totalrows);
    2706                 :         413 :                         }
    2707                 :        1452 :                 }
    2708                 :             : 
    2709                 :             :                 /* Generate MCV slot entry */
    2710         [ +  + ]:        2288 :                 if (num_mcv > 0)
    2711                 :             :                 {
    2712                 :        1249 :                         MemoryContext old_context;
    2713                 :        1249 :                         Datum      *mcv_values;
    2714                 :        1249 :                         float4     *mcv_freqs;
    2715                 :             : 
    2716                 :             :                         /* Must copy the target values into anl_context */
    2717                 :        1249 :                         old_context = MemoryContextSwitchTo(stats->anl_context);
    2718                 :        1249 :                         mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
    2719                 :        1249 :                         mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
    2720         [ +  + ]:       25403 :                         for (i = 0; i < num_mcv; i++)
    2721                 :             :                         {
    2722                 :       48308 :                                 mcv_values[i] = datumCopy(values[track[i].first].value,
    2723                 :       24154 :                                                                                   stats->attrtype->typbyval,
    2724                 :       24154 :                                                                                   stats->attrtype->typlen);
    2725                 :       24154 :                                 mcv_freqs[i] = (double) track[i].count / (double) samplerows;
    2726                 :       24154 :                         }
    2727                 :        1249 :                         MemoryContextSwitchTo(old_context);
    2728                 :             : 
    2729                 :        1249 :                         stats->stakind[slot_idx] = STATISTIC_KIND_MCV;
    2730                 :        1249 :                         stats->staop[slot_idx] = mystats->eqopr;
    2731                 :        1249 :                         stats->stacoll[slot_idx] = stats->attrcollid;
    2732                 :        1249 :                         stats->stanumbers[slot_idx] = mcv_freqs;
    2733                 :        1249 :                         stats->numnumbers[slot_idx] = num_mcv;
    2734                 :        1249 :                         stats->stavalues[slot_idx] = mcv_values;
    2735                 :        1249 :                         stats->numvalues[slot_idx] = num_mcv;
    2736                 :             : 
    2737                 :             :                         /*
    2738                 :             :                          * Accept the defaults for stats->statypid and others. They have
    2739                 :             :                          * been set before we were called (see vacuum.h)
    2740                 :             :                          */
    2741                 :        1249 :                         slot_idx++;
    2742                 :        1249 :                 }
    2743                 :             : 
    2744                 :             :                 /*
    2745                 :             :                  * Generate a histogram slot entry if there are at least two distinct
    2746                 :             :                  * values not accounted for in the MCV list.  (This ensures the
    2747                 :             :                  * histogram won't collapse to empty or a singleton.)
    2748                 :             :                  */
    2749                 :        2288 :                 num_hist = ndistinct - num_mcv;
    2750         [ +  + ]:        2288 :                 if (num_hist > num_bins)
    2751                 :         456 :                         num_hist = num_bins + 1;
    2752         [ +  + ]:        2288 :                 if (num_hist >= 2)
    2753                 :             :                 {
    2754                 :        1135 :                         MemoryContext old_context;
    2755                 :        1135 :                         Datum      *hist_values;
    2756                 :        1135 :                         int                     nvals;
    2757                 :        1135 :                         int                     pos,
    2758                 :             :                                                 posfrac,
    2759                 :             :                                                 delta,
    2760                 :             :                                                 deltafrac;
    2761                 :             : 
    2762                 :             :                         /* Sort the MCV items into position order to speed next loop */
    2763                 :        1135 :                         qsort_interruptible(track, num_mcv, sizeof(ScalarMCVItem),
    2764                 :             :                                                                 compare_mcvs, NULL);
    2765                 :             : 
    2766                 :             :                         /*
    2767                 :             :                          * Collapse out the MCV items from the values[] array.
    2768                 :             :                          *
    2769                 :             :                          * Note we destroy the values[] array here... but we don't need it
    2770                 :             :                          * for anything more.  We do, however, still need values_cnt.
    2771                 :             :                          * nvals will be the number of remaining entries in values[].
    2772                 :             :                          */
    2773         [ +  + ]:        1135 :                         if (num_mcv > 0)
    2774                 :             :                         {
    2775                 :         185 :                                 int                     src,
    2776                 :             :                                                         dest;
    2777                 :         185 :                                 int                     j;
    2778                 :             : 
    2779                 :         185 :                                 src = dest = 0;
    2780                 :         185 :                                 j = 0;                  /* index of next interesting MCV item */
    2781         [ +  + ]:        9605 :                                 while (src < values_cnt)
    2782                 :             :                                 {
    2783                 :        9420 :                                         int                     ncopy;
    2784                 :             : 
    2785         [ +  + ]:        9420 :                                         if (j < num_mcv)
    2786                 :             :                                         {
    2787                 :        9274 :                                                 int                     first = track[j].first;
    2788                 :             : 
    2789         [ +  + ]:        9274 :                                                 if (src >= first)
    2790                 :             :                                                 {
    2791                 :             :                                                         /* advance past this MCV item */
    2792                 :        7880 :                                                         src = first + track[j].count;
    2793                 :        7880 :                                                         j++;
    2794                 :        7880 :                                                         continue;
    2795                 :             :                                                 }
    2796                 :        1394 :                                                 ncopy = first - src;
    2797         [ +  + ]:        9274 :                                         }
    2798                 :             :                                         else
    2799                 :         146 :                                                 ncopy = values_cnt - src;
    2800                 :        1540 :                                         memmove(&values[dest], &values[src],
    2801                 :             :                                                         ncopy * sizeof(ScalarItem));
    2802                 :        1540 :                                         src += ncopy;
    2803                 :        1540 :                                         dest += ncopy;
    2804         [ +  + ]:        9420 :                                 }
    2805                 :         185 :                                 nvals = dest;
    2806                 :         185 :                         }
    2807                 :             :                         else
    2808                 :         950 :                                 nvals = values_cnt;
    2809         [ +  - ]:        1135 :                         Assert(nvals >= num_hist);
    2810                 :             : 
    2811                 :             :                         /* Must copy the target values into anl_context */
    2812                 :        1135 :                         old_context = MemoryContextSwitchTo(stats->anl_context);
    2813                 :        1135 :                         hist_values = (Datum *) palloc(num_hist * sizeof(Datum));
    2814                 :             : 
    2815                 :             :                         /*
    2816                 :             :                          * The object of this loop is to copy the first and last values[]
    2817                 :             :                          * entries along with evenly-spaced values in between.  So the
    2818                 :             :                          * i'th value is values[(i * (nvals - 1)) / (num_hist - 1)].  But
    2819                 :             :                          * computing that subscript directly risks integer overflow when
    2820                 :             :                          * the stats target is more than a couple thousand.  Instead we
    2821                 :             :                          * add (nvals - 1) / (num_hist - 1) to pos at each step, tracking
    2822                 :             :                          * the integral and fractional parts of the sum separately.
    2823                 :             :                          */
    2824                 :        1135 :                         delta = (nvals - 1) / (num_hist - 1);
    2825                 :        1135 :                         deltafrac = (nvals - 1) % (num_hist - 1);
    2826                 :        1135 :                         pos = posfrac = 0;
    2827                 :             : 
    2828         [ +  + ]:      105079 :                         for (i = 0; i < num_hist; i++)
    2829                 :             :                         {
    2830                 :      207888 :                                 hist_values[i] = datumCopy(values[pos].value,
    2831                 :      103944 :                                                                                    stats->attrtype->typbyval,
    2832                 :      103944 :                                                                                    stats->attrtype->typlen);
    2833                 :      103944 :                                 pos += delta;
    2834                 :      103944 :                                 posfrac += deltafrac;
    2835         [ +  + ]:      103944 :                                 if (posfrac >= (num_hist - 1))
    2836                 :             :                                 {
    2837                 :             :                                         /* fractional part exceeds 1, carry to integer part */
    2838                 :       33365 :                                         pos++;
    2839                 :       33365 :                                         posfrac -= (num_hist - 1);
    2840                 :       33365 :                                 }
    2841                 :      103944 :                         }
    2842                 :             : 
    2843                 :        1135 :                         MemoryContextSwitchTo(old_context);
    2844                 :             : 
    2845                 :        1135 :                         stats->stakind[slot_idx] = STATISTIC_KIND_HISTOGRAM;
    2846                 :        1135 :                         stats->staop[slot_idx] = mystats->ltopr;
    2847                 :        1135 :                         stats->stacoll[slot_idx] = stats->attrcollid;
    2848                 :        1135 :                         stats->stavalues[slot_idx] = hist_values;
    2849                 :        1135 :                         stats->numvalues[slot_idx] = num_hist;
    2850                 :             : 
    2851                 :             :                         /*
    2852                 :             :                          * Accept the defaults for stats->statypid and others. They have
    2853                 :             :                          * been set before we were called (see vacuum.h)
    2854                 :             :                          */
    2855                 :        1135 :                         slot_idx++;
    2856                 :        1135 :                 }
    2857                 :             : 
    2858                 :             :                 /* Generate a correlation entry if there are multiple values */
    2859         [ +  + ]:        2288 :                 if (values_cnt > 1)
    2860                 :             :                 {
    2861                 :        2199 :                         MemoryContext old_context;
    2862                 :        2199 :                         float4     *corrs;
    2863                 :        2199 :                         double          corr_xsum,
    2864                 :             :                                                 corr_x2sum;
    2865                 :             : 
    2866                 :             :                         /* Must copy the target values into anl_context */
    2867                 :        2199 :                         old_context = MemoryContextSwitchTo(stats->anl_context);
    2868                 :        2199 :                         corrs = palloc_object(float4);
    2869                 :        2199 :                         MemoryContextSwitchTo(old_context);
    2870                 :             : 
    2871                 :             :                         /*----------
    2872                 :             :                          * Since we know the x and y value sets are both
    2873                 :             :                          *              0, 1, ..., values_cnt-1
    2874                 :             :                          * we have sum(x) = sum(y) =
    2875                 :             :                          *              (values_cnt-1)*values_cnt / 2
    2876                 :             :                          * and sum(x^2) = sum(y^2) =
    2877                 :             :                          *              (values_cnt-1)*values_cnt*(2*values_cnt-1) / 6.
    2878                 :             :                          *----------
    2879                 :             :                          */
    2880                 :        6597 :                         corr_xsum = ((double) (values_cnt - 1)) *
    2881                 :        4398 :                                 ((double) values_cnt) / 2.0;
    2882                 :        6597 :                         corr_x2sum = ((double) (values_cnt - 1)) *
    2883                 :        4398 :                                 ((double) values_cnt) * (double) (2 * values_cnt - 1) / 6.0;
    2884                 :             : 
    2885                 :             :                         /* And the correlation coefficient reduces to */
    2886                 :        4398 :                         corrs[0] = (values_cnt * corr_xysum - corr_xsum * corr_xsum) /
    2887                 :        2199 :                                 (values_cnt * corr_x2sum - corr_xsum * corr_xsum);
    2888                 :             : 
    2889                 :        2199 :                         stats->stakind[slot_idx] = STATISTIC_KIND_CORRELATION;
    2890                 :        2199 :                         stats->staop[slot_idx] = mystats->ltopr;
    2891                 :        2199 :                         stats->stacoll[slot_idx] = stats->attrcollid;
    2892                 :        2199 :                         stats->stanumbers[slot_idx] = corrs;
    2893                 :        2199 :                         stats->numnumbers[slot_idx] = 1;
    2894                 :        2199 :                         slot_idx++;
    2895                 :        2199 :                 }
    2896                 :        2288 :         }
    2897         [ +  + ]:         154 :         else if (nonnull_cnt > 0)
    2898                 :             :         {
    2899                 :             :                 /* We found some non-null values, but they were all too wide */
    2900         [ +  - ]:           2 :                 Assert(nonnull_cnt == toowide_cnt);
    2901                 :           2 :                 stats->stats_valid = true;
    2902                 :             :                 /* Do the simple null-frac and width stats */
    2903                 :           2 :                 stats->stanullfrac = (double) null_cnt / (double) samplerows;
    2904         [ +  - ]:           2 :                 if (is_varwidth)
    2905                 :           2 :                         stats->stawidth = total_width / (double) nonnull_cnt;
    2906                 :             :                 else
    2907                 :           0 :                         stats->stawidth = stats->attrtype->typlen;
    2908                 :             :                 /* Assume all too-wide values are distinct, so it's a unique column */
    2909                 :           2 :                 stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
    2910                 :           2 :         }
    2911         [ -  + ]:         152 :         else if (null_cnt > 0)
    2912                 :             :         {
    2913                 :             :                 /* We found only nulls; assume the column is entirely null */
    2914                 :         152 :                 stats->stats_valid = true;
    2915                 :         152 :                 stats->stanullfrac = 1.0;
    2916         [ +  + ]:         152 :                 if (is_varwidth)
    2917                 :         107 :                         stats->stawidth = 0; /* "unknown" */
    2918                 :             :                 else
    2919                 :          45 :                         stats->stawidth = stats->attrtype->typlen;
    2920                 :         152 :                 stats->stadistinct = 0.0;    /* "unknown" */
    2921                 :         152 :         }
    2922                 :             : 
    2923                 :             :         /* We don't need to bother cleaning up any of our temporary palloc's */
    2924                 :        2442 : }
    2925                 :             : 
    2926                 :             : /*
    2927                 :             :  * Comparator for sorting ScalarItems
    2928                 :             :  *
    2929                 :             :  * Aside from sorting the items, we update the tupnoLink[] array
    2930                 :             :  * whenever two ScalarItems are found to contain equal datums.  The array
    2931                 :             :  * is indexed by tupno; for each ScalarItem, it contains the highest
    2932                 :             :  * tupno that that item's datum has been found to be equal to.  This allows
    2933                 :             :  * us to avoid additional comparisons in compute_scalar_stats().
    2934                 :             :  */
    2935                 :             : static int
    2936                 :    34631455 : compare_scalars(const void *a, const void *b, void *arg)
    2937                 :             : {
    2938                 :    34631455 :         Datum           da = ((const ScalarItem *) a)->value;
    2939                 :    34631455 :         int                     ta = ((const ScalarItem *) a)->tupno;
    2940                 :    34631455 :         Datum           db = ((const ScalarItem *) b)->value;
    2941                 :    34631455 :         int                     tb = ((const ScalarItem *) b)->tupno;
    2942                 :    34631455 :         CompareScalarsContext *cxt = (CompareScalarsContext *) arg;
    2943                 :    34631455 :         int                     compare;
    2944                 :             : 
    2945                 :    34631455 :         compare = ApplySortComparator(da, false, db, false, cxt->ssup);
    2946         [ +  + ]:    34631455 :         if (compare != 0)
    2947                 :    16396265 :                 return compare;
    2948                 :             : 
    2949                 :             :         /*
    2950                 :             :          * The two datums are equal, so update cxt->tupnoLink[].
    2951                 :             :          */
    2952         [ +  + ]:    18235190 :         if (cxt->tupnoLink[ta] < tb)
    2953                 :     2187365 :                 cxt->tupnoLink[ta] = tb;
    2954         [ +  + ]:    18235190 :         if (cxt->tupnoLink[tb] < ta)
    2955                 :      206395 :                 cxt->tupnoLink[tb] = ta;
    2956                 :             : 
    2957                 :             :         /*
    2958                 :             :          * For equal datums, sort by tupno
    2959                 :             :          */
    2960                 :    18235190 :         return ta - tb;
    2961                 :    34631455 : }
    2962                 :             : 
    2963                 :             : /*
    2964                 :             :  * Comparator for sorting ScalarMCVItems by position
    2965                 :             :  */
    2966                 :             : static int
    2967                 :       21625 : compare_mcvs(const void *a, const void *b, void *arg)
    2968                 :             : {
    2969                 :       21625 :         int                     da = ((const ScalarMCVItem *) a)->first;
    2970                 :       21625 :         int                     db = ((const ScalarMCVItem *) b)->first;
    2971                 :             : 
    2972                 :       43250 :         return da - db;
    2973                 :       21625 : }
    2974                 :             : 
    2975                 :             : /*
    2976                 :             :  * Analyze the list of common values in the sample and decide how many are
    2977                 :             :  * worth storing in the table's MCV list.
    2978                 :             :  *
    2979                 :             :  * mcv_counts is assumed to be a list of the counts of the most common values
    2980                 :             :  * seen in the sample, starting with the most common.  The return value is the
    2981                 :             :  * number that are significantly more common than the values not in the list,
    2982                 :             :  * and which are therefore deemed worth storing in the table's MCV list.
    2983                 :             :  */
    2984                 :             : static int
    2985                 :         416 : analyze_mcv_list(int *mcv_counts,
    2986                 :             :                                  int num_mcv,
    2987                 :             :                                  double stadistinct,
    2988                 :             :                                  double stanullfrac,
    2989                 :             :                                  int samplerows,
    2990                 :             :                                  double totalrows)
    2991                 :             : {
    2992                 :         416 :         double          ndistinct_table;
    2993                 :         416 :         double          sumcount;
    2994                 :         416 :         int                     i;
    2995                 :             : 
    2996                 :             :         /*
    2997                 :             :          * If the entire table was sampled, keep the whole list.  This also
    2998                 :             :          * protects us against division by zero in the code below.
    2999                 :             :          */
    3000   [ -  +  #  # ]:         416 :         if (samplerows == totalrows || totalrows <= 1.0)
    3001                 :         416 :                 return num_mcv;
    3002                 :             : 
    3003                 :             :         /* Re-extract the estimated number of distinct nonnull values in table */
    3004                 :           0 :         ndistinct_table = stadistinct;
    3005         [ #  # ]:           0 :         if (ndistinct_table < 0)
    3006                 :           0 :                 ndistinct_table = -ndistinct_table * totalrows;
    3007                 :             : 
    3008                 :             :         /*
    3009                 :             :          * Exclude the least common values from the MCV list, if they are not
    3010                 :             :          * significantly more common than the estimated selectivity they would
    3011                 :             :          * have if they weren't in the list.  All non-MCV values are assumed to be
    3012                 :             :          * equally common, after taking into account the frequencies of all the
    3013                 :             :          * values in the MCV list and the number of nulls (c.f. eqsel()).
    3014                 :             :          *
    3015                 :             :          * Here sumcount tracks the total count of all but the last (least common)
    3016                 :             :          * value in the MCV list, allowing us to determine the effect of excluding
    3017                 :             :          * that value from the list.
    3018                 :             :          *
    3019                 :             :          * Note that we deliberately do this by removing values from the full
    3020                 :             :          * list, rather than starting with an empty list and adding values,
    3021                 :             :          * because the latter approach can fail to add any values if all the most
    3022                 :             :          * common values have around the same frequency and make up the majority
    3023                 :             :          * of the table, so that the overall average frequency of all values is
    3024                 :             :          * roughly the same as that of the common values.  This would lead to any
    3025                 :             :          * uncommon values being significantly overestimated.
    3026                 :             :          */
    3027                 :           0 :         sumcount = 0.0;
    3028         [ #  # ]:           0 :         for (i = 0; i < num_mcv - 1; i++)
    3029                 :           0 :                 sumcount += mcv_counts[i];
    3030                 :             : 
    3031         [ #  # ]:           0 :         while (num_mcv > 0)
    3032                 :             :         {
    3033                 :           0 :                 double          selec,
    3034                 :             :                                         otherdistinct,
    3035                 :             :                                         N,
    3036                 :             :                                         n,
    3037                 :             :                                         K,
    3038                 :             :                                         variance,
    3039                 :             :                                         stddev;
    3040                 :             : 
    3041                 :             :                 /*
    3042                 :             :                  * Estimated selectivity the least common value would have if it
    3043                 :             :                  * wasn't in the MCV list (c.f. eqsel()).
    3044                 :             :                  */
    3045                 :           0 :                 selec = 1.0 - sumcount / samplerows - stanullfrac;
    3046         [ #  # ]:           0 :                 if (selec < 0.0)
    3047                 :           0 :                         selec = 0.0;
    3048         [ #  # ]:           0 :                 if (selec > 1.0)
    3049                 :           0 :                         selec = 1.0;
    3050                 :           0 :                 otherdistinct = ndistinct_table - (num_mcv - 1);
    3051         [ #  # ]:           0 :                 if (otherdistinct > 1)
    3052                 :           0 :                         selec /= otherdistinct;
    3053                 :             : 
    3054                 :             :                 /*
    3055                 :             :                  * If the value is kept in the MCV list, its population frequency is
    3056                 :             :                  * assumed to equal its sample frequency.  We use the lower end of a
    3057                 :             :                  * textbook continuity-corrected Wald-type confidence interval to
    3058                 :             :                  * determine if that is significantly more common than the non-MCV
    3059                 :             :                  * frequency --- specifically we assume the population frequency is
    3060                 :             :                  * highly likely to be within around 2 standard errors of the sample
    3061                 :             :                  * frequency, which equates to an interval of 2 standard deviations
    3062                 :             :                  * either side of the sample count, plus an additional 0.5 for the
    3063                 :             :                  * continuity correction.  Since we are sampling without replacement,
    3064                 :             :                  * this is a hypergeometric distribution.
    3065                 :             :                  *
    3066                 :             :                  * XXX: Empirically, this approach seems to work quite well, but it
    3067                 :             :                  * may be worth considering more advanced techniques for estimating
    3068                 :             :                  * the confidence interval of the hypergeometric distribution.
    3069                 :             :                  */
    3070                 :           0 :                 N = totalrows;
    3071                 :           0 :                 n = samplerows;
    3072                 :           0 :                 K = N * mcv_counts[num_mcv - 1] / n;
    3073                 :           0 :                 variance = n * K * (N - K) * (N - n) / (N * N * (N - 1));
    3074                 :           0 :                 stddev = sqrt(variance);
    3075                 :             : 
    3076         [ #  # ]:           0 :                 if (mcv_counts[num_mcv - 1] > selec * samplerows + 2 * stddev + 0.5)
    3077                 :             :                 {
    3078                 :             :                         /*
    3079                 :             :                          * The value is significantly more common than the non-MCV
    3080                 :             :                          * selectivity would suggest.  Keep it, and all the other more
    3081                 :             :                          * common values in the list.
    3082                 :             :                          */
    3083                 :           0 :                         break;
    3084                 :             :                 }
    3085                 :             :                 else
    3086                 :             :                 {
    3087                 :             :                         /* Discard this value and consider the next least common value */
    3088                 :           0 :                         num_mcv--;
    3089         [ #  # ]:           0 :                         if (num_mcv == 0)
    3090                 :           0 :                                 break;
    3091                 :           0 :                         sumcount -= mcv_counts[num_mcv - 1];
    3092                 :             :                 }
    3093      [ #  #  # ]:           0 :         }
    3094                 :           0 :         return num_mcv;
    3095                 :         416 : }

Generated by: LCOV version 2.3.2-1