LCOV - code coverage report
Current view: top level - src/backend/statistics - mcv.c (source / functions) Coverage Total Hit
Test: Code coverage Lines: 91.7 % 833 764
Test Date: 2026-01-26 10:56:24 Functions: 77.3 % 22 17
Legend: Lines:     hit not hit
Branches: + taken - not taken # not executed
Branches: 62.1 % 520 323

             Branch data     Line data    Source code
       1                 :             : /*-------------------------------------------------------------------------
       2                 :             :  *
       3                 :             :  * mcv.c
       4                 :             :  *        POSTGRES multivariate MCV lists
       5                 :             :  *
       6                 :             :  *
       7                 :             :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       8                 :             :  * Portions Copyright (c) 1994, Regents of the University of California
       9                 :             :  *
      10                 :             :  * IDENTIFICATION
      11                 :             :  *        src/backend/statistics/mcv.c
      12                 :             :  *
      13                 :             :  *-------------------------------------------------------------------------
      14                 :             :  */
      15                 :             : #include "postgres.h"
      16                 :             : 
      17                 :             : #include "access/htup_details.h"
      18                 :             : #include "catalog/pg_statistic_ext.h"
      19                 :             : #include "catalog/pg_statistic_ext_data.h"
      20                 :             : #include "fmgr.h"
      21                 :             : #include "funcapi.h"
      22                 :             : #include "nodes/nodeFuncs.h"
      23                 :             : #include "statistics/extended_stats_internal.h"
      24                 :             : #include "statistics/statistics.h"
      25                 :             : #include "utils/array.h"
      26                 :             : #include "utils/builtins.h"
      27                 :             : #include "utils/fmgrprotos.h"
      28                 :             : #include "utils/lsyscache.h"
      29                 :             : #include "utils/selfuncs.h"
      30                 :             : #include "utils/syscache.h"
      31                 :             : #include "utils/typcache.h"
      32                 :             : 
      33                 :             : /*
      34                 :             :  * Computes size of a serialized MCV item, depending on the number of
      35                 :             :  * dimensions (columns) the statistic is defined on. The datum values are
      36                 :             :  * stored in a separate array (deduplicated, to minimize the size), and
      37                 :             :  * so the serialized items only store uint16 indexes into that array.
      38                 :             :  *
      39                 :             :  * Each serialized item stores (in this order):
      40                 :             :  *
      41                 :             :  * - indexes to values    (ndim * sizeof(uint16))
      42                 :             :  * - null flags                   (ndim * sizeof(bool))
      43                 :             :  * - frequency                    (sizeof(double))
      44                 :             :  * - base_frequency               (sizeof(double))
      45                 :             :  *
      46                 :             :  * There is no alignment padding within an MCV item.
      47                 :             :  * So in total each MCV item requires this many bytes:
      48                 :             :  *
      49                 :             :  *       ndim * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double)
      50                 :             :  */
      51                 :             : #define ITEM_SIZE(ndims)        \
      52                 :             :         ((ndims) * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double))
      53                 :             : 
      54                 :             : /*
      55                 :             :  * Used to compute size of serialized MCV list representation.
      56                 :             :  */
      57                 :             : #define MinSizeOfMCVList                \
      58                 :             :         (VARHDRSZ + sizeof(uint32) * 3 + sizeof(AttrNumber))
      59                 :             : 
      60                 :             : /*
      61                 :             :  * Size of the serialized MCV list, excluding the space needed for
      62                 :             :  * deduplicated per-dimension values. The macro is meant to be used
      63                 :             :  * when it's not yet safe to access the serialized info about amount
      64                 :             :  * of data for each column.
      65                 :             :  */
      66                 :             : #define SizeOfMCVList(ndims,nitems)     \
      67                 :             :         ((MinSizeOfMCVList + sizeof(Oid) * (ndims)) + \
      68                 :             :          ((ndims) * sizeof(DimensionInfo)) + \
      69                 :             :          ((nitems) * ITEM_SIZE(ndims)))
      70                 :             : 
      71                 :             : static MultiSortSupport build_mss(StatsBuildData *data);
      72                 :             : 
      73                 :             : static SortItem *build_distinct_groups(int numrows, SortItem *items,
      74                 :             :                                                                            MultiSortSupport mss, int *ndistinct);
      75                 :             : 
      76                 :             : static SortItem **build_column_frequencies(SortItem *groups, int ngroups,
      77                 :             :                                                                                    MultiSortSupport mss, int *ncounts);
      78                 :             : 
      79                 :             : static int      count_distinct_groups(int numrows, SortItem *items,
      80                 :             :                                                                   MultiSortSupport mss);
      81                 :             : 
      82                 :             : /*
      83                 :             :  * Compute new value for bitmap item, considering whether it's used for
      84                 :             :  * clauses connected by AND/OR.
      85                 :             :  */
      86                 :             : #define RESULT_MERGE(value, is_or, match) \
      87                 :             :         ((is_or) ? ((value) || (match)) : ((value) && (match)))
      88                 :             : 
      89                 :             : /*
      90                 :             :  * When processing a list of clauses, the bitmap item may get set to a value
      91                 :             :  * such that additional clauses can't change it. For example, when processing
      92                 :             :  * a list of clauses connected to AND, as soon as the item gets set to 'false'
      93                 :             :  * then it'll remain like that. Similarly clauses connected by OR and 'true'.
      94                 :             :  *
      95                 :             :  * Returns true when the value in the bitmap can't change no matter how the
      96                 :             :  * remaining clauses are evaluated.
      97                 :             :  */
      98                 :             : #define RESULT_IS_FINAL(value, is_or)   ((is_or) ? (value) : (!(value)))
      99                 :             : 
     100                 :             : /*
     101                 :             :  * get_mincount_for_mcv_list
     102                 :             :  *              Determine the minimum number of times a value needs to appear in
     103                 :             :  *              the sample for it to be included in the MCV list.
     104                 :             :  *
     105                 :             :  * We want to keep only values that appear sufficiently often in the
     106                 :             :  * sample that it is reasonable to extrapolate their sample frequencies to
     107                 :             :  * the entire table.  We do this by placing an upper bound on the relative
     108                 :             :  * standard error of the sample frequency, so that any estimates the
     109                 :             :  * planner generates from the MCV statistics can be expected to be
     110                 :             :  * reasonably accurate.
     111                 :             :  *
     112                 :             :  * Since we are sampling without replacement, the sample frequency of a
     113                 :             :  * particular value is described by a hypergeometric distribution.  A
     114                 :             :  * common rule of thumb when estimating errors in this situation is to
     115                 :             :  * require at least 10 instances of the value in the sample, in which case
     116                 :             :  * the distribution can be approximated by a normal distribution, and
     117                 :             :  * standard error analysis techniques can be applied.  Given a sample size
     118                 :             :  * of n, a population size of N, and a sample frequency of p=cnt/n, the
     119                 :             :  * standard error of the proportion p is given by
     120                 :             :  *              SE = sqrt(p*(1-p)/n) * sqrt((N-n)/(N-1))
     121                 :             :  * where the second term is the finite population correction.  To get
     122                 :             :  * reasonably accurate planner estimates, we impose an upper bound on the
     123                 :             :  * relative standard error of 20% -- i.e., SE/p < 0.2.  This 20% relative
     124                 :             :  * error bound is fairly arbitrary, but has been found empirically to work
     125                 :             :  * well.  Rearranging this formula gives a lower bound on the number of
     126                 :             :  * instances of the value seen:
     127                 :             :  *              cnt > n*(N-n) / (N-n+0.04*n*(N-1))
     128                 :             :  * This bound is at most 25, and approaches 0 as n approaches 0 or N. The
     129                 :             :  * case where n approaches 0 cannot happen in practice, since the sample
     130                 :             :  * size is at least 300.  The case where n approaches N corresponds to
     131                 :             :  * sampling the whole table, in which case it is reasonable to keep
     132                 :             :  * the whole MCV list (have no lower bound), so it makes sense to apply
     133                 :             :  * this formula for all inputs, even though the above derivation is
     134                 :             :  * technically only valid when the right hand side is at least around 10.
     135                 :             :  *
     136                 :             :  * An alternative way to look at this formula is as follows -- assume that
     137                 :             :  * the number of instances of the value seen scales up to the entire
     138                 :             :  * table, so that the population count is K=N*cnt/n. Then the distribution
     139                 :             :  * in the sample is a hypergeometric distribution parameterised by N, n
     140                 :             :  * and K, and the bound above is mathematically equivalent to demanding
     141                 :             :  * that the standard deviation of that distribution is less than 20% of
     142                 :             :  * its mean.  Thus the relative errors in any planner estimates produced
     143                 :             :  * from the MCV statistics are likely to be not too large.
     144                 :             :  */
     145                 :             : static double
     146                 :          36 : get_mincount_for_mcv_list(int samplerows, double totalrows)
     147                 :             : {
     148                 :          36 :         double          n = samplerows;
     149                 :          36 :         double          N = totalrows;
     150                 :          36 :         double          numer,
     151                 :             :                                 denom;
     152                 :             : 
     153                 :          36 :         numer = n * (N - n);
     154                 :          36 :         denom = N - n + 0.04 * n * (N - 1);
     155                 :             : 
     156                 :             :         /* Guard against division by zero (possible if n = N = 1) */
     157         [ +  + ]:          36 :         if (denom == 0.0)
     158                 :           2 :                 return 0.0;
     159                 :             : 
     160                 :          34 :         return numer / denom;
     161                 :          36 : }
     162                 :             : 
     163                 :             : /*
     164                 :             :  * Builds MCV list from the set of sampled rows.
     165                 :             :  *
     166                 :             :  * The algorithm is quite simple:
     167                 :             :  *
     168                 :             :  *         (1) sort the data (default collation, '<' for the data type)
     169                 :             :  *
     170                 :             :  *         (2) count distinct groups, decide how many to keep
     171                 :             :  *
     172                 :             :  *         (3) build the MCV list using the threshold determined in (2)
     173                 :             :  *
     174                 :             :  *         (4) remove rows represented by the MCV from the sample
     175                 :             :  *
     176                 :             :  */
     177                 :             : MCVList *
     178                 :          36 : statext_mcv_build(StatsBuildData *data, double totalrows, int stattarget)
     179                 :             : {
     180                 :          36 :         int                     i,
     181                 :             :                                 numattrs,
     182                 :             :                                 numrows,
     183                 :             :                                 ngroups,
     184                 :             :                                 nitems;
     185                 :          36 :         double          mincount;
     186                 :          36 :         SortItem   *items;
     187                 :          36 :         SortItem   *groups;
     188                 :          36 :         MCVList    *mcvlist = NULL;
     189                 :          36 :         MultiSortSupport mss;
     190                 :             : 
     191                 :             :         /* comparator for all the columns */
     192                 :          36 :         mss = build_mss(data);
     193                 :             : 
     194                 :             :         /* sort the rows */
     195                 :          72 :         items = build_sorted_items(data, &nitems, mss,
     196                 :          36 :                                                            data->nattnums, data->attnums);
     197                 :             : 
     198         [ +  - ]:          36 :         if (!items)
     199                 :           0 :                 return NULL;
     200                 :             : 
     201                 :             :         /* for convenience */
     202                 :          36 :         numattrs = data->nattnums;
     203                 :          36 :         numrows = data->numrows;
     204                 :             : 
     205                 :             :         /* transform the sorted rows into groups (sorted by frequency) */
     206                 :          36 :         groups = build_distinct_groups(nitems, items, mss, &ngroups);
     207                 :             : 
     208                 :             :         /*
     209                 :             :          * The maximum number of MCV items to store, based on the statistics
     210                 :             :          * target we computed for the statistics object (from the target set for
     211                 :             :          * the object itself, attributes and the system default). In any case, we
     212                 :             :          * can't keep more groups than we have available.
     213                 :             :          */
     214                 :          36 :         nitems = stattarget;
     215         [ +  + ]:          36 :         if (nitems > ngroups)
     216                 :          22 :                 nitems = ngroups;
     217                 :             : 
     218                 :             :         /*
     219                 :             :          * Decide how many items to keep in the MCV list. We can't use the same
     220                 :             :          * algorithm as per-column MCV lists, because that only considers the
     221                 :             :          * actual group frequency - but we're primarily interested in how the
     222                 :             :          * actual frequency differs from the base frequency (product of simple
     223                 :             :          * per-column frequencies, as if the columns were independent).
     224                 :             :          *
     225                 :             :          * Using the same algorithm might exclude items that are close to the
     226                 :             :          * "average" frequency of the sample. But that does not say whether the
     227                 :             :          * observed frequency is close to the base frequency or not. We also need
     228                 :             :          * to consider unexpectedly uncommon items (again, compared to the base
     229                 :             :          * frequency), and the single-column algorithm does not have to.
     230                 :             :          *
     231                 :             :          * We simply decide how many items to keep by computing the minimum count
     232                 :             :          * using get_mincount_for_mcv_list() and then keep all items that seem to
     233                 :             :          * be more common than that.
     234                 :             :          */
     235                 :          36 :         mincount = get_mincount_for_mcv_list(numrows, totalrows);
     236                 :             : 
     237                 :             :         /*
     238                 :             :          * Walk the groups until we find the first group with a count below the
     239                 :             :          * mincount threshold (the index of that group is the number of groups we
     240                 :             :          * want to keep).
     241                 :             :          */
     242         [ +  + ]:        1688 :         for (i = 0; i < nitems; i++)
     243                 :             :         {
     244         [ -  + ]:        1652 :                 if (groups[i].count < mincount)
     245                 :             :                 {
     246                 :           0 :                         nitems = i;
     247                 :           0 :                         break;
     248                 :             :                 }
     249                 :        1652 :         }
     250                 :             : 
     251                 :             :         /*
     252                 :             :          * At this point, we know the number of items for the MCV list. There
     253                 :             :          * might be none (for uniform distribution with many groups), and in that
     254                 :             :          * case, there will be no MCV list. Otherwise, construct the MCV list.
     255                 :             :          */
     256         [ -  + ]:          36 :         if (nitems > 0)
     257                 :             :         {
     258                 :          36 :                 int                     j;
     259                 :          36 :                 SortItem        key;
     260                 :          36 :                 MultiSortSupport tmp;
     261                 :             : 
     262                 :             :                 /* frequencies for values in each attribute */
     263                 :          36 :                 SortItem  **freqs;
     264                 :          36 :                 int                *nfreqs;
     265                 :             : 
     266                 :             :                 /* used to search values */
     267                 :          36 :                 tmp = (MultiSortSupport) palloc(offsetof(MultiSortSupportData, ssup)
     268                 :             :                                                                                 + sizeof(SortSupportData));
     269                 :             : 
     270                 :             :                 /* compute frequencies for values in each column */
     271                 :          36 :                 nfreqs = palloc0_array(int, numattrs);
     272                 :          36 :                 freqs = build_column_frequencies(groups, ngroups, mss, nfreqs);
     273                 :             : 
     274                 :             :                 /*
     275                 :             :                  * Allocate the MCV list structure, set the global parameters.
     276                 :             :                  */
     277                 :          36 :                 mcvlist = (MCVList *) palloc0(offsetof(MCVList, items) +
     278                 :          36 :                                                                           sizeof(MCVItem) * nitems);
     279                 :             : 
     280                 :          36 :                 mcvlist->magic = STATS_MCV_MAGIC;
     281                 :          36 :                 mcvlist->type = STATS_MCV_TYPE_BASIC;
     282                 :          36 :                 mcvlist->ndimensions = numattrs;
     283                 :          36 :                 mcvlist->nitems = nitems;
     284                 :             : 
     285                 :             :                 /* store info about data type OIDs */
     286         [ +  + ]:         135 :                 for (i = 0; i < numattrs; i++)
     287                 :          99 :                         mcvlist->types[i] = data->stats[i]->attrtypid;
     288                 :             : 
     289                 :             :                 /* Copy the first chunk of groups into the result. */
     290         [ +  + ]:        1688 :                 for (i = 0; i < nitems; i++)
     291                 :             :                 {
     292                 :             :                         /* just point to the proper place in the list */
     293                 :        1652 :                         MCVItem    *item = &mcvlist->items[i];
     294                 :             : 
     295                 :        1652 :                         item->values = palloc_array(Datum, numattrs);
     296                 :        1652 :                         item->isnull = palloc_array(bool, numattrs);
     297                 :             : 
     298                 :             :                         /* copy values for the group */
     299                 :        1652 :                         memcpy(item->values, groups[i].values, sizeof(Datum) * numattrs);
     300                 :        1652 :                         memcpy(item->isnull, groups[i].isnull, sizeof(bool) * numattrs);
     301                 :             : 
     302                 :             :                         /* groups should be sorted by frequency in descending order */
     303   [ +  +  +  - ]:        1652 :                         Assert((i == 0) || (groups[i - 1].count >= groups[i].count));
     304                 :             : 
     305                 :             :                         /* group frequency */
     306                 :        1652 :                         item->frequency = (double) groups[i].count / numrows;
     307                 :             : 
     308                 :             :                         /* base frequency, if the attributes were independent */
     309                 :        1652 :                         item->base_frequency = 1.0;
     310         [ +  + ]:        6143 :                         for (j = 0; j < numattrs; j++)
     311                 :             :                         {
     312                 :        4491 :                                 SortItem   *freq;
     313                 :             : 
     314                 :             :                                 /* single dimension */
     315                 :        4491 :                                 tmp->ndims = 1;
     316                 :        4491 :                                 tmp->ssup[0] = mss->ssup[j];
     317                 :             : 
     318                 :             :                                 /* fill search key */
     319                 :        4491 :                                 key.values = &groups[i].values[j];
     320                 :        4491 :                                 key.isnull = &groups[i].isnull[j];
     321                 :             : 
     322                 :        8982 :                                 freq = (SortItem *) bsearch_arg(&key, freqs[j], nfreqs[j],
     323                 :             :                                                                                                 sizeof(SortItem),
     324                 :        4491 :                                                                                                 multi_sort_compare, tmp);
     325                 :             : 
     326                 :        4491 :                                 item->base_frequency *= ((double) freq->count) / numrows;
     327                 :        4491 :                         }
     328                 :        1652 :                 }
     329                 :             : 
     330                 :          36 :                 pfree(nfreqs);
     331                 :          36 :                 pfree(freqs);
     332                 :          36 :         }
     333                 :             : 
     334                 :          36 :         pfree(items);
     335                 :          36 :         pfree(groups);
     336                 :             : 
     337                 :          36 :         return mcvlist;
     338                 :          36 : }
     339                 :             : 
     340                 :             : /*
     341                 :             :  * build_mss
     342                 :             :  *              Build a MultiSortSupport for the given StatsBuildData.
     343                 :             :  */
     344                 :             : static MultiSortSupport
     345                 :          36 : build_mss(StatsBuildData *data)
     346                 :             : {
     347                 :          36 :         int                     i;
     348                 :          36 :         int                     numattrs = data->nattnums;
     349                 :             : 
     350                 :             :         /* Sort by multiple columns (using array of SortSupport) */
     351                 :          36 :         MultiSortSupport mss = multi_sort_init(numattrs);
     352                 :             : 
     353                 :             :         /* prepare the sort functions for all the attributes */
     354         [ +  + ]:         135 :         for (i = 0; i < numattrs; i++)
     355                 :             :         {
     356                 :          99 :                 VacAttrStats *colstat = data->stats[i];
     357                 :          99 :                 TypeCacheEntry *type;
     358                 :             : 
     359                 :          99 :                 type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
     360         [ +  - ]:          99 :                 if (type->lt_opr == InvalidOid) /* shouldn't happen */
     361   [ #  #  #  # ]:           0 :                         elog(ERROR, "cache lookup failed for ordering operator for type %u",
     362                 :             :                                  colstat->attrtypid);
     363                 :             : 
     364                 :          99 :                 multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
     365                 :          99 :         }
     366                 :             : 
     367                 :          72 :         return mss;
     368                 :          36 : }
     369                 :             : 
     370                 :             : /*
     371                 :             :  * count_distinct_groups
     372                 :             :  *              Count distinct combinations of SortItems in the array.
     373                 :             :  *
     374                 :             :  * The array is assumed to be sorted according to the MultiSortSupport.
     375                 :             :  */
     376                 :             : static int
     377                 :          36 : count_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss)
     378                 :             : {
     379                 :          36 :         int                     i;
     380                 :          36 :         int                     ndistinct;
     381                 :             : 
     382                 :          36 :         ndistinct = 1;
     383         [ +  + ]:       80523 :         for (i = 1; i < numrows; i++)
     384                 :             :         {
     385                 :             :                 /* make sure the array really is sorted */
     386         [ +  - ]:       80487 :                 Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
     387                 :             : 
     388         [ +  + ]:       80487 :                 if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
     389                 :       14968 :                         ndistinct += 1;
     390                 :       80487 :         }
     391                 :             : 
     392                 :          72 :         return ndistinct;
     393                 :          36 : }
     394                 :             : 
     395                 :             : /*
     396                 :             :  * compare_sort_item_count
     397                 :             :  *              Comparator for sorting items by count (frequencies) in descending
     398                 :             :  *              order.
     399                 :             :  */
     400                 :             : static int
     401                 :       16153 : compare_sort_item_count(const void *a, const void *b, void *arg)
     402                 :             : {
     403                 :       16153 :         const SortItem *ia = a;
     404                 :       16153 :         const SortItem *ib = b;
     405                 :             : 
     406         [ +  + ]:       16153 :         if (ia->count == ib->count)
     407                 :       15991 :                 return 0;
     408         [ +  + ]:         162 :         else if (ia->count > ib->count)
     409                 :         107 :                 return -1;
     410                 :             : 
     411                 :          55 :         return 1;
     412                 :       16153 : }
     413                 :             : 
     414                 :             : /*
     415                 :             :  * build_distinct_groups
     416                 :             :  *              Build an array of SortItems for distinct groups and counts matching
     417                 :             :  *              items.
     418                 :             :  *
     419                 :             :  * The 'items' array is assumed to be sorted.
     420                 :             :  */
     421                 :             : static SortItem *
     422                 :          36 : build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss,
     423                 :             :                                           int *ndistinct)
     424                 :             : {
     425                 :          36 :         int                     i,
     426                 :             :                                 j;
     427                 :          36 :         int                     ngroups = count_distinct_groups(numrows, items, mss);
     428                 :             : 
     429                 :          36 :         SortItem   *groups = (SortItem *) palloc(ngroups * sizeof(SortItem));
     430                 :             : 
     431                 :          36 :         j = 0;
     432                 :          36 :         groups[0] = items[0];
     433                 :          36 :         groups[0].count = 1;
     434                 :             : 
     435         [ +  + ]:       80523 :         for (i = 1; i < numrows; i++)
     436                 :             :         {
     437                 :             :                 /* Assume sorted in ascending order. */
     438         [ +  - ]:       80487 :                 Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
     439                 :             : 
     440                 :             :                 /* New distinct group detected. */
     441         [ +  + ]:       80487 :                 if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
     442                 :             :                 {
     443                 :       14968 :                         groups[++j] = items[i];
     444                 :       14968 :                         groups[j].count = 0;
     445                 :       14968 :                 }
     446                 :             : 
     447                 :       80487 :                 groups[j].count++;
     448                 :       80487 :         }
     449                 :             : 
     450                 :             :         /* ensure we filled the expected number of distinct groups */
     451         [ +  - ]:          36 :         Assert(j + 1 == ngroups);
     452                 :             : 
     453                 :             :         /* Sort the distinct groups by frequency (in descending order). */
     454                 :          36 :         qsort_interruptible(groups, ngroups, sizeof(SortItem),
     455                 :             :                                                 compare_sort_item_count, NULL);
     456                 :             : 
     457                 :          36 :         *ndistinct = ngroups;
     458                 :          72 :         return groups;
     459                 :          36 : }
     460                 :             : 
     461                 :             : /* compare sort items (single dimension) */
     462                 :             : static int
     463                 :      171592 : sort_item_compare(const void *a, const void *b, void *arg)
     464                 :             : {
     465                 :      171592 :         SortSupport ssup = (SortSupport) arg;
     466                 :      171592 :         const SortItem *ia = a;
     467                 :      171592 :         const SortItem *ib = b;
     468                 :             : 
     469                 :      514776 :         return ApplySortComparator(ia->values[0], ia->isnull[0],
     470                 :      171592 :                                                            ib->values[0], ib->isnull[0],
     471                 :      171592 :                                                            ssup);
     472                 :      171592 : }
     473                 :             : 
     474                 :             : /*
     475                 :             :  * build_column_frequencies
     476                 :             :  *              Compute frequencies of values in each column.
     477                 :             :  *
     478                 :             :  * This returns an array of SortItems for each attribute the MCV is built
     479                 :             :  * on, with a frequency (number of occurrences) for each value. This is
     480                 :             :  * then used to compute "base" frequency of MCV items.
     481                 :             :  *
     482                 :             :  * All the memory is allocated in a single chunk, so that a single pfree
     483                 :             :  * is enough to release it. We do not allocate space for values/isnull
     484                 :             :  * arrays in the SortItems, because we can simply point into the input
     485                 :             :  * groups directly.
     486                 :             :  */
     487                 :             : static SortItem **
     488                 :          36 : build_column_frequencies(SortItem *groups, int ngroups,
     489                 :             :                                                  MultiSortSupport mss, int *ncounts)
     490                 :             : {
     491                 :          36 :         int                     i,
     492                 :             :                                 dim;
     493                 :          36 :         SortItem  **result;
     494                 :          36 :         char       *ptr;
     495                 :             : 
     496         [ +  - ]:          36 :         Assert(groups);
     497         [ +  - ]:          36 :         Assert(ncounts);
     498                 :             : 
     499                 :             :         /* allocate arrays for all columns as a single chunk */
     500                 :          72 :         ptr = palloc(MAXALIGN(sizeof(SortItem *) * mss->ndims) +
     501                 :          36 :                                  mss->ndims * MAXALIGN(sizeof(SortItem) * ngroups));
     502                 :             : 
     503                 :             :         /* initial array of pointers */
     504                 :          36 :         result = (SortItem **) ptr;
     505                 :          36 :         ptr += MAXALIGN(sizeof(SortItem *) * mss->ndims);
     506                 :             : 
     507         [ +  + ]:         135 :         for (dim = 0; dim < mss->ndims; dim++)
     508                 :             :         {
     509                 :          99 :                 SortSupport ssup = &mss->ssup[dim];
     510                 :             : 
     511                 :             :                 /* array of values for a single column */
     512                 :          99 :                 result[dim] = (SortItem *) ptr;
     513                 :          99 :                 ptr += MAXALIGN(sizeof(SortItem) * ngroups);
     514                 :             : 
     515                 :             :                 /* extract data for the dimension */
     516         [ +  + ]:       41894 :                 for (i = 0; i < ngroups; i++)
     517                 :             :                 {
     518                 :             :                         /* point into the input groups */
     519                 :       41795 :                         result[dim][i].values = &groups[i].values[dim];
     520                 :       41795 :                         result[dim][i].isnull = &groups[i].isnull[dim];
     521                 :       41795 :                         result[dim][i].count = groups[i].count;
     522                 :       41795 :                 }
     523                 :             : 
     524                 :             :                 /* sort the values, deduplicate */
     525                 :         198 :                 qsort_interruptible(result[dim], ngroups, sizeof(SortItem),
     526                 :          99 :                                                         sort_item_compare, ssup);
     527                 :             : 
     528                 :             :                 /*
     529                 :             :                  * Identify distinct values, compute frequency (there might be
     530                 :             :                  * multiple MCV items containing this value, so we need to sum counts
     531                 :             :                  * from all of them.
     532                 :             :                  */
     533                 :          99 :                 ncounts[dim] = 1;
     534         [ +  + ]:       41795 :                 for (i = 1; i < ngroups; i++)
     535                 :             :                 {
     536         [ +  + ]:       41696 :                         if (sort_item_compare(&result[dim][i - 1], &result[dim][i], ssup) == 0)
     537                 :             :                         {
     538                 :       24710 :                                 result[dim][ncounts[dim] - 1].count += result[dim][i].count;
     539                 :       24710 :                                 continue;
     540                 :             :                         }
     541                 :             : 
     542                 :       16986 :                         result[dim][ncounts[dim]] = result[dim][i];
     543                 :             : 
     544                 :       16986 :                         ncounts[dim]++;
     545                 :       16986 :                 }
     546                 :          99 :         }
     547                 :             : 
     548                 :          72 :         return result;
     549                 :          36 : }
     550                 :             : 
     551                 :             : /*
     552                 :             :  * statext_mcv_load
     553                 :             :  *              Load the MCV list for the indicated pg_statistic_ext_data tuple.
     554                 :             :  */
     555                 :             : MCVList *
     556                 :         101 : statext_mcv_load(Oid mvoid, bool inh)
     557                 :             : {
     558                 :         101 :         MCVList    *result;
     559                 :         101 :         bool            isnull;
     560                 :         101 :         Datum           mcvlist;
     561                 :         202 :         HeapTuple       htup = SearchSysCache2(STATEXTDATASTXOID,
     562                 :         101 :                                                                            ObjectIdGetDatum(mvoid), BoolGetDatum(inh));
     563                 :             : 
     564         [ +  - ]:         101 :         if (!HeapTupleIsValid(htup))
     565   [ #  #  #  # ]:           0 :                 elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
     566                 :             : 
     567                 :         101 :         mcvlist = SysCacheGetAttr(STATEXTDATASTXOID, htup,
     568                 :             :                                                           Anum_pg_statistic_ext_data_stxdmcv, &isnull);
     569                 :             : 
     570         [ +  - ]:         101 :         if (isnull)
     571   [ #  #  #  # ]:           0 :                 elog(ERROR,
     572                 :             :                          "requested statistics kind \"%c\" is not yet built for statistics object %u",
     573                 :             :                          STATS_EXT_MCV, mvoid);
     574                 :             : 
     575                 :         101 :         result = statext_mcv_deserialize(DatumGetByteaP(mcvlist));
     576                 :             : 
     577                 :         101 :         ReleaseSysCache(htup);
     578                 :             : 
     579                 :         202 :         return result;
     580                 :         101 : }
     581                 :             : 
     582                 :             : 
     583                 :             : /*
     584                 :             :  * statext_mcv_serialize
     585                 :             :  *              Serialize MCV list into a pg_mcv_list value.
     586                 :             :  *
     587                 :             :  * The MCV items may include values of various data types, and it's reasonable
     588                 :             :  * to expect redundancy (values for a given attribute, repeated for multiple
     589                 :             :  * MCV list items). So we deduplicate the values into arrays, and then replace
     590                 :             :  * the values by indexes into those arrays.
     591                 :             :  *
     592                 :             :  * The overall structure of the serialized representation looks like this:
     593                 :             :  *
     594                 :             :  * +---------------+----------------+---------------------+-------+
     595                 :             :  * | header fields | dimension info | deduplicated values | items |
     596                 :             :  * +---------------+----------------+---------------------+-------+
     597                 :             :  *
     598                 :             :  * Where dimension info stores information about the type of the K-th
     599                 :             :  * attribute (e.g. typlen, typbyval and length of deduplicated values).
     600                 :             :  * Deduplicated values store deduplicated values for each attribute.  And
     601                 :             :  * items store the actual MCV list items, with values replaced by indexes into
     602                 :             :  * the arrays.
     603                 :             :  *
     604                 :             :  * When serializing the items, we use uint16 indexes. The number of MCV items
     605                 :             :  * is limited by the statistics target (which is capped to 10k at the moment).
     606                 :             :  * We might increase this to 65k and still fit into uint16, so there's a bit of
     607                 :             :  * slack. Furthermore, this limit is on the number of distinct values per column,
     608                 :             :  * and we usually have few of those (and various combinations of them for the
     609                 :             :  * those MCV list). So uint16 seems fine for now.
     610                 :             :  *
     611                 :             :  * We don't really expect the serialization to save as much space as for
     612                 :             :  * histograms, as we are not doing any bucket splits (which is the source
     613                 :             :  * of high redundancy in histograms).
     614                 :             :  *
     615                 :             :  * TODO: Consider packing boolean flags (NULL) for each item into a single char
     616                 :             :  * (or a longer type) instead of using an array of bool items.
     617                 :             :  */
     618                 :             : bytea *
     619                 :          36 : statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats)
     620                 :             : {
     621                 :          36 :         int                     i;
     622                 :          36 :         int                     dim;
     623                 :          36 :         int                     ndims = mcvlist->ndimensions;
     624                 :             : 
     625                 :          36 :         SortSupport ssup;
     626                 :          36 :         DimensionInfo *info;
     627                 :             : 
     628                 :          36 :         Size            total_length;
     629                 :             : 
     630                 :             :         /* serialized items (indexes into arrays, etc.) */
     631                 :          36 :         bytea      *raw;
     632                 :          36 :         char       *ptr;
     633                 :          36 :         char       *endptr PG_USED_FOR_ASSERTS_ONLY;
     634                 :             : 
     635                 :             :         /* values per dimension (and number of non-NULL values) */
     636                 :          36 :         Datum     **values = palloc0_array(Datum *, ndims);
     637                 :          36 :         int                *counts = palloc0_array(int, ndims);
     638                 :             : 
     639                 :             :         /*
     640                 :             :          * We'll include some rudimentary information about the attribute types
     641                 :             :          * (length, by-val flag), so that we don't have to look them up while
     642                 :             :          * deserializing the MCV list (we already have the type OID in the
     643                 :             :          * header).  This is safe because when changing the type of the attribute
     644                 :             :          * the statistics gets dropped automatically.  We need to store the info
     645                 :             :          * about the arrays of deduplicated values anyway.
     646                 :             :          */
     647                 :          36 :         info = palloc0_array(DimensionInfo, ndims);
     648                 :             : 
     649                 :             :         /* sort support data for all attributes included in the MCV list */
     650                 :          36 :         ssup = palloc0_array(SortSupportData, ndims);
     651                 :             : 
     652                 :             :         /* collect and deduplicate values for each dimension (attribute) */
     653         [ +  + ]:         135 :         for (dim = 0; dim < ndims; dim++)
     654                 :             :         {
     655                 :          99 :                 int                     ndistinct;
     656                 :          99 :                 TypeCacheEntry *typentry;
     657                 :             : 
     658                 :             :                 /*
     659                 :             :                  * Lookup the LT operator (can't get it from stats extra_data, as we
     660                 :             :                  * don't know how to interpret that - scalar vs. array etc.).
     661                 :             :                  */
     662                 :          99 :                 typentry = lookup_type_cache(stats[dim]->attrtypid, TYPECACHE_LT_OPR);
     663                 :             : 
     664                 :             :                 /* copy important info about the data type (length, by-value) */
     665                 :          99 :                 info[dim].typlen = stats[dim]->attrtype->typlen;
     666                 :          99 :                 info[dim].typbyval = stats[dim]->attrtype->typbyval;
     667                 :             : 
     668                 :             :                 /* allocate space for values in the attribute and collect them */
     669                 :          99 :                 values[dim] = palloc0_array(Datum, mcvlist->nitems);
     670                 :             : 
     671         [ +  + ]:        4590 :                 for (i = 0; i < mcvlist->nitems; i++)
     672                 :             :                 {
     673                 :             :                         /* skip NULL values - we don't need to deduplicate those */
     674         [ +  + ]:        4491 :                         if (mcvlist->items[i].isnull[dim])
     675                 :          13 :                                 continue;
     676                 :             : 
     677                 :             :                         /* append the value at the end */
     678                 :        4478 :                         values[dim][counts[dim]] = mcvlist->items[i].values[dim];
     679                 :        4478 :                         counts[dim] += 1;
     680                 :        4478 :                 }
     681                 :             : 
     682                 :             :                 /* if there are just NULL values in this dimension, we're done */
     683         [ +  + ]:          99 :                 if (counts[dim] == 0)
     684                 :           1 :                         continue;
     685                 :             : 
     686                 :             :                 /* sort and deduplicate the data */
     687                 :          98 :                 ssup[dim].ssup_cxt = CurrentMemoryContext;
     688                 :          98 :                 ssup[dim].ssup_collation = stats[dim]->attrcollid;
     689                 :          98 :                 ssup[dim].ssup_nulls_first = false;
     690                 :             : 
     691                 :          98 :                 PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]);
     692                 :             : 
     693                 :         196 :                 qsort_interruptible(values[dim], counts[dim], sizeof(Datum),
     694                 :          98 :                                                         compare_scalars_simple, &ssup[dim]);
     695                 :             : 
     696                 :             :                 /*
     697                 :             :                  * Walk through the array and eliminate duplicate values, but keep the
     698                 :             :                  * ordering (so that we can do a binary search later). We know there's
     699                 :             :                  * at least one item as (counts[dim] != 0), so we can skip the first
     700                 :             :                  * element.
     701                 :             :                  */
     702                 :          98 :                 ndistinct = 1;                  /* number of distinct values */
     703         [ +  + ]:        4478 :                 for (i = 1; i < counts[dim]; i++)
     704                 :             :                 {
     705                 :             :                         /* expect sorted array */
     706         [ +  - ]:        4380 :                         Assert(compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]) <= 0);
     707                 :             : 
     708                 :             :                         /* if the value is the same as the previous one, we can skip it */
     709         [ +  + ]:        4380 :                         if (!compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]))
     710                 :        1870 :                                 continue;
     711                 :             : 
     712                 :        2510 :                         values[dim][ndistinct] = values[dim][i];
     713                 :        2510 :                         ndistinct += 1;
     714                 :        2510 :                 }
     715                 :             : 
     716                 :             :                 /* we must not exceed PG_UINT16_MAX, as we use uint16 indexes */
     717         [ +  - ]:          98 :                 Assert(ndistinct <= PG_UINT16_MAX);
     718                 :             : 
     719                 :             :                 /*
     720                 :             :                  * Store additional info about the attribute - number of deduplicated
     721                 :             :                  * values, and also size of the serialized data. For fixed-length data
     722                 :             :                  * types this is trivial to compute, for varwidth types we need to
     723                 :             :                  * actually walk the array and sum the sizes.
     724                 :             :                  */
     725                 :          98 :                 info[dim].nvalues = ndistinct;
     726                 :             : 
     727         [ +  + ]:          98 :                 if (info[dim].typbyval) /* by-value data types */
     728                 :             :                 {
     729                 :          72 :                         info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
     730                 :             : 
     731                 :             :                         /*
     732                 :             :                          * We copy the data into the MCV item during deserialization, so
     733                 :             :                          * we don't need to allocate any extra space.
     734                 :             :                          */
     735                 :          72 :                         info[dim].nbytes_aligned = 0;
     736                 :          72 :                 }
     737         [ +  + ]:          26 :                 else if (info[dim].typlen > 0)       /* fixed-length by-ref */
     738                 :             :                 {
     739                 :             :                         /*
     740                 :             :                          * We don't care about alignment in the serialized data, so we
     741                 :             :                          * pack the data as much as possible. But we also track how much
     742                 :             :                          * data will be needed after deserialization, and in that case we
     743                 :             :                          * need to account for alignment of each item.
     744                 :             :                          *
     745                 :             :                          * Note: As the items are fixed-length, we could easily compute
     746                 :             :                          * this during deserialization, but we do it here anyway.
     747                 :             :                          */
     748                 :           4 :                         info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
     749                 :           4 :                         info[dim].nbytes_aligned = info[dim].nvalues * MAXALIGN(info[dim].typlen);
     750                 :           4 :                 }
     751         [ -  + ]:          22 :                 else if (info[dim].typlen == -1)        /* varlena */
     752                 :             :                 {
     753                 :          22 :                         info[dim].nbytes = 0;
     754                 :          22 :                         info[dim].nbytes_aligned = 0;
     755         [ +  + ]:         494 :                         for (i = 0; i < info[dim].nvalues; i++)
     756                 :             :                         {
     757                 :         472 :                                 Size            len;
     758                 :             : 
     759                 :             :                                 /*
     760                 :             :                                  * For varlena values, we detoast the values and store the
     761                 :             :                                  * length and data separately. We don't bother with alignment
     762                 :             :                                  * here, which means that during deserialization we need to
     763                 :             :                                  * copy the fields and only access the copies.
     764                 :             :                                  */
     765                 :         472 :                                 values[dim][i] = PointerGetDatum(PG_DETOAST_DATUM(values[dim][i]));
     766                 :             : 
     767                 :             :                                 /* serialized length (uint32 length + data) */
     768                 :         472 :                                 len = VARSIZE_ANY_EXHDR(DatumGetPointer(values[dim][i]));
     769                 :         472 :                                 info[dim].nbytes += sizeof(uint32); /* length */
     770                 :         472 :                                 info[dim].nbytes += len;        /* value (no header) */
     771                 :             : 
     772                 :             :                                 /*
     773                 :             :                                  * During deserialization we'll build regular varlena values
     774                 :             :                                  * with full headers, and we need to align them properly.
     775                 :             :                                  */
     776                 :         472 :                                 info[dim].nbytes_aligned += MAXALIGN(VARHDRSZ + len);
     777                 :         472 :                         }
     778                 :          22 :                 }
     779         [ #  # ]:           0 :                 else if (info[dim].typlen == -2)        /* cstring */
     780                 :             :                 {
     781                 :           0 :                         info[dim].nbytes = 0;
     782                 :           0 :                         info[dim].nbytes_aligned = 0;
     783         [ #  # ]:           0 :                         for (i = 0; i < info[dim].nvalues; i++)
     784                 :             :                         {
     785                 :           0 :                                 Size            len;
     786                 :             : 
     787                 :             :                                 /*
     788                 :             :                                  * cstring is handled similar to varlena - first we store the
     789                 :             :                                  * length as uint32 and then the data. We don't care about
     790                 :             :                                  * alignment, which means that during deserialization we need
     791                 :             :                                  * to copy the fields and only access the copies.
     792                 :             :                                  */
     793                 :             : 
     794                 :             :                                 /* c-strings include terminator, so +1 byte */
     795                 :           0 :                                 len = strlen(DatumGetCString(values[dim][i])) + 1;
     796                 :           0 :                                 info[dim].nbytes += sizeof(uint32); /* length */
     797                 :           0 :                                 info[dim].nbytes += len;        /* value */
     798                 :             : 
     799                 :             :                                 /* space needed for properly aligned deserialized copies */
     800                 :           0 :                                 info[dim].nbytes_aligned += MAXALIGN(len);
     801                 :           0 :                         }
     802                 :           0 :                 }
     803                 :             : 
     804                 :             :                 /* we know (count>0) so there must be some data */
     805         [ +  - ]:          98 :                 Assert(info[dim].nbytes > 0);
     806      [ -  +  + ]:          99 :         }
     807                 :             : 
     808                 :             :         /*
     809                 :             :          * Now we can finally compute how much space we'll actually need for the
     810                 :             :          * whole serialized MCV list (varlena header, MCV header, dimension info
     811                 :             :          * for each attribute, deduplicated values and items).
     812                 :             :          */
     813                 :          36 :         total_length = (3 * sizeof(uint32)) /* magic + type + nitems */
     814                 :             :                 + sizeof(AttrNumber)    /* ndimensions */
     815                 :          36 :                 + (ndims * sizeof(Oid));        /* attribute types */
     816                 :             : 
     817                 :             :         /* dimension info */
     818                 :          36 :         total_length += ndims * sizeof(DimensionInfo);
     819                 :             : 
     820                 :             :         /* add space for the arrays of deduplicated values */
     821         [ +  + ]:         135 :         for (i = 0; i < ndims; i++)
     822                 :          99 :                 total_length += info[i].nbytes;
     823                 :             : 
     824                 :             :         /*
     825                 :             :          * And finally account for the items (those are fixed-length, thanks to
     826                 :             :          * replacing values with uint16 indexes into the deduplicated arrays).
     827                 :             :          */
     828                 :          36 :         total_length += mcvlist->nitems * ITEM_SIZE(dim);
     829                 :             : 
     830                 :             :         /*
     831                 :             :          * Allocate space for the whole serialized MCV list (we'll skip bytes, so
     832                 :             :          * we set them to zero to make the result more compressible).
     833                 :             :          */
     834                 :          36 :         raw = (bytea *) palloc0(VARHDRSZ + total_length);
     835                 :          36 :         SET_VARSIZE(raw, VARHDRSZ + total_length);
     836                 :             : 
     837                 :          36 :         ptr = VARDATA(raw);
     838                 :          36 :         endptr = ptr + total_length;
     839                 :             : 
     840                 :             :         /* copy the MCV list header fields, one by one */
     841                 :          36 :         memcpy(ptr, &mcvlist->magic, sizeof(uint32));
     842                 :          36 :         ptr += sizeof(uint32);
     843                 :             : 
     844                 :          36 :         memcpy(ptr, &mcvlist->type, sizeof(uint32));
     845                 :          36 :         ptr += sizeof(uint32);
     846                 :             : 
     847                 :          36 :         memcpy(ptr, &mcvlist->nitems, sizeof(uint32));
     848                 :          36 :         ptr += sizeof(uint32);
     849                 :             : 
     850                 :          36 :         memcpy(ptr, &mcvlist->ndimensions, sizeof(AttrNumber));
     851                 :          36 :         ptr += sizeof(AttrNumber);
     852                 :             : 
     853                 :          36 :         memcpy(ptr, mcvlist->types, sizeof(Oid) * ndims);
     854                 :          36 :         ptr += (sizeof(Oid) * ndims);
     855                 :             : 
     856                 :             :         /* store information about the attributes (data amounts, ...) */
     857                 :          36 :         memcpy(ptr, info, sizeof(DimensionInfo) * ndims);
     858                 :          36 :         ptr += sizeof(DimensionInfo) * ndims;
     859                 :             : 
     860                 :             :         /* Copy the deduplicated values for all attributes to the output. */
     861         [ +  + ]:         135 :         for (dim = 0; dim < ndims; dim++)
     862                 :             :         {
     863                 :             :                 /* remember the starting point for Asserts later */
     864                 :          99 :                 char       *start PG_USED_FOR_ASSERTS_ONLY = ptr;
     865                 :             : 
     866         [ +  + ]:        2707 :                 for (i = 0; i < info[dim].nvalues; i++)
     867                 :             :                 {
     868                 :        2608 :                         Datum           value = values[dim][i];
     869                 :             : 
     870         [ +  + ]:        2608 :                         if (info[dim].typbyval) /* passed by value */
     871                 :             :                         {
     872                 :        1951 :                                 Datum           tmp;
     873                 :             : 
     874                 :             :                                 /*
     875                 :             :                                  * For byval types, we need to copy just the significant bytes
     876                 :             :                                  * - we can't use memcpy directly, as that assumes
     877                 :             :                                  * little-endian behavior.  store_att_byval does almost what
     878                 :             :                                  * we need, but it requires a properly aligned buffer - the
     879                 :             :                                  * output buffer does not guarantee that. So we simply use a
     880                 :             :                                  * local Datum variable (which guarantees proper alignment),
     881                 :             :                                  * and then copy the value from it.
     882                 :             :                                  */
     883                 :        1951 :                                 store_att_byval(&tmp, value, info[dim].typlen);
     884                 :             : 
     885                 :        1951 :                                 memcpy(ptr, &tmp, info[dim].typlen);
     886                 :        1951 :                                 ptr += info[dim].typlen;
     887                 :        1951 :                         }
     888         [ +  + ]:         657 :                         else if (info[dim].typlen > 0)       /* passed by reference */
     889                 :             :                         {
     890                 :             :                                 /* no special alignment needed, treated as char array */
     891                 :         185 :                                 memcpy(ptr, DatumGetPointer(value), info[dim].typlen);
     892                 :         185 :                                 ptr += info[dim].typlen;
     893                 :         185 :                         }
     894         [ -  + ]:         472 :                         else if (info[dim].typlen == -1)        /* varlena */
     895                 :             :                         {
     896                 :         472 :                                 uint32          len = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
     897                 :             : 
     898                 :             :                                 /* copy the length */
     899                 :         472 :                                 memcpy(ptr, &len, sizeof(uint32));
     900                 :         472 :                                 ptr += sizeof(uint32);
     901                 :             : 
     902                 :             :                                 /* data from the varlena value (without the header) */
     903                 :         472 :                                 memcpy(ptr, VARDATA_ANY(DatumGetPointer(value)), len);
     904                 :         472 :                                 ptr += len;
     905                 :         472 :                         }
     906         [ #  # ]:           0 :                         else if (info[dim].typlen == -2)        /* cstring */
     907                 :             :                         {
     908                 :           0 :                                 uint32          len = (uint32) strlen(DatumGetCString(value)) + 1;
     909                 :             : 
     910                 :             :                                 /* copy the length */
     911                 :           0 :                                 memcpy(ptr, &len, sizeof(uint32));
     912                 :           0 :                                 ptr += sizeof(uint32);
     913                 :             : 
     914                 :             :                                 /* value */
     915                 :           0 :                                 memcpy(ptr, DatumGetCString(value), len);
     916                 :           0 :                                 ptr += len;
     917                 :           0 :                         }
     918                 :             : 
     919                 :             :                         /* no underflows or overflows */
     920         [ +  - ]:        2608 :                         Assert((ptr > start) && ((ptr - start) <= info[dim].nbytes));
     921                 :        2608 :                 }
     922                 :             : 
     923                 :             :                 /* we should get exactly nbytes of data for this dimension */
     924         [ +  - ]:          99 :                 Assert((ptr - start) == info[dim].nbytes);
     925                 :          99 :         }
     926                 :             : 
     927                 :             :         /* Serialize the items, with uint16 indexes instead of the values. */
     928         [ +  + ]:        1688 :         for (i = 0; i < mcvlist->nitems; i++)
     929                 :             :         {
     930                 :        1652 :                 MCVItem    *mcvitem = &mcvlist->items[i];
     931                 :             : 
     932                 :             :                 /* don't write beyond the allocated space */
     933         [ +  - ]:        1652 :                 Assert(ptr <= (endptr - ITEM_SIZE(dim)));
     934                 :             : 
     935                 :             :                 /* copy NULL and frequency flags into the serialized MCV */
     936                 :        1652 :                 memcpy(ptr, mcvitem->isnull, sizeof(bool) * ndims);
     937                 :        1652 :                 ptr += sizeof(bool) * ndims;
     938                 :             : 
     939                 :        1652 :                 memcpy(ptr, &mcvitem->frequency, sizeof(double));
     940                 :        1652 :                 ptr += sizeof(double);
     941                 :             : 
     942                 :        1652 :                 memcpy(ptr, &mcvitem->base_frequency, sizeof(double));
     943                 :        1652 :                 ptr += sizeof(double);
     944                 :             : 
     945                 :             :                 /* store the indexes last */
     946         [ +  + ]:        6143 :                 for (dim = 0; dim < ndims; dim++)
     947                 :             :                 {
     948                 :        4491 :                         uint16          index = 0;
     949                 :        4491 :                         Datum      *value;
     950                 :             : 
     951                 :             :                         /* do the lookup only for non-NULL values */
     952         [ +  + ]:        4491 :                         if (!mcvitem->isnull[dim])
     953                 :             :                         {
     954                 :        8956 :                                 value = (Datum *) bsearch_arg(&mcvitem->values[dim], values[dim],
     955                 :        4478 :                                                                                           info[dim].nvalues, sizeof(Datum),
     956                 :        4478 :                                                                                           compare_scalars_simple, &ssup[dim]);
     957                 :             : 
     958         [ +  - ]:        4478 :                                 Assert(value != NULL);  /* serialization or deduplication
     959                 :             :                                                                                  * error */
     960                 :             : 
     961                 :             :                                 /* compute index within the deduplicated array */
     962                 :        4478 :                                 index = (uint16) (value - values[dim]);
     963                 :             : 
     964                 :             :                                 /* check the index is within expected bounds */
     965         [ +  - ]:        4478 :                                 Assert(index < info[dim].nvalues);
     966                 :        4478 :                         }
     967                 :             : 
     968                 :             :                         /* copy the index into the serialized MCV */
     969                 :        4491 :                         memcpy(ptr, &index, sizeof(uint16));
     970                 :        4491 :                         ptr += sizeof(uint16);
     971                 :        4491 :                 }
     972                 :             : 
     973                 :             :                 /* make sure we don't overflow the allocated value */
     974         [ +  - ]:        1652 :                 Assert(ptr <= endptr);
     975                 :        1652 :         }
     976                 :             : 
     977                 :             :         /* at this point we expect to match the total_length exactly */
     978         [ +  - ]:          36 :         Assert(ptr == endptr);
     979                 :             : 
     980                 :          36 :         pfree(values);
     981                 :          36 :         pfree(counts);
     982                 :             : 
     983                 :          72 :         return raw;
     984                 :          36 : }
     985                 :             : 
     986                 :             : /*
     987                 :             :  * statext_mcv_deserialize
     988                 :             :  *              Reads serialized MCV list into MCVList structure.
     989                 :             :  *
     990                 :             :  * All the memory needed by the MCV list is allocated as a single chunk, so
     991                 :             :  * it's possible to simply pfree() it at once.
     992                 :             :  */
     993                 :             : MCVList *
     994                 :         105 : statext_mcv_deserialize(bytea *data)
     995                 :             : {
     996                 :         105 :         int                     dim,
     997                 :             :                                 i;
     998                 :         105 :         Size            expected_size;
     999                 :         105 :         MCVList    *mcvlist;
    1000                 :         105 :         char       *raw;
    1001                 :         105 :         char       *ptr;
    1002                 :         105 :         char       *endptr PG_USED_FOR_ASSERTS_ONLY;
    1003                 :             : 
    1004                 :         105 :         int                     ndims,
    1005                 :             :                                 nitems;
    1006                 :         105 :         DimensionInfo *info = NULL;
    1007                 :             : 
    1008                 :             :         /* local allocation buffer (used only for deserialization) */
    1009                 :         105 :         Datum     **map = NULL;
    1010                 :             : 
    1011                 :             :         /* MCV list */
    1012                 :         105 :         Size            mcvlen;
    1013                 :             : 
    1014                 :             :         /* buffer used for the result */
    1015                 :         105 :         Size            datalen;
    1016                 :         105 :         char       *dataptr;
    1017                 :         105 :         char       *valuesptr;
    1018                 :         105 :         char       *isnullptr;
    1019                 :             : 
    1020         [ +  - ]:         105 :         if (data == NULL)
    1021                 :           0 :                 return NULL;
    1022                 :             : 
    1023                 :             :         /*
    1024                 :             :          * We can't possibly deserialize a MCV list if there's not even a complete
    1025                 :             :          * header. We need an explicit formula here, because we serialize the
    1026                 :             :          * header fields one by one, so we need to ignore struct alignment.
    1027                 :             :          */
    1028         [ +  - ]:         105 :         if (VARSIZE_ANY(data) < MinSizeOfMCVList)
    1029   [ #  #  #  # ]:           0 :                 elog(ERROR, "invalid MCV size %zu (expected at least %zu)",
    1030                 :             :                          VARSIZE_ANY(data), MinSizeOfMCVList);
    1031                 :             : 
    1032                 :             :         /* read the MCV list header */
    1033                 :         105 :         mcvlist = (MCVList *) palloc0(offsetof(MCVList, items));
    1034                 :             : 
    1035                 :             :         /* pointer to the data part (skip the varlena header) */
    1036                 :         105 :         raw = (char *) data;
    1037                 :         105 :         ptr = VARDATA_ANY(raw);
    1038                 :         105 :         endptr = raw + VARSIZE_ANY(data);
    1039                 :             : 
    1040                 :             :         /* get the header and perform further sanity checks */
    1041                 :         105 :         memcpy(&mcvlist->magic, ptr, sizeof(uint32));
    1042                 :         105 :         ptr += sizeof(uint32);
    1043                 :             : 
    1044                 :         105 :         memcpy(&mcvlist->type, ptr, sizeof(uint32));
    1045                 :         105 :         ptr += sizeof(uint32);
    1046                 :             : 
    1047                 :         105 :         memcpy(&mcvlist->nitems, ptr, sizeof(uint32));
    1048                 :         105 :         ptr += sizeof(uint32);
    1049                 :             : 
    1050                 :         105 :         memcpy(&mcvlist->ndimensions, ptr, sizeof(AttrNumber));
    1051                 :         105 :         ptr += sizeof(AttrNumber);
    1052                 :             : 
    1053         [ +  - ]:         105 :         if (mcvlist->magic != STATS_MCV_MAGIC)
    1054   [ #  #  #  # ]:           0 :                 elog(ERROR, "invalid MCV magic %u (expected %u)",
    1055                 :             :                          mcvlist->magic, STATS_MCV_MAGIC);
    1056                 :             : 
    1057         [ +  - ]:         105 :         if (mcvlist->type != STATS_MCV_TYPE_BASIC)
    1058   [ #  #  #  # ]:           0 :                 elog(ERROR, "invalid MCV type %u (expected %u)",
    1059                 :             :                          mcvlist->type, STATS_MCV_TYPE_BASIC);
    1060                 :             : 
    1061         [ +  - ]:         105 :         if (mcvlist->ndimensions == 0)
    1062   [ #  #  #  # ]:           0 :                 elog(ERROR, "invalid zero-length dimension array in MCVList");
    1063         [ +  - ]:         105 :         else if ((mcvlist->ndimensions > STATS_MAX_DIMENSIONS) ||
    1064                 :         105 :                          (mcvlist->ndimensions < 0))
    1065   [ #  #  #  # ]:           0 :                 elog(ERROR, "invalid length (%d) dimension array in MCVList",
    1066                 :             :                          mcvlist->ndimensions);
    1067                 :             : 
    1068         [ +  - ]:         105 :         if (mcvlist->nitems == 0)
    1069   [ #  #  #  # ]:           0 :                 elog(ERROR, "invalid zero-length item array in MCVList");
    1070         [ +  - ]:         105 :         else if (mcvlist->nitems > STATS_MCVLIST_MAX_ITEMS)
    1071   [ #  #  #  # ]:           0 :                 elog(ERROR, "invalid length (%u) item array in MCVList",
    1072                 :             :                          mcvlist->nitems);
    1073                 :             : 
    1074                 :         105 :         nitems = mcvlist->nitems;
    1075                 :         105 :         ndims = mcvlist->ndimensions;
    1076                 :             : 
    1077                 :             :         /*
    1078                 :             :          * Check amount of data including DimensionInfo for all dimensions and
    1079                 :             :          * also the serialized items (including uint16 indexes). Also, walk
    1080                 :             :          * through the dimension information and add it to the sum.
    1081                 :             :          */
    1082                 :         105 :         expected_size = SizeOfMCVList(ndims, nitems);
    1083                 :             : 
    1084                 :             :         /*
    1085                 :             :          * Check that we have at least the dimension and info records, along with
    1086                 :             :          * the items. We don't know the size of the serialized values yet. We need
    1087                 :             :          * to do this check first, before accessing the dimension info.
    1088                 :             :          */
    1089         [ +  - ]:         105 :         if (VARSIZE_ANY(data) < expected_size)
    1090   [ #  #  #  # ]:           0 :                 elog(ERROR, "invalid MCV size %zu (expected %zu)",
    1091                 :             :                          VARSIZE_ANY(data), expected_size);
    1092                 :             : 
    1093                 :             :         /* Now copy the array of type Oids. */
    1094                 :         105 :         memcpy(mcvlist->types, ptr, sizeof(Oid) * ndims);
    1095                 :         105 :         ptr += (sizeof(Oid) * ndims);
    1096                 :             : 
    1097                 :             :         /* Now it's safe to access the dimension info. */
    1098                 :         105 :         info = palloc(ndims * sizeof(DimensionInfo));
    1099                 :             : 
    1100                 :         105 :         memcpy(info, ptr, ndims * sizeof(DimensionInfo));
    1101                 :         105 :         ptr += (ndims * sizeof(DimensionInfo));
    1102                 :             : 
    1103                 :             :         /* account for the value arrays */
    1104         [ +  + ]:         422 :         for (dim = 0; dim < ndims; dim++)
    1105                 :             :         {
    1106                 :             :                 /*
    1107                 :             :                  * XXX I wonder if we can/should rely on asserts here. Maybe those
    1108                 :             :                  * checks should be done every time?
    1109                 :             :                  */
    1110         [ +  - ]:         317 :                 Assert(info[dim].nvalues >= 0);
    1111         [ +  - ]:         317 :                 Assert(info[dim].nbytes >= 0);
    1112                 :             : 
    1113                 :         317 :                 expected_size += info[dim].nbytes;
    1114                 :         317 :         }
    1115                 :             : 
    1116                 :             :         /*
    1117                 :             :          * Now we know the total expected MCV size, including all the pieces
    1118                 :             :          * (header, dimension info. items and deduplicated data). So do the final
    1119                 :             :          * check on size.
    1120                 :             :          */
    1121         [ +  - ]:         105 :         if (VARSIZE_ANY(data) != expected_size)
    1122   [ #  #  #  # ]:           0 :                 elog(ERROR, "invalid MCV size %zu (expected %zu)",
    1123                 :             :                          VARSIZE_ANY(data), expected_size);
    1124                 :             : 
    1125                 :             :         /*
    1126                 :             :          * We need an array of Datum values for each dimension, so that we can
    1127                 :             :          * easily translate the uint16 indexes later. We also need a top-level
    1128                 :             :          * array of pointers to those per-dimension arrays.
    1129                 :             :          *
    1130                 :             :          * While allocating the arrays for dimensions, compute how much space we
    1131                 :             :          * need for a copy of the by-ref data, as we can't simply point to the
    1132                 :             :          * original values (it might go away).
    1133                 :             :          */
    1134                 :         105 :         datalen = 0;                            /* space for by-ref data */
    1135                 :         105 :         map = palloc_array(Datum *, ndims);
    1136                 :             : 
    1137         [ +  + ]:         422 :         for (dim = 0; dim < ndims; dim++)
    1138                 :             :         {
    1139                 :         317 :                 map[dim] = palloc_array(Datum, info[dim].nvalues);
    1140                 :             : 
    1141                 :             :                 /* space needed for a copy of data for by-ref types */
    1142                 :         317 :                 datalen += info[dim].nbytes_aligned;
    1143                 :         317 :         }
    1144                 :             : 
    1145                 :             :         /*
    1146                 :             :          * Now resize the MCV list so that the allocation includes all the data.
    1147                 :             :          *
    1148                 :             :          * Allocate space for a copy of the data, as we can't simply reference the
    1149                 :             :          * serialized data - it's not aligned properly, and it may disappear while
    1150                 :             :          * we're still using the MCV list, e.g. due to catcache release.
    1151                 :             :          *
    1152                 :             :          * We do care about alignment here, because we will allocate all the
    1153                 :             :          * pieces at once, but then use pointers to different parts.
    1154                 :             :          */
    1155                 :         105 :         mcvlen = MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
    1156                 :             : 
    1157                 :             :         /* arrays of values and isnull flags for all MCV items */
    1158                 :         105 :         mcvlen += nitems * MAXALIGN(sizeof(Datum) * ndims);
    1159                 :         105 :         mcvlen += nitems * MAXALIGN(sizeof(bool) * ndims);
    1160                 :             : 
    1161                 :             :         /* we don't quite need to align this, but it makes some asserts easier */
    1162                 :         105 :         mcvlen += MAXALIGN(datalen);
    1163                 :             : 
    1164                 :             :         /* now resize the deserialized MCV list, and compute pointers to parts */
    1165                 :         105 :         mcvlist = repalloc(mcvlist, mcvlen);
    1166                 :             : 
    1167                 :             :         /* pointer to the beginning of values/isnull arrays */
    1168                 :         210 :         valuesptr = (char *) mcvlist
    1169                 :         105 :                 + MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
    1170                 :             : 
    1171                 :         105 :         isnullptr = valuesptr + (nitems * MAXALIGN(sizeof(Datum) * ndims));
    1172                 :             : 
    1173                 :         105 :         dataptr = isnullptr + (nitems * MAXALIGN(sizeof(bool) * ndims));
    1174                 :             : 
    1175                 :             :         /*
    1176                 :             :          * Build mapping (index => value) for translating the serialized data into
    1177                 :             :          * the in-memory representation.
    1178                 :             :          */
    1179         [ +  + ]:         422 :         for (dim = 0; dim < ndims; dim++)
    1180                 :             :         {
    1181                 :             :                 /* remember start position in the input array */
    1182                 :         317 :                 char       *start PG_USED_FOR_ASSERTS_ONLY = ptr;
    1183                 :             : 
    1184         [ +  + ]:         317 :                 if (info[dim].typbyval)
    1185                 :             :                 {
    1186                 :             :                         /* for by-val types we simply copy data into the mapping */
    1187         [ +  + ]:        9491 :                         for (i = 0; i < info[dim].nvalues; i++)
    1188                 :             :                         {
    1189                 :        9269 :                                 Datum           v = 0;
    1190                 :             : 
    1191                 :        9269 :                                 memcpy(&v, ptr, info[dim].typlen);
    1192                 :        9269 :                                 ptr += info[dim].typlen;
    1193                 :             : 
    1194                 :        9269 :                                 map[dim][i] = fetch_att(&v, true, info[dim].typlen);
    1195                 :             : 
    1196                 :             :                                 /* no under/overflow of input array */
    1197         [ +  - ]:        9269 :                                 Assert(ptr <= (start + info[dim].nbytes));
    1198                 :        9269 :                         }
    1199                 :         222 :                 }
    1200                 :             :                 else
    1201                 :             :                 {
    1202                 :             :                         /* for by-ref types we need to also make a copy of the data */
    1203                 :             : 
    1204                 :             :                         /* passed by reference, but fixed length (name, tid, ...) */
    1205         [ +  + ]:          95 :                         if (info[dim].typlen > 0)
    1206                 :             :                         {
    1207         [ +  + ]:         367 :                                 for (i = 0; i < info[dim].nvalues; i++)
    1208                 :             :                                 {
    1209                 :         360 :                                         memcpy(dataptr, ptr, info[dim].typlen);
    1210                 :         360 :                                         ptr += info[dim].typlen;
    1211                 :             : 
    1212                 :             :                                         /* just point into the array */
    1213                 :         360 :                                         map[dim][i] = PointerGetDatum(dataptr);
    1214                 :         360 :                                         dataptr += MAXALIGN(info[dim].typlen);
    1215                 :         360 :                                 }
    1216                 :           7 :                         }
    1217         [ -  + ]:          88 :                         else if (info[dim].typlen == -1)
    1218                 :             :                         {
    1219                 :             :                                 /* varlena */
    1220         [ +  + ]:        2504 :                                 for (i = 0; i < info[dim].nvalues; i++)
    1221                 :             :                                 {
    1222                 :        2416 :                                         uint32          len;
    1223                 :             : 
    1224                 :             :                                         /* read the uint32 length */
    1225                 :        2416 :                                         memcpy(&len, ptr, sizeof(uint32));
    1226                 :        2416 :                                         ptr += sizeof(uint32);
    1227                 :             : 
    1228                 :             :                                         /* the length is data-only */
    1229                 :        2416 :                                         SET_VARSIZE(dataptr, len + VARHDRSZ);
    1230                 :        2416 :                                         memcpy(VARDATA(dataptr), ptr, len);
    1231                 :        2416 :                                         ptr += len;
    1232                 :             : 
    1233                 :             :                                         /* just point into the array */
    1234                 :        2416 :                                         map[dim][i] = PointerGetDatum(dataptr);
    1235                 :             : 
    1236                 :             :                                         /* skip to place of the next deserialized value */
    1237                 :        2416 :                                         dataptr += MAXALIGN(len + VARHDRSZ);
    1238                 :        2416 :                                 }
    1239                 :          88 :                         }
    1240         [ #  # ]:           0 :                         else if (info[dim].typlen == -2)
    1241                 :             :                         {
    1242                 :             :                                 /* cstring */
    1243         [ #  # ]:           0 :                                 for (i = 0; i < info[dim].nvalues; i++)
    1244                 :             :                                 {
    1245                 :           0 :                                         uint32          len;
    1246                 :             : 
    1247                 :           0 :                                         memcpy(&len, ptr, sizeof(uint32));
    1248                 :           0 :                                         ptr += sizeof(uint32);
    1249                 :             : 
    1250                 :           0 :                                         memcpy(dataptr, ptr, len);
    1251                 :           0 :                                         ptr += len;
    1252                 :             : 
    1253                 :             :                                         /* just point into the array */
    1254                 :           0 :                                         map[dim][i] = PointerGetDatum(dataptr);
    1255                 :           0 :                                         dataptr += MAXALIGN(len);
    1256                 :           0 :                                 }
    1257                 :           0 :                         }
    1258                 :             : 
    1259                 :             :                         /* no under/overflow of input array */
    1260         [ -  + ]:          95 :                         Assert(ptr <= (start + info[dim].nbytes));
    1261                 :             : 
    1262                 :             :                         /* no overflow of the output mcv value */
    1263         [ -  + ]:          95 :                         Assert(dataptr <= ((char *) mcvlist + mcvlen));
    1264                 :             :                 }
    1265                 :             : 
    1266                 :             :                 /* check we consumed input data for this dimension exactly */
    1267         [ -  + ]:         317 :                 Assert(ptr == (start + info[dim].nbytes));
    1268                 :         317 :         }
    1269                 :             : 
    1270                 :             :         /* we should have also filled the MCV list exactly */
    1271         [ +  - ]:         105 :         Assert(dataptr == ((char *) mcvlist + mcvlen));
    1272                 :             : 
    1273                 :             :         /* deserialize the MCV items and translate the indexes to Datums */
    1274         [ +  + ]:        7202 :         for (i = 0; i < nitems; i++)
    1275                 :             :         {
    1276                 :        7097 :                 MCVItem    *item = &mcvlist->items[i];
    1277                 :             : 
    1278                 :        7097 :                 item->values = (Datum *) valuesptr;
    1279                 :        7097 :                 valuesptr += MAXALIGN(sizeof(Datum) * ndims);
    1280                 :             : 
    1281                 :        7097 :                 item->isnull = (bool *) isnullptr;
    1282                 :        7097 :                 isnullptr += MAXALIGN(sizeof(bool) * ndims);
    1283                 :             : 
    1284                 :        7097 :                 memcpy(item->isnull, ptr, sizeof(bool) * ndims);
    1285                 :        7097 :                 ptr += sizeof(bool) * ndims;
    1286                 :             : 
    1287                 :        7097 :                 memcpy(&item->frequency, ptr, sizeof(double));
    1288                 :        7097 :                 ptr += sizeof(double);
    1289                 :             : 
    1290                 :        7097 :                 memcpy(&item->base_frequency, ptr, sizeof(double));
    1291                 :        7097 :                 ptr += sizeof(double);
    1292                 :             : 
    1293                 :             :                 /* finally translate the indexes (for non-NULL only) */
    1294         [ +  + ]:       28616 :                 for (dim = 0; dim < ndims; dim++)
    1295                 :             :                 {
    1296                 :       21519 :                         uint16          index;
    1297                 :             : 
    1298                 :       21519 :                         memcpy(&index, ptr, sizeof(uint16));
    1299                 :       21519 :                         ptr += sizeof(uint16);
    1300                 :             : 
    1301         [ +  + ]:       21519 :                         if (item->isnull[dim])
    1302                 :          51 :                                 continue;
    1303                 :             : 
    1304                 :       21468 :                         item->values[dim] = map[dim][index];
    1305      [ -  +  + ]:       21519 :                 }
    1306                 :             : 
    1307                 :             :                 /* check we're not overflowing the input */
    1308         [ +  - ]:        7097 :                 Assert(ptr <= endptr);
    1309                 :        7097 :         }
    1310                 :             : 
    1311                 :             :         /* check that we processed all the data */
    1312         [ +  - ]:         105 :         Assert(ptr == endptr);
    1313                 :             : 
    1314                 :             :         /* release the buffers used for mapping */
    1315         [ +  + ]:         422 :         for (dim = 0; dim < ndims; dim++)
    1316                 :         317 :                 pfree(map[dim]);
    1317                 :             : 
    1318                 :         105 :         pfree(map);
    1319                 :             : 
    1320                 :         105 :         return mcvlist;
    1321                 :         105 : }
    1322                 :             : 
    1323                 :             : /*
    1324                 :             :  * SRF with details about buckets of a histogram:
    1325                 :             :  *
    1326                 :             :  * - item ID (0...nitems)
    1327                 :             :  * - values (string array)
    1328                 :             :  * - nulls only (boolean array)
    1329                 :             :  * - frequency (double precision)
    1330                 :             :  * - base_frequency (double precision)
    1331                 :             :  *
    1332                 :             :  * The input is the OID of the statistics, and there are no rows returned if
    1333                 :             :  * the statistics contains no histogram.
    1334                 :             :  */
    1335                 :             : Datum
    1336                 :          13 : pg_stats_ext_mcvlist_items(PG_FUNCTION_ARGS)
    1337                 :             : {
    1338                 :          13 :         FuncCallContext *funcctx;
    1339                 :             : 
    1340                 :             :         /* stuff done only on the first call of the function */
    1341         [ +  + ]:          13 :         if (SRF_IS_FIRSTCALL())
    1342                 :             :         {
    1343                 :           4 :                 MemoryContext oldcontext;
    1344                 :           4 :                 MCVList    *mcvlist;
    1345                 :           4 :                 TupleDesc       tupdesc;
    1346                 :             : 
    1347                 :             :                 /* create a function context for cross-call persistence */
    1348                 :           4 :                 funcctx = SRF_FIRSTCALL_INIT();
    1349                 :             : 
    1350                 :             :                 /* switch to memory context appropriate for multiple function calls */
    1351                 :           4 :                 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
    1352                 :             : 
    1353                 :           4 :                 mcvlist = statext_mcv_deserialize(PG_GETARG_BYTEA_P(0));
    1354                 :             : 
    1355                 :           4 :                 funcctx->user_fctx = mcvlist;
    1356                 :             : 
    1357                 :             :                 /* total number of tuples to be returned */
    1358                 :           4 :                 funcctx->max_calls = 0;
    1359         [ -  + ]:           4 :                 if (funcctx->user_fctx != NULL)
    1360                 :           4 :                         funcctx->max_calls = mcvlist->nitems;
    1361                 :             : 
    1362                 :             :                 /* Build a tuple descriptor for our result type */
    1363         [ +  - ]:           4 :                 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
    1364   [ #  #  #  # ]:           0 :                         ereport(ERROR,
    1365                 :             :                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1366                 :             :                                          errmsg("function returning record called in context "
    1367                 :             :                                                         "that cannot accept type record")));
    1368                 :           4 :                 tupdesc = BlessTupleDesc(tupdesc);
    1369                 :             : 
    1370                 :             :                 /*
    1371                 :             :                  * generate attribute metadata needed later to produce tuples from raw
    1372                 :             :                  * C strings
    1373                 :             :                  */
    1374                 :           4 :                 funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
    1375                 :             : 
    1376                 :           4 :                 MemoryContextSwitchTo(oldcontext);
    1377                 :           4 :         }
    1378                 :             : 
    1379                 :             :         /* stuff done on every call of the function */
    1380                 :          13 :         funcctx = SRF_PERCALL_SETUP();
    1381                 :             : 
    1382         [ +  + ]:          13 :         if (funcctx->call_cntr < funcctx->max_calls)   /* do when there is more
    1383                 :             :                                                                                                          * left to send */
    1384                 :             :         {
    1385                 :           9 :                 Datum           values[5];
    1386                 :           9 :                 bool            nulls[5];
    1387                 :           9 :                 HeapTuple       tuple;
    1388                 :           9 :                 Datum           result;
    1389                 :           9 :                 ArrayBuildState *astate_values = NULL;
    1390                 :           9 :                 ArrayBuildState *astate_nulls = NULL;
    1391                 :             : 
    1392                 :           9 :                 int                     i;
    1393                 :           9 :                 MCVList    *mcvlist;
    1394                 :           9 :                 MCVItem    *item;
    1395                 :             : 
    1396                 :           9 :                 mcvlist = (MCVList *) funcctx->user_fctx;
    1397                 :             : 
    1398         [ +  - ]:           9 :                 Assert(funcctx->call_cntr < mcvlist->nitems);
    1399                 :             : 
    1400                 :           9 :                 item = &mcvlist->items[funcctx->call_cntr];
    1401                 :             : 
    1402         [ +  + ]:          30 :                 for (i = 0; i < mcvlist->ndimensions; i++)
    1403                 :             :                 {
    1404                 :             : 
    1405                 :          42 :                         astate_nulls = accumArrayResult(astate_nulls,
    1406                 :          21 :                                                                                         BoolGetDatum(item->isnull[i]),
    1407                 :             :                                                                                         false,
    1408                 :             :                                                                                         BOOLOID,
    1409                 :          21 :                                                                                         CurrentMemoryContext);
    1410                 :             : 
    1411         [ +  + ]:          21 :                         if (!item->isnull[i])
    1412                 :             :                         {
    1413                 :          17 :                                 bool            isvarlena;
    1414                 :          17 :                                 Oid                     outfunc;
    1415                 :          17 :                                 FmgrInfo        fmgrinfo;
    1416                 :          17 :                                 Datum           val;
    1417                 :          17 :                                 text       *txt;
    1418                 :             : 
    1419                 :             :                                 /* lookup output func for the type */
    1420                 :          17 :                                 getTypeOutputInfo(mcvlist->types[i], &outfunc, &isvarlena);
    1421                 :          17 :                                 fmgr_info(outfunc, &fmgrinfo);
    1422                 :             : 
    1423                 :          17 :                                 val = FunctionCall1(&fmgrinfo, item->values[i]);
    1424                 :          17 :                                 txt = cstring_to_text(DatumGetPointer(val));
    1425                 :             : 
    1426                 :          34 :                                 astate_values = accumArrayResult(astate_values,
    1427                 :          17 :                                                                                                  PointerGetDatum(txt),
    1428                 :             :                                                                                                  false,
    1429                 :             :                                                                                                  TEXTOID,
    1430                 :          17 :                                                                                                  CurrentMemoryContext);
    1431                 :          17 :                         }
    1432                 :             :                         else
    1433                 :           8 :                                 astate_values = accumArrayResult(astate_values,
    1434                 :             :                                                                                                  (Datum) 0,
    1435                 :             :                                                                                                  true,
    1436                 :             :                                                                                                  TEXTOID,
    1437                 :           4 :                                                                                                  CurrentMemoryContext);
    1438                 :          21 :                 }
    1439                 :             : 
    1440                 :           9 :                 values[0] = Int32GetDatum(funcctx->call_cntr);
    1441                 :           9 :                 values[1] = makeArrayResult(astate_values, CurrentMemoryContext);
    1442                 :           9 :                 values[2] = makeArrayResult(astate_nulls, CurrentMemoryContext);
    1443                 :           9 :                 values[3] = Float8GetDatum(item->frequency);
    1444                 :           9 :                 values[4] = Float8GetDatum(item->base_frequency);
    1445                 :             : 
    1446                 :             :                 /* no NULLs in the tuple */
    1447                 :           9 :                 memset(nulls, 0, sizeof(nulls));
    1448                 :             : 
    1449                 :             :                 /* build a tuple */
    1450                 :           9 :                 tuple = heap_form_tuple(funcctx->attinmeta->tupdesc, values, nulls);
    1451                 :             : 
    1452                 :             :                 /* make the tuple into a datum */
    1453                 :           9 :                 result = HeapTupleGetDatum(tuple);
    1454                 :             : 
    1455                 :           9 :                 SRF_RETURN_NEXT(funcctx, result);
    1456         [ +  - ]:           9 :         }
    1457                 :             :         else                                            /* do when there is no more left */
    1458                 :             :         {
    1459         [ +  - ]:           4 :                 SRF_RETURN_DONE(funcctx);
    1460                 :             :         }
    1461         [ -  + ]:          13 : }
    1462                 :             : 
    1463                 :             : /*
    1464                 :             :  * pg_mcv_list_in               - input routine for type pg_mcv_list.
    1465                 :             :  *
    1466                 :             :  * pg_mcv_list is real enough to be a table column, but it has no operations
    1467                 :             :  * of its own, and disallows input too
    1468                 :             :  */
    1469                 :             : Datum
    1470                 :           0 : pg_mcv_list_in(PG_FUNCTION_ARGS)
    1471                 :             : {
    1472                 :             :         /*
    1473                 :             :          * pg_mcv_list stores the data in binary form and parsing text input is
    1474                 :             :          * not needed, so disallow this.
    1475                 :             :          */
    1476   [ #  #  #  # ]:           0 :         ereport(ERROR,
    1477                 :             :                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1478                 :             :                          errmsg("cannot accept a value of type %s", "pg_mcv_list")));
    1479                 :             : 
    1480                 :           0 :         PG_RETURN_VOID();                       /* keep compiler quiet */
    1481                 :             : }
    1482                 :             : 
    1483                 :             : 
    1484                 :             : /*
    1485                 :             :  * pg_mcv_list_out              - output routine for type pg_mcv_list.
    1486                 :             :  *
    1487                 :             :  * MCV lists are serialized into a bytea value, so we simply call byteaout()
    1488                 :             :  * to serialize the value into text. But it'd be nice to serialize that into
    1489                 :             :  * a meaningful representation (e.g. for inspection by people).
    1490                 :             :  *
    1491                 :             :  * XXX This should probably return something meaningful, similar to what
    1492                 :             :  * pg_dependencies_out does. Not sure how to deal with the deduplicated
    1493                 :             :  * values, though - do we want to expand that or not?
    1494                 :             :  */
    1495                 :             : Datum
    1496                 :           0 : pg_mcv_list_out(PG_FUNCTION_ARGS)
    1497                 :             : {
    1498                 :           0 :         return byteaout(fcinfo);
    1499                 :             : }
    1500                 :             : 
    1501                 :             : /*
    1502                 :             :  * pg_mcv_list_recv             - binary input routine for type pg_mcv_list.
    1503                 :             :  */
    1504                 :             : Datum
    1505                 :           0 : pg_mcv_list_recv(PG_FUNCTION_ARGS)
    1506                 :             : {
    1507   [ #  #  #  # ]:           0 :         ereport(ERROR,
    1508                 :             :                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1509                 :             :                          errmsg("cannot accept a value of type %s", "pg_mcv_list")));
    1510                 :             : 
    1511                 :           0 :         PG_RETURN_VOID();                       /* keep compiler quiet */
    1512                 :             : }
    1513                 :             : 
    1514                 :             : /*
    1515                 :             :  * pg_mcv_list_send             - binary output routine for type pg_mcv_list.
    1516                 :             :  *
    1517                 :             :  * MCV lists are serialized in a bytea value (although the type is named
    1518                 :             :  * differently), so let's just send that.
    1519                 :             :  */
    1520                 :             : Datum
    1521                 :           0 : pg_mcv_list_send(PG_FUNCTION_ARGS)
    1522                 :             : {
    1523                 :           0 :         return byteasend(fcinfo);
    1524                 :             : }
    1525                 :             : 
    1526                 :             : /*
    1527                 :             :  * match the attribute/expression to a dimension of the statistic
    1528                 :             :  *
    1529                 :             :  * Returns the zero-based index of the matching statistics dimension.
    1530                 :             :  * Optionally determines the collation.
    1531                 :             :  */
    1532                 :             : static int
    1533                 :         235 : mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid)
    1534                 :             : {
    1535                 :         235 :         int                     idx;
    1536                 :             : 
    1537         [ +  + ]:         235 :         if (IsA(expr, Var))
    1538                 :             :         {
    1539                 :             :                 /* simple Var, so just lookup using varattno */
    1540                 :         194 :                 Var                *var = (Var *) expr;
    1541                 :             : 
    1542         [ +  + ]:         194 :                 if (collid)
    1543                 :         183 :                         *collid = var->varcollid;
    1544                 :             : 
    1545                 :         194 :                 idx = bms_member_index(keys, var->varattno);
    1546                 :             : 
    1547         [ +  - ]:         194 :                 if (idx < 0)
    1548   [ #  #  #  # ]:           0 :                         elog(ERROR, "variable not found in statistics object");
    1549                 :         194 :         }
    1550                 :             :         else
    1551                 :             :         {
    1552                 :             :                 /* expression - lookup in stats expressions */
    1553                 :          41 :                 ListCell   *lc;
    1554                 :             : 
    1555         [ +  + ]:          41 :                 if (collid)
    1556                 :          40 :                         *collid = exprCollation(expr);
    1557                 :             : 
    1558                 :             :                 /* expressions are stored after the simple columns */
    1559                 :          41 :                 idx = bms_num_members(keys);
    1560   [ +  -  -  +  :         117 :                 foreach(lc, exprs)
                   +  - ]
    1561                 :             :                 {
    1562                 :          76 :                         Node       *stat_expr = (Node *) lfirst(lc);
    1563                 :             : 
    1564         [ +  + ]:          76 :                         if (equal(expr, stat_expr))
    1565                 :          41 :                                 break;
    1566                 :             : 
    1567                 :          35 :                         idx++;
    1568         [ +  + ]:          76 :                 }
    1569                 :             : 
    1570         [ +  - ]:          41 :                 if (lc == NULL)
    1571   [ #  #  #  # ]:           0 :                         elog(ERROR, "expression not found in statistics object");
    1572                 :          41 :         }
    1573                 :             : 
    1574                 :         470 :         return idx;
    1575                 :         235 : }
    1576                 :             : 
    1577                 :             : /*
    1578                 :             :  * mcv_get_match_bitmap
    1579                 :             :  *      Evaluate clauses using the MCV list, and update the match bitmap.
    1580                 :             :  *
    1581                 :             :  * A match bitmap keeps match/mismatch status for each MCV item, and we
    1582                 :             :  * update it based on additional clauses. We also use it to skip items
    1583                 :             :  * that can't possibly match (e.g. item marked as "mismatch" can't change
    1584                 :             :  * to "match" when evaluating AND clause list).
    1585                 :             :  *
    1586                 :             :  * The function also returns a flag indicating whether there was an
    1587                 :             :  * equality condition for all attributes, the minimum frequency in the MCV
    1588                 :             :  * list, and a total MCV frequency (sum of frequencies for all items).
    1589                 :             :  *
    1590                 :             :  * XXX Currently the match bitmap uses a bool for each MCV item, which is
    1591                 :             :  * somewhat wasteful as we could do with just a single bit, thus reducing
    1592                 :             :  * the size to ~1/8. It would also allow us to combine bitmaps simply using
    1593                 :             :  * & and |, which should be faster than min/max. The bitmaps are fairly
    1594                 :             :  * small, though (thanks to the cap on the MCV list size).
    1595                 :             :  */
    1596                 :             : static bool *
    1597                 :         141 : mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
    1598                 :             :                                          Bitmapset *keys, List *exprs,
    1599                 :             :                                          MCVList *mcvlist, bool is_or)
    1600                 :             : {
    1601                 :         141 :         ListCell   *l;
    1602                 :         141 :         bool       *matches;
    1603                 :             : 
    1604                 :             :         /* The bitmap may be partially built. */
    1605         [ +  - ]:         141 :         Assert(clauses != NIL);
    1606         [ +  - ]:         141 :         Assert(mcvlist != NULL);
    1607         [ +  - ]:         141 :         Assert(mcvlist->nitems > 0);
    1608         [ +  - ]:         141 :         Assert(mcvlist->nitems <= STATS_MCVLIST_MAX_ITEMS);
    1609                 :             : 
    1610                 :         141 :         matches = palloc_array(bool, mcvlist->nitems);
    1611                 :         141 :         memset(matches, !is_or, sizeof(bool) * mcvlist->nitems);
    1612                 :             : 
    1613                 :             :         /*
    1614                 :             :          * Loop through the list of clauses, and for each of them evaluate all the
    1615                 :             :          * MCV items not yet eliminated by the preceding clauses.
    1616                 :             :          */
    1617   [ +  -  +  +  :         405 :         foreach(l, clauses)
                   +  + ]
    1618                 :             :         {
    1619                 :       13856 :                 Node       *clause = (Node *) lfirst(l);
    1620                 :             : 
    1621                 :             :                 /* if it's a RestrictInfo, then extract the clause */
    1622         [ +  + ]:       13856 :                 if (IsA(clause, RestrictInfo))
    1623                 :         245 :                         clause = (Node *) ((RestrictInfo *) clause)->clause;
    1624                 :             : 
    1625                 :             :                 /*
    1626                 :             :                  * Handle the various types of clauses - OpClause, NullTest and
    1627                 :             :                  * AND/OR/NOT
    1628                 :             :                  */
    1629         [ +  + ]:       13856 :                 if (is_opclause(clause))
    1630                 :             :                 {
    1631                 :        9931 :                         OpExpr     *expr = (OpExpr *) clause;
    1632                 :        9931 :                         FmgrInfo        opproc;
    1633                 :             : 
    1634                 :             :                         /* valid only after examine_opclause_args returns true */
    1635                 :        9931 :                         Node       *clause_expr;
    1636                 :        9931 :                         Const      *cst;
    1637                 :        9931 :                         bool            expronleft;
    1638                 :        9931 :                         int                     idx;
    1639                 :        9931 :                         Oid                     collid;
    1640                 :             : 
    1641                 :        9931 :                         fmgr_info(get_opcode(expr->opno), &opproc);
    1642                 :             : 
    1643                 :             :                         /* extract the var/expr and const from the expression */
    1644         [ +  - ]:        9931 :                         if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
    1645   [ #  #  #  # ]:           0 :                                 elog(ERROR, "incompatible clause");
    1646                 :             : 
    1647                 :             :                         /* match the attribute/expression to a dimension of the statistic */
    1648                 :        9931 :                         idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
    1649                 :             : 
    1650                 :             :                         /*
    1651                 :             :                          * Walk through the MCV items and evaluate the current clause. We
    1652                 :             :                          * can skip items that were already ruled out, and terminate if
    1653                 :             :                          * there are no remaining MCV items that might possibly match.
    1654                 :             :                          */
    1655         [ +  + ]:       22657 :                         for (int i = 0; i < mcvlist->nitems; i++)
    1656                 :             :                         {
    1657                 :       22480 :                                 bool            match = true;
    1658                 :       22480 :                                 MCVItem    *item = &mcvlist->items[i];
    1659                 :             : 
    1660         [ +  - ]:       22480 :                                 Assert(idx >= 0);
    1661                 :             : 
    1662                 :             :                                 /*
    1663                 :             :                                  * When the MCV item or the Const value is NULL we can treat
    1664                 :             :                                  * this as a mismatch. We must not call the operator because
    1665                 :             :                                  * of strictness.
    1666                 :             :                                  */
    1667   [ +  +  +  + ]:       22480 :                                 if (item->isnull[idx] || cst->constisnull)
    1668                 :             :                                 {
    1669   [ +  +  -  +  :       19538 :                                         matches[i] = RESULT_MERGE(matches[i], is_or, false);
                   -  + ]
    1670                 :           8 :                                         continue;
    1671                 :             :                                 }
    1672                 :             : 
    1673                 :             :                                 /*
    1674                 :             :                                  * Skip MCV items that can't change result in the bitmap. Once
    1675                 :             :                                  * the value gets false for AND-lists, or true for OR-lists,
    1676                 :             :                                  * we don't need to look at more clauses.
    1677                 :             :                                  */
    1678   [ +  +  +  + ]:       22488 :                                 if (RESULT_IS_FINAL(matches[i], is_or))
    1679                 :        5087 :                                         continue;
    1680                 :             : 
    1681                 :             :                                 /*
    1682                 :             :                                  * First check whether the constant is below the lower
    1683                 :             :                                  * boundary (in that case we can skip the bucket, because
    1684                 :             :                                  * there's no overlap).
    1685                 :             :                                  *
    1686                 :             :                                  * We don't store collations used to build the statistics, but
    1687                 :             :                                  * we can use the collation for the attribute itself, as
    1688                 :             :                                  * stored in varcollid. We do reset the statistics after a
    1689                 :             :                                  * type change (including collation change), so this is OK.
    1690                 :             :                                  * For expressions, we use the collation extracted from the
    1691                 :             :                                  * expression itself.
    1692                 :             :                                  */
    1693         [ +  + ]:       17401 :                                 if (expronleft)
    1694                 :        7195 :                                         match = DatumGetBool(FunctionCall2Coll(&opproc,
    1695                 :        7195 :                                                                                                                    collid,
    1696                 :        7195 :                                                                                                                    item->values[idx],
    1697                 :        7195 :                                                                                                                    cst->constvalue));
    1698                 :             :                                 else
    1699                 :         436 :                                         match = DatumGetBool(FunctionCall2Coll(&opproc,
    1700                 :         436 :                                                                                                                    collid,
    1701                 :         436 :                                                                                                                    cst->constvalue,
    1702                 :         436 :                                                                                                                    item->values[idx]));
    1703                 :             : 
    1704                 :             :                                 /* update the match bitmap with the result */
    1705   [ +  +  -  +  :        7631 :                                 matches[i] = RESULT_MERGE(matches[i], is_or, match);
                   -  + ]
    1706         [ +  + ]:       12726 :                         }
    1707                 :         177 :                 }
    1708         [ +  + ]:        3925 :                 else if (IsA(clause, ScalarArrayOpExpr))
    1709                 :             :                 {
    1710                 :        3884 :                         ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
    1711                 :        3884 :                         FmgrInfo        opproc;
    1712                 :             : 
    1713                 :             :                         /* valid only after examine_opclause_args returns true */
    1714                 :        3884 :                         Node       *clause_expr;
    1715                 :        3884 :                         Const      *cst;
    1716                 :        3884 :                         bool            expronleft;
    1717                 :        3884 :                         Oid                     collid;
    1718                 :        3884 :                         int                     idx;
    1719                 :             : 
    1720                 :             :                         /* array evaluation */
    1721                 :        3884 :                         ArrayType  *arrayval;
    1722                 :        3884 :                         int16           elmlen;
    1723                 :        3884 :                         bool            elmbyval;
    1724                 :        3884 :                         char            elmalign;
    1725                 :        3884 :                         int                     num_elems;
    1726                 :        3884 :                         Datum      *elem_values;
    1727                 :        3884 :                         bool       *elem_nulls;
    1728                 :             : 
    1729                 :        3884 :                         fmgr_info(get_opcode(expr->opno), &opproc);
    1730                 :             : 
    1731                 :             :                         /* extract the var/expr and const from the expression */
    1732         [ +  - ]:        3884 :                         if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
    1733   [ #  #  #  # ]:           0 :                                 elog(ERROR, "incompatible clause");
    1734                 :             : 
    1735                 :             :                         /* We expect Var on left */
    1736         [ +  - ]:        3884 :                         if (!expronleft)
    1737   [ #  #  #  # ]:           0 :                                 elog(ERROR, "incompatible clause");
    1738                 :             : 
    1739                 :             :                         /*
    1740                 :             :                          * Deconstruct the array constant, unless it's NULL (we'll cover
    1741                 :             :                          * that case below)
    1742                 :             :                          */
    1743         [ +  + ]:        3884 :                         if (!cst->constisnull)
    1744                 :             :                         {
    1745                 :          46 :                                 arrayval = DatumGetArrayTypeP(cst->constvalue);
    1746                 :          46 :                                 get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
    1747                 :             :                                                                          &elmlen, &elmbyval, &elmalign);
    1748                 :          92 :                                 deconstruct_array(arrayval,
    1749                 :          46 :                                                                   ARR_ELEMTYPE(arrayval),
    1750                 :          46 :                                                                   elmlen, elmbyval, elmalign,
    1751                 :             :                                                                   &elem_values, &elem_nulls, &num_elems);
    1752                 :          46 :                         }
    1753                 :             : 
    1754                 :             :                         /* match the attribute/expression to a dimension of the statistic */
    1755                 :        3884 :                         idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
    1756                 :             : 
    1757                 :             :                         /*
    1758                 :             :                          * Walk through the MCV items and evaluate the current clause. We
    1759                 :             :                          * can skip items that were already ruled out, and terminate if
    1760                 :             :                          * there are no remaining MCV items that might possibly match.
    1761                 :             :                          */
    1762         [ +  + ]:        7666 :                         for (int i = 0; i < mcvlist->nitems; i++)
    1763                 :             :                         {
    1764                 :        7620 :                                 int                     j;
    1765                 :        7620 :                                 bool            match = !expr->useOr;
    1766                 :        7620 :                                 MCVItem    *item = &mcvlist->items[i];
    1767                 :             : 
    1768                 :             :                                 /*
    1769                 :             :                                  * When the MCV item or the Const value is NULL we can treat
    1770                 :             :                                  * this as a mismatch. We must not call the operator because
    1771                 :             :                                  * of strictness.
    1772                 :             :                                  */
    1773   [ +  +  +  + ]:        7620 :                                 if (item->isnull[idx] || cst->constisnull)
    1774                 :             :                                 {
    1775   [ -  +  #  #  :        7686 :                                         matches[i] = RESULT_MERGE(matches[i], is_or, false);
                   +  + ]
    1776                 :           3 :                                         continue;
    1777                 :             :                                 }
    1778                 :             : 
    1779                 :             :                                 /*
    1780                 :             :                                  * Skip MCV items that can't change result in the bitmap. Once
    1781                 :             :                                  * the value gets false for AND-lists, or true for OR-lists,
    1782                 :             :                                  * we don't need to look at more clauses.
    1783                 :             :                                  */
    1784   [ +  +  +  + ]:        7623 :                                 if (RESULT_IS_FINAL(matches[i], is_or))
    1785                 :        1924 :                                         continue;
    1786                 :             : 
    1787         [ +  + ]:        9803 :                                 for (j = 0; j < num_elems; j++)
    1788                 :             :                                 {
    1789                 :        5163 :                                         Datum           elem_value = elem_values[j];
    1790                 :        5163 :                                         bool            elem_isnull = elem_nulls[j];
    1791                 :        5163 :                                         bool            elem_match;
    1792                 :             : 
    1793                 :             :                                         /* NULL values always evaluate as not matching. */
    1794         [ +  + ]:        5163 :                                         if (elem_isnull)
    1795                 :             :                                         {
    1796   [ +  -  +  +  :         352 :                                                 match = RESULT_MERGE(match, expr->useOr, false);
                   #  # ]
    1797                 :         352 :                                                 continue;
    1798                 :             :                                         }
    1799                 :             : 
    1800                 :             :                                         /*
    1801                 :             :                                          * Stop evaluating the array elements once we reach a
    1802                 :             :                                          * matching value that can't change - ALL() is the same as
    1803                 :             :                                          * AND-list, ANY() is the same as OR-list.
    1804                 :             :                                          */
    1805   [ +  +  +  + ]:        4811 :                                         if (RESULT_IS_FINAL(match, expr->useOr))
    1806                 :        1059 :                                                 break;
    1807                 :             : 
    1808                 :        3752 :                                         elem_match = DatumGetBool(FunctionCall2Coll(&opproc,
    1809                 :        3752 :                                                                                                                                 collid,
    1810                 :        3752 :                                                                                                                                 item->values[idx],
    1811                 :        3752 :                                                                                                                                 elem_value));
    1812                 :             : 
    1813   [ +  +  -  +  :        3752 :                                         match = RESULT_MERGE(match, expr->useOr, elem_match);
                   -  + ]
    1814      [ +  +  + ]:        5163 :                                 }
    1815                 :             : 
    1816                 :             :                                 /* update the match bitmap with the result */
    1817   [ +  +  -  +  :        1855 :                                 matches[i] = RESULT_MERGE(matches[i], is_or, match);
                   -  + ]
    1818         [ +  + ]:        3782 :                         }
    1819                 :          46 :                 }
    1820         [ +  + ]:          41 :                 else if (IsA(clause, NullTest))
    1821                 :             :                 {
    1822                 :          11 :                         NullTest   *expr = (NullTest *) clause;
    1823                 :          11 :                         Node       *clause_expr = (Node *) (expr->arg);
    1824                 :             : 
    1825                 :             :                         /* match the attribute/expression to a dimension of the statistic */
    1826                 :          11 :                         int                     idx = mcv_match_expression(clause_expr, keys, exprs, NULL);
    1827                 :             : 
    1828                 :             :                         /*
    1829                 :             :                          * Walk through the MCV items and evaluate the current clause. We
    1830                 :             :                          * can skip items that were already ruled out, and terminate if
    1831                 :             :                          * there are no remaining MCV items that might possibly match.
    1832                 :             :                          */
    1833         [ +  + ]:        1013 :                         for (int i = 0; i < mcvlist->nitems; i++)
    1834                 :             :                         {
    1835                 :        1002 :                                 bool            match = false;  /* assume mismatch */
    1836                 :        1002 :                                 MCVItem    *item = &mcvlist->items[i];
    1837                 :             : 
    1838                 :             :                                 /* if the clause mismatches the MCV item, update the bitmap */
    1839      [ -  +  + ]:        1002 :                                 switch (expr->nulltesttype)
    1840                 :             :                                 {
    1841                 :             :                                         case IS_NULL:
    1842         [ +  + ]:         702 :                                                 match = (item->isnull[idx]) ? true : match;
    1843                 :         702 :                                                 break;
    1844                 :             : 
    1845                 :             :                                         case IS_NOT_NULL:
    1846         [ +  + ]:         300 :                                                 match = (!item->isnull[idx]) ? true : match;
    1847                 :         300 :                                                 break;
    1848                 :             :                                 }
    1849                 :             : 
    1850                 :             :                                 /* now, update the match bitmap, depending on OR/AND type */
    1851   [ -  +  #  #  :        1002 :                                 matches[i] = RESULT_MERGE(matches[i], is_or, match);
                   +  + ]
    1852                 :        1002 :                         }
    1853                 :          11 :                 }
    1854   [ +  +  +  + ]:          30 :                 else if (is_orclause(clause) || is_andclause(clause))
    1855                 :             :                 {
    1856                 :             :                         /* AND/OR clause, with all subclauses being compatible */
    1857                 :             : 
    1858                 :          11 :                         int                     i;
    1859                 :          11 :                         BoolExpr   *bool_clause = ((BoolExpr *) clause);
    1860                 :          11 :                         List       *bool_clauses = bool_clause->args;
    1861                 :             : 
    1862                 :             :                         /* match/mismatch bitmap for each MCV item */
    1863                 :          11 :                         bool       *bool_matches = NULL;
    1864                 :             : 
    1865         [ -  + ]:          11 :                         Assert(bool_clauses != NIL);
    1866         [ -  + ]:          11 :                         Assert(list_length(bool_clauses) >= 2);
    1867                 :             : 
    1868                 :             :                         /* build the match bitmap for the OR-clauses */
    1869                 :          22 :                         bool_matches = mcv_get_match_bitmap(root, bool_clauses, keys, exprs,
    1870                 :          11 :                                                                                                 mcvlist, is_orclause(clause));
    1871                 :             : 
    1872                 :             :                         /*
    1873                 :             :                          * Merge the bitmap produced by mcv_get_match_bitmap into the
    1874                 :             :                          * current one. We need to consider if we're evaluating AND or OR
    1875                 :             :                          * condition when merging the results.
    1876                 :             :                          */
    1877         [ +  + ]:         727 :                         for (i = 0; i < mcvlist->nitems; i++)
    1878   [ -  +  #  #  :         716 :                                 matches[i] = RESULT_MERGE(matches[i], is_or, bool_matches[i]);
                   -  + ]
    1879                 :             : 
    1880                 :          11 :                         pfree(bool_matches);
    1881                 :          11 :                 }
    1882         [ +  + ]:          19 :                 else if (is_notclause(clause))
    1883                 :             :                 {
    1884                 :             :                         /* NOT clause, with all subclauses compatible */
    1885                 :             : 
    1886                 :           5 :                         int                     i;
    1887                 :           5 :                         BoolExpr   *not_clause = ((BoolExpr *) clause);
    1888                 :           5 :                         List       *not_args = not_clause->args;
    1889                 :             : 
    1890                 :             :                         /* match/mismatch bitmap for each MCV item */
    1891                 :           5 :                         bool       *not_matches = NULL;
    1892                 :             : 
    1893         [ -  + ]:           5 :                         Assert(not_args != NIL);
    1894         [ -  + ]:           5 :                         Assert(list_length(not_args) == 1);
    1895                 :             : 
    1896                 :             :                         /* build the match bitmap for the NOT-clause */
    1897                 :          10 :                         not_matches = mcv_get_match_bitmap(root, not_args, keys, exprs,
    1898                 :           5 :                                                                                            mcvlist, false);
    1899                 :             : 
    1900                 :             :                         /*
    1901                 :             :                          * Merge the bitmap produced by mcv_get_match_bitmap into the
    1902                 :             :                          * current one. We're handling a NOT clause, so invert the result
    1903                 :             :                          * before merging it into the global bitmap.
    1904                 :             :                          */
    1905         [ +  + ]:          25 :                         for (i = 0; i < mcvlist->nitems; i++)
    1906   [ -  +  #  #  :          20 :                                 matches[i] = RESULT_MERGE(matches[i], is_or, !not_matches[i]);
                   +  + ]
    1907                 :             : 
    1908                 :           5 :                         pfree(not_matches);
    1909                 :           5 :                 }
    1910         [ +  + ]:          14 :                 else if (IsA(clause, Var))
    1911                 :             :                 {
    1912                 :             :                         /* Var (has to be a boolean Var, possibly from below NOT) */
    1913                 :             : 
    1914                 :          13 :                         Var                *var = (Var *) (clause);
    1915                 :             : 
    1916                 :             :                         /* match the attribute to a dimension of the statistic */
    1917                 :          13 :                         int                     idx = bms_member_index(keys, var->varattno);
    1918                 :             : 
    1919         [ +  - ]:          13 :                         Assert(var->vartype == BOOLOID);
    1920                 :             : 
    1921                 :             :                         /*
    1922                 :             :                          * Walk through the MCV items and evaluate the current clause. We
    1923                 :             :                          * can skip items that were already ruled out, and terminate if
    1924                 :             :                          * there are no remaining MCV items that might possibly match.
    1925                 :             :                          */
    1926         [ +  + ]:          63 :                         for (int i = 0; i < mcvlist->nitems; i++)
    1927                 :             :                         {
    1928                 :          50 :                                 MCVItem    *item = &mcvlist->items[i];
    1929                 :          50 :                                 bool            match = false;
    1930                 :             : 
    1931                 :             :                                 /* if the item is NULL, it's a mismatch */
    1932   [ +  -  +  + ]:          50 :                                 if (!item->isnull[idx] && DatumGetBool(item->values[idx]))
    1933                 :          25 :                                         match = true;
    1934                 :             : 
    1935                 :             :                                 /* update the result bitmap */
    1936   [ +  +  -  +  :          50 :                                 matches[i] = RESULT_MERGE(matches[i], is_or, match);
                   +  + ]
    1937                 :          50 :                         }
    1938                 :          13 :                 }
    1939                 :             :                 else
    1940                 :             :                 {
    1941                 :             :                         /* Otherwise, it must be a bare boolean-returning expression */
    1942                 :           1 :                         int                     idx;
    1943                 :             : 
    1944                 :             :                         /* match the expression to a dimension of the statistic */
    1945                 :           1 :                         idx = mcv_match_expression(clause, keys, exprs, NULL);
    1946                 :             : 
    1947                 :             :                         /*
    1948                 :             :                          * Walk through the MCV items and evaluate the current clause. We
    1949                 :             :                          * can skip items that were already ruled out, and terminate if
    1950                 :             :                          * there are no remaining MCV items that might possibly match.
    1951                 :             :                          */
    1952         [ +  + ]:          37 :                         for (int i = 0; i < mcvlist->nitems; i++)
    1953                 :             :                         {
    1954                 :          36 :                                 bool            match;
    1955                 :          36 :                                 MCVItem    *item = &mcvlist->items[i];
    1956                 :             : 
    1957                 :             :                                 /* "match" just means it's bool TRUE */
    1958         [ -  + ]:          36 :                                 match = !item->isnull[idx] && DatumGetBool(item->values[idx]);
    1959                 :             : 
    1960                 :             :                                 /* now, update the match bitmap, depending on OR/AND type */
    1961   [ -  +  #  #  :          36 :                                 matches[i] = RESULT_MERGE(matches[i], is_or, match);
                   -  + ]
    1962                 :          36 :                         }
    1963                 :           1 :                 }
    1964                 :         264 :         }
    1965                 :             : 
    1966                 :       26902 :         return matches;
    1967                 :       13451 : }
    1968                 :             : 
    1969                 :             : 
    1970                 :             : /*
    1971                 :             :  * mcv_combine_selectivities
    1972                 :             :  *              Combine per-column and multi-column MCV selectivity estimates.
    1973                 :             :  *
    1974                 :             :  * simple_sel is a "simple" selectivity estimate (produced without using any
    1975                 :             :  * extended statistics, essentially assuming independence of columns/clauses).
    1976                 :             :  *
    1977                 :             :  * mcv_sel and mcv_basesel are sums of the frequencies and base frequencies of
    1978                 :             :  * all matching MCV items.  The difference (mcv_sel - mcv_basesel) is then
    1979                 :             :  * essentially interpreted as a correction to be added to simple_sel, as
    1980                 :             :  * described below.
    1981                 :             :  *
    1982                 :             :  * mcv_totalsel is the sum of the frequencies of all MCV items (not just the
    1983                 :             :  * matching ones).  This is used as an upper bound on the portion of the
    1984                 :             :  * selectivity estimates not covered by the MCV statistics.
    1985                 :             :  *
    1986                 :             :  * Note: While simple and base selectivities are defined in a quite similar
    1987                 :             :  * way, the values are computed differently and are not therefore equal. The
    1988                 :             :  * simple selectivity is computed as a product of per-clause estimates, while
    1989                 :             :  * the base selectivity is computed by adding up base frequencies of matching
    1990                 :             :  * items of the multi-column MCV list. So the values may differ for two main
    1991                 :             :  * reasons - (a) the MCV list may not cover 100% of the data and (b) some of
    1992                 :             :  * the MCV items did not match the estimated clauses.
    1993                 :             :  *
    1994                 :             :  * As both (a) and (b) reduce the base selectivity value, it generally holds
    1995                 :             :  * that (simple_sel >= mcv_basesel). If the MCV list covers all the data, the
    1996                 :             :  * values may be equal.
    1997                 :             :  *
    1998                 :             :  * So, other_sel = (simple_sel - mcv_basesel) is an estimate for the part not
    1999                 :             :  * covered by the MCV list, and (mcv_sel - mcv_basesel) may be seen as a
    2000                 :             :  * correction for the part covered by the MCV list. Those two statements are
    2001                 :             :  * actually equivalent.
    2002                 :             :  */
    2003                 :             : Selectivity
    2004                 :         133 : mcv_combine_selectivities(Selectivity simple_sel,
    2005                 :             :                                                   Selectivity mcv_sel,
    2006                 :             :                                                   Selectivity mcv_basesel,
    2007                 :             :                                                   Selectivity mcv_totalsel)
    2008                 :             : {
    2009                 :         133 :         Selectivity other_sel;
    2010                 :         133 :         Selectivity sel;
    2011                 :             : 
    2012                 :             :         /* estimated selectivity of values not covered by MCV matches */
    2013                 :         133 :         other_sel = simple_sel - mcv_basesel;
    2014   [ +  +  +  - ]:         254 :         CLAMP_PROBABILITY(other_sel);
    2015                 :             : 
    2016                 :             :         /* this non-MCV selectivity cannot exceed 1 - mcv_totalsel */
    2017         [ +  + ]:         133 :         if (other_sel > 1.0 - mcv_totalsel)
    2018                 :          74 :                 other_sel = 1.0 - mcv_totalsel;
    2019                 :             : 
    2020                 :             :         /* overall selectivity is the sum of the MCV and non-MCV parts */
    2021                 :         133 :         sel = mcv_sel + other_sel;
    2022   [ +  +  +  - ]:         258 :         CLAMP_PROBABILITY(sel);
    2023                 :             : 
    2024                 :         266 :         return sel;
    2025                 :         133 : }
    2026                 :             : 
    2027                 :             : 
    2028                 :             : /*
    2029                 :             :  * mcv_clauselist_selectivity
    2030                 :             :  *              Use MCV statistics to estimate the selectivity of an implicitly-ANDed
    2031                 :             :  *              list of clauses.
    2032                 :             :  *
    2033                 :             :  * This determines which MCV items match every clause in the list and returns
    2034                 :             :  * the sum of the frequencies of those items.
    2035                 :             :  *
    2036                 :             :  * In addition, it returns the sum of the base frequencies of each of those
    2037                 :             :  * items (that is the sum of the selectivities that each item would have if
    2038                 :             :  * the columns were independent of one another), and the total selectivity of
    2039                 :             :  * all the MCV items (not just the matching ones).  These are expected to be
    2040                 :             :  * used together with a "simple" selectivity estimate (one based only on
    2041                 :             :  * per-column statistics) to produce an overall selectivity estimate that
    2042                 :             :  * makes use of both per-column and multi-column statistics --- see
    2043                 :             :  * mcv_combine_selectivities().
    2044                 :             :  */
    2045                 :             : Selectivity
    2046                 :          85 : mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat,
    2047                 :             :                                                    List *clauses, int varRelid,
    2048                 :             :                                                    JoinType jointype, SpecialJoinInfo *sjinfo,
    2049                 :             :                                                    RelOptInfo *rel,
    2050                 :             :                                                    Selectivity *basesel, Selectivity *totalsel)
    2051                 :             : {
    2052                 :          85 :         int                     i;
    2053                 :          85 :         MCVList    *mcv;
    2054                 :          85 :         Selectivity s = 0.0;
    2055                 :          85 :         RangeTblEntry *rte = root->simple_rte_array[rel->relid];
    2056                 :             : 
    2057                 :             :         /* match/mismatch bitmap for each MCV item */
    2058                 :          85 :         bool       *matches = NULL;
    2059                 :             : 
    2060                 :             :         /* load the MCV list stored in the statistics object */
    2061                 :          85 :         mcv = statext_mcv_load(stat->statOid, rte->inh);
    2062                 :             : 
    2063                 :             :         /* build a match bitmap for the clauses */
    2064                 :         170 :         matches = mcv_get_match_bitmap(root, clauses, stat->keys, stat->exprs,
    2065                 :          85 :                                                                    mcv, false);
    2066                 :             : 
    2067                 :             :         /* sum frequencies for all the matching MCV items */
    2068                 :          85 :         *basesel = 0.0;
    2069                 :          85 :         *totalsel = 0.0;
    2070         [ +  + ]:        6305 :         for (i = 0; i < mcv->nitems; i++)
    2071                 :             :         {
    2072                 :        6220 :                 *totalsel += mcv->items[i].frequency;
    2073                 :             : 
    2074         [ +  + ]:        6220 :                 if (matches[i] != false)
    2075                 :             :                 {
    2076                 :         111 :                         *basesel += mcv->items[i].base_frequency;
    2077                 :         111 :                         s += mcv->items[i].frequency;
    2078                 :         111 :                 }
    2079                 :        6220 :         }
    2080                 :             : 
    2081                 :         170 :         return s;
    2082                 :          85 : }
    2083                 :             : 
    2084                 :             : 
    2085                 :             : /*
    2086                 :             :  * mcv_clause_selectivity_or
    2087                 :             :  *              Use MCV statistics to estimate the selectivity of a clause that
    2088                 :             :  *              appears in an ORed list of clauses.
    2089                 :             :  *
    2090                 :             :  * As with mcv_clauselist_selectivity() this determines which MCV items match
    2091                 :             :  * the clause and returns both the sum of the frequencies and the sum of the
    2092                 :             :  * base frequencies of those items, as well as the sum of the frequencies of
    2093                 :             :  * all MCV items (not just the matching ones) so that this information can be
    2094                 :             :  * used by mcv_combine_selectivities() to produce a selectivity estimate that
    2095                 :             :  * makes use of both per-column and multi-column statistics.
    2096                 :             :  *
    2097                 :             :  * Additionally, we return information to help compute the overall selectivity
    2098                 :             :  * of the ORed list of clauses assumed to contain this clause.  This function
    2099                 :             :  * is intended to be called for each clause in the ORed list of clauses,
    2100                 :             :  * allowing the overall selectivity to be computed using the following
    2101                 :             :  * algorithm:
    2102                 :             :  *
    2103                 :             :  * Suppose P[n] = P(C[1] OR C[2] OR ... OR C[n]) is the combined selectivity
    2104                 :             :  * of the first n clauses in the list.  Then the combined selectivity taking
    2105                 :             :  * into account the next clause C[n+1] can be written as
    2106                 :             :  *
    2107                 :             :  *              P[n+1] = P[n] + P(C[n+1]) - P((C[1] OR ... OR C[n]) AND C[n+1])
    2108                 :             :  *
    2109                 :             :  * The final term above represents the overlap between the clauses examined so
    2110                 :             :  * far and the (n+1)'th clause.  To estimate its selectivity, we track the
    2111                 :             :  * match bitmap for the ORed list of clauses examined so far and examine its
    2112                 :             :  * intersection with the match bitmap for the (n+1)'th clause.
    2113                 :             :  *
    2114                 :             :  * We then also return the sums of the MCV item frequencies and base
    2115                 :             :  * frequencies for the match bitmap intersection corresponding to the overlap
    2116                 :             :  * term above, so that they can be combined with a simple selectivity estimate
    2117                 :             :  * for that term.
    2118                 :             :  *
    2119                 :             :  * The parameter "or_matches" is an in/out parameter tracking the match bitmap
    2120                 :             :  * for the clauses examined so far.  The caller is expected to set it to NULL
    2121                 :             :  * the first time it calls this function.
    2122                 :             :  */
    2123                 :             : Selectivity
    2124                 :          40 : mcv_clause_selectivity_or(PlannerInfo *root, StatisticExtInfo *stat,
    2125                 :             :                                                   MCVList *mcv, Node *clause, bool **or_matches,
    2126                 :             :                                                   Selectivity *basesel, Selectivity *overlap_mcvsel,
    2127                 :             :                                                   Selectivity *overlap_basesel, Selectivity *totalsel)
    2128                 :             : {
    2129                 :          40 :         Selectivity s = 0.0;
    2130                 :          40 :         bool       *new_matches;
    2131                 :          40 :         int                     i;
    2132                 :             : 
    2133                 :             :         /* build the OR-matches bitmap, if not built already */
    2134         [ +  + ]:          40 :         if (*or_matches == NULL)
    2135                 :          16 :                 *or_matches = palloc0_array(bool, mcv->nitems);
    2136                 :             : 
    2137                 :             :         /* build the match bitmap for the new clause */
    2138                 :          80 :         new_matches = mcv_get_match_bitmap(root, list_make1(clause), stat->keys,
    2139                 :          40 :                                                                            stat->exprs, mcv, false);
    2140                 :             : 
    2141                 :             :         /*
    2142                 :             :          * Sum the frequencies for all the MCV items matching this clause and also
    2143                 :             :          * those matching the overlap between this clause and any of the preceding
    2144                 :             :          * clauses as described above.
    2145                 :             :          */
    2146                 :          40 :         *basesel = 0.0;
    2147                 :          40 :         *overlap_mcvsel = 0.0;
    2148                 :          40 :         *overlap_basesel = 0.0;
    2149                 :          40 :         *totalsel = 0.0;
    2150         [ +  + ]:        2586 :         for (i = 0; i < mcv->nitems; i++)
    2151                 :             :         {
    2152                 :        2546 :                 *totalsel += mcv->items[i].frequency;
    2153                 :             : 
    2154         [ +  + ]:        2546 :                 if (new_matches[i])
    2155                 :             :                 {
    2156                 :          56 :                         s += mcv->items[i].frequency;
    2157                 :          56 :                         *basesel += mcv->items[i].base_frequency;
    2158                 :             : 
    2159         [ +  + ]:          56 :                         if ((*or_matches)[i])
    2160                 :             :                         {
    2161                 :          24 :                                 *overlap_mcvsel += mcv->items[i].frequency;
    2162                 :          24 :                                 *overlap_basesel += mcv->items[i].base_frequency;
    2163                 :          24 :                         }
    2164                 :          56 :                 }
    2165                 :             : 
    2166                 :             :                 /* update the OR-matches bitmap for the next clause */
    2167         [ +  + ]:        2546 :                 (*or_matches)[i] = (*or_matches)[i] || new_matches[i];
    2168                 :        2546 :         }
    2169                 :             : 
    2170                 :          40 :         pfree(new_matches);
    2171                 :             : 
    2172                 :          80 :         return s;
    2173                 :          40 : }
    2174                 :             : 
    2175                 :             : /*
    2176                 :             :  * Free allocations of a MCVList.
    2177                 :             :  */
    2178                 :             : void
    2179                 :           0 : statext_mcv_free(MCVList *mcvlist)
    2180                 :             : {
    2181         [ #  # ]:           0 :         for (int i = 0; i < mcvlist->nitems; i++)
    2182                 :             :         {
    2183                 :           0 :                 MCVItem    *item = &mcvlist->items[i];
    2184                 :             : 
    2185                 :           0 :                 pfree(item->values);
    2186                 :           0 :                 pfree(item->isnull);
    2187                 :           0 :         }
    2188                 :           0 :         pfree(mcvlist);
    2189                 :           0 : }
        

Generated by: LCOV version 2.3.2-1