Branch data Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * mcv.c
4 : : * POSTGRES multivariate MCV lists
5 : : *
6 : : *
7 : : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
8 : : * Portions Copyright (c) 1994, Regents of the University of California
9 : : *
10 : : * IDENTIFICATION
11 : : * src/backend/statistics/mcv.c
12 : : *
13 : : *-------------------------------------------------------------------------
14 : : */
15 : : #include "postgres.h"
16 : :
17 : : #include "access/htup_details.h"
18 : : #include "catalog/pg_statistic_ext.h"
19 : : #include "catalog/pg_statistic_ext_data.h"
20 : : #include "fmgr.h"
21 : : #include "funcapi.h"
22 : : #include "nodes/nodeFuncs.h"
23 : : #include "statistics/extended_stats_internal.h"
24 : : #include "statistics/statistics.h"
25 : : #include "utils/array.h"
26 : : #include "utils/builtins.h"
27 : : #include "utils/fmgrprotos.h"
28 : : #include "utils/lsyscache.h"
29 : : #include "utils/selfuncs.h"
30 : : #include "utils/syscache.h"
31 : : #include "utils/typcache.h"
32 : :
33 : : /*
34 : : * Computes size of a serialized MCV item, depending on the number of
35 : : * dimensions (columns) the statistic is defined on. The datum values are
36 : : * stored in a separate array (deduplicated, to minimize the size), and
37 : : * so the serialized items only store uint16 indexes into that array.
38 : : *
39 : : * Each serialized item stores (in this order):
40 : : *
41 : : * - indexes to values (ndim * sizeof(uint16))
42 : : * - null flags (ndim * sizeof(bool))
43 : : * - frequency (sizeof(double))
44 : : * - base_frequency (sizeof(double))
45 : : *
46 : : * There is no alignment padding within an MCV item.
47 : : * So in total each MCV item requires this many bytes:
48 : : *
49 : : * ndim * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double)
50 : : */
51 : : #define ITEM_SIZE(ndims) \
52 : : ((ndims) * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double))
53 : :
54 : : /*
55 : : * Used to compute size of serialized MCV list representation.
56 : : */
57 : : #define MinSizeOfMCVList \
58 : : (VARHDRSZ + sizeof(uint32) * 3 + sizeof(AttrNumber))
59 : :
60 : : /*
61 : : * Size of the serialized MCV list, excluding the space needed for
62 : : * deduplicated per-dimension values. The macro is meant to be used
63 : : * when it's not yet safe to access the serialized info about amount
64 : : * of data for each column.
65 : : */
66 : : #define SizeOfMCVList(ndims,nitems) \
67 : : ((MinSizeOfMCVList + sizeof(Oid) * (ndims)) + \
68 : : ((ndims) * sizeof(DimensionInfo)) + \
69 : : ((nitems) * ITEM_SIZE(ndims)))
70 : :
71 : : static MultiSortSupport build_mss(StatsBuildData *data);
72 : :
73 : : static SortItem *build_distinct_groups(int numrows, SortItem *items,
74 : : MultiSortSupport mss, int *ndistinct);
75 : :
76 : : static SortItem **build_column_frequencies(SortItem *groups, int ngroups,
77 : : MultiSortSupport mss, int *ncounts);
78 : :
79 : : static int count_distinct_groups(int numrows, SortItem *items,
80 : : MultiSortSupport mss);
81 : :
82 : : /*
83 : : * Compute new value for bitmap item, considering whether it's used for
84 : : * clauses connected by AND/OR.
85 : : */
86 : : #define RESULT_MERGE(value, is_or, match) \
87 : : ((is_or) ? ((value) || (match)) : ((value) && (match)))
88 : :
89 : : /*
90 : : * When processing a list of clauses, the bitmap item may get set to a value
91 : : * such that additional clauses can't change it. For example, when processing
92 : : * a list of clauses connected to AND, as soon as the item gets set to 'false'
93 : : * then it'll remain like that. Similarly clauses connected by OR and 'true'.
94 : : *
95 : : * Returns true when the value in the bitmap can't change no matter how the
96 : : * remaining clauses are evaluated.
97 : : */
98 : : #define RESULT_IS_FINAL(value, is_or) ((is_or) ? (value) : (!(value)))
99 : :
100 : : /*
101 : : * get_mincount_for_mcv_list
102 : : * Determine the minimum number of times a value needs to appear in
103 : : * the sample for it to be included in the MCV list.
104 : : *
105 : : * We want to keep only values that appear sufficiently often in the
106 : : * sample that it is reasonable to extrapolate their sample frequencies to
107 : : * the entire table. We do this by placing an upper bound on the relative
108 : : * standard error of the sample frequency, so that any estimates the
109 : : * planner generates from the MCV statistics can be expected to be
110 : : * reasonably accurate.
111 : : *
112 : : * Since we are sampling without replacement, the sample frequency of a
113 : : * particular value is described by a hypergeometric distribution. A
114 : : * common rule of thumb when estimating errors in this situation is to
115 : : * require at least 10 instances of the value in the sample, in which case
116 : : * the distribution can be approximated by a normal distribution, and
117 : : * standard error analysis techniques can be applied. Given a sample size
118 : : * of n, a population size of N, and a sample frequency of p=cnt/n, the
119 : : * standard error of the proportion p is given by
120 : : * SE = sqrt(p*(1-p)/n) * sqrt((N-n)/(N-1))
121 : : * where the second term is the finite population correction. To get
122 : : * reasonably accurate planner estimates, we impose an upper bound on the
123 : : * relative standard error of 20% -- i.e., SE/p < 0.2. This 20% relative
124 : : * error bound is fairly arbitrary, but has been found empirically to work
125 : : * well. Rearranging this formula gives a lower bound on the number of
126 : : * instances of the value seen:
127 : : * cnt > n*(N-n) / (N-n+0.04*n*(N-1))
128 : : * This bound is at most 25, and approaches 0 as n approaches 0 or N. The
129 : : * case where n approaches 0 cannot happen in practice, since the sample
130 : : * size is at least 300. The case where n approaches N corresponds to
131 : : * sampling the whole table, in which case it is reasonable to keep
132 : : * the whole MCV list (have no lower bound), so it makes sense to apply
133 : : * this formula for all inputs, even though the above derivation is
134 : : * technically only valid when the right hand side is at least around 10.
135 : : *
136 : : * An alternative way to look at this formula is as follows -- assume that
137 : : * the number of instances of the value seen scales up to the entire
138 : : * table, so that the population count is K=N*cnt/n. Then the distribution
139 : : * in the sample is a hypergeometric distribution parameterised by N, n
140 : : * and K, and the bound above is mathematically equivalent to demanding
141 : : * that the standard deviation of that distribution is less than 20% of
142 : : * its mean. Thus the relative errors in any planner estimates produced
143 : : * from the MCV statistics are likely to be not too large.
144 : : */
145 : : static double
146 : 36 : get_mincount_for_mcv_list(int samplerows, double totalrows)
147 : : {
148 : 36 : double n = samplerows;
149 : 36 : double N = totalrows;
150 : 36 : double numer,
151 : : denom;
152 : :
153 : 36 : numer = n * (N - n);
154 : 36 : denom = N - n + 0.04 * n * (N - 1);
155 : :
156 : : /* Guard against division by zero (possible if n = N = 1) */
157 [ + + ]: 36 : if (denom == 0.0)
158 : 2 : return 0.0;
159 : :
160 : 34 : return numer / denom;
161 : 36 : }
162 : :
163 : : /*
164 : : * Builds MCV list from the set of sampled rows.
165 : : *
166 : : * The algorithm is quite simple:
167 : : *
168 : : * (1) sort the data (default collation, '<' for the data type)
169 : : *
170 : : * (2) count distinct groups, decide how many to keep
171 : : *
172 : : * (3) build the MCV list using the threshold determined in (2)
173 : : *
174 : : * (4) remove rows represented by the MCV from the sample
175 : : *
176 : : */
177 : : MCVList *
178 : 36 : statext_mcv_build(StatsBuildData *data, double totalrows, int stattarget)
179 : : {
180 : 36 : int i,
181 : : numattrs,
182 : : numrows,
183 : : ngroups,
184 : : nitems;
185 : 36 : double mincount;
186 : 36 : SortItem *items;
187 : 36 : SortItem *groups;
188 : 36 : MCVList *mcvlist = NULL;
189 : 36 : MultiSortSupport mss;
190 : :
191 : : /* comparator for all the columns */
192 : 36 : mss = build_mss(data);
193 : :
194 : : /* sort the rows */
195 : 72 : items = build_sorted_items(data, &nitems, mss,
196 : 36 : data->nattnums, data->attnums);
197 : :
198 [ + - ]: 36 : if (!items)
199 : 0 : return NULL;
200 : :
201 : : /* for convenience */
202 : 36 : numattrs = data->nattnums;
203 : 36 : numrows = data->numrows;
204 : :
205 : : /* transform the sorted rows into groups (sorted by frequency) */
206 : 36 : groups = build_distinct_groups(nitems, items, mss, &ngroups);
207 : :
208 : : /*
209 : : * The maximum number of MCV items to store, based on the statistics
210 : : * target we computed for the statistics object (from the target set for
211 : : * the object itself, attributes and the system default). In any case, we
212 : : * can't keep more groups than we have available.
213 : : */
214 : 36 : nitems = stattarget;
215 [ + + ]: 36 : if (nitems > ngroups)
216 : 22 : nitems = ngroups;
217 : :
218 : : /*
219 : : * Decide how many items to keep in the MCV list. We can't use the same
220 : : * algorithm as per-column MCV lists, because that only considers the
221 : : * actual group frequency - but we're primarily interested in how the
222 : : * actual frequency differs from the base frequency (product of simple
223 : : * per-column frequencies, as if the columns were independent).
224 : : *
225 : : * Using the same algorithm might exclude items that are close to the
226 : : * "average" frequency of the sample. But that does not say whether the
227 : : * observed frequency is close to the base frequency or not. We also need
228 : : * to consider unexpectedly uncommon items (again, compared to the base
229 : : * frequency), and the single-column algorithm does not have to.
230 : : *
231 : : * We simply decide how many items to keep by computing the minimum count
232 : : * using get_mincount_for_mcv_list() and then keep all items that seem to
233 : : * be more common than that.
234 : : */
235 : 36 : mincount = get_mincount_for_mcv_list(numrows, totalrows);
236 : :
237 : : /*
238 : : * Walk the groups until we find the first group with a count below the
239 : : * mincount threshold (the index of that group is the number of groups we
240 : : * want to keep).
241 : : */
242 [ + + ]: 1688 : for (i = 0; i < nitems; i++)
243 : : {
244 [ - + ]: 1652 : if (groups[i].count < mincount)
245 : : {
246 : 0 : nitems = i;
247 : 0 : break;
248 : : }
249 : 1652 : }
250 : :
251 : : /*
252 : : * At this point, we know the number of items for the MCV list. There
253 : : * might be none (for uniform distribution with many groups), and in that
254 : : * case, there will be no MCV list. Otherwise, construct the MCV list.
255 : : */
256 [ - + ]: 36 : if (nitems > 0)
257 : : {
258 : 36 : int j;
259 : 36 : SortItem key;
260 : 36 : MultiSortSupport tmp;
261 : :
262 : : /* frequencies for values in each attribute */
263 : 36 : SortItem **freqs;
264 : 36 : int *nfreqs;
265 : :
266 : : /* used to search values */
267 : 36 : tmp = (MultiSortSupport) palloc(offsetof(MultiSortSupportData, ssup)
268 : : + sizeof(SortSupportData));
269 : :
270 : : /* compute frequencies for values in each column */
271 : 36 : nfreqs = palloc0_array(int, numattrs);
272 : 36 : freqs = build_column_frequencies(groups, ngroups, mss, nfreqs);
273 : :
274 : : /*
275 : : * Allocate the MCV list structure, set the global parameters.
276 : : */
277 : 36 : mcvlist = (MCVList *) palloc0(offsetof(MCVList, items) +
278 : 36 : sizeof(MCVItem) * nitems);
279 : :
280 : 36 : mcvlist->magic = STATS_MCV_MAGIC;
281 : 36 : mcvlist->type = STATS_MCV_TYPE_BASIC;
282 : 36 : mcvlist->ndimensions = numattrs;
283 : 36 : mcvlist->nitems = nitems;
284 : :
285 : : /* store info about data type OIDs */
286 [ + + ]: 135 : for (i = 0; i < numattrs; i++)
287 : 99 : mcvlist->types[i] = data->stats[i]->attrtypid;
288 : :
289 : : /* Copy the first chunk of groups into the result. */
290 [ + + ]: 1688 : for (i = 0; i < nitems; i++)
291 : : {
292 : : /* just point to the proper place in the list */
293 : 1652 : MCVItem *item = &mcvlist->items[i];
294 : :
295 : 1652 : item->values = palloc_array(Datum, numattrs);
296 : 1652 : item->isnull = palloc_array(bool, numattrs);
297 : :
298 : : /* copy values for the group */
299 : 1652 : memcpy(item->values, groups[i].values, sizeof(Datum) * numattrs);
300 : 1652 : memcpy(item->isnull, groups[i].isnull, sizeof(bool) * numattrs);
301 : :
302 : : /* groups should be sorted by frequency in descending order */
303 [ + + + - ]: 1652 : Assert((i == 0) || (groups[i - 1].count >= groups[i].count));
304 : :
305 : : /* group frequency */
306 : 1652 : item->frequency = (double) groups[i].count / numrows;
307 : :
308 : : /* base frequency, if the attributes were independent */
309 : 1652 : item->base_frequency = 1.0;
310 [ + + ]: 6143 : for (j = 0; j < numattrs; j++)
311 : : {
312 : 4491 : SortItem *freq;
313 : :
314 : : /* single dimension */
315 : 4491 : tmp->ndims = 1;
316 : 4491 : tmp->ssup[0] = mss->ssup[j];
317 : :
318 : : /* fill search key */
319 : 4491 : key.values = &groups[i].values[j];
320 : 4491 : key.isnull = &groups[i].isnull[j];
321 : :
322 : 8982 : freq = (SortItem *) bsearch_arg(&key, freqs[j], nfreqs[j],
323 : : sizeof(SortItem),
324 : 4491 : multi_sort_compare, tmp);
325 : :
326 : 4491 : item->base_frequency *= ((double) freq->count) / numrows;
327 : 4491 : }
328 : 1652 : }
329 : :
330 : 36 : pfree(nfreqs);
331 : 36 : pfree(freqs);
332 : 36 : }
333 : :
334 : 36 : pfree(items);
335 : 36 : pfree(groups);
336 : :
337 : 36 : return mcvlist;
338 : 36 : }
339 : :
340 : : /*
341 : : * build_mss
342 : : * Build a MultiSortSupport for the given StatsBuildData.
343 : : */
344 : : static MultiSortSupport
345 : 36 : build_mss(StatsBuildData *data)
346 : : {
347 : 36 : int i;
348 : 36 : int numattrs = data->nattnums;
349 : :
350 : : /* Sort by multiple columns (using array of SortSupport) */
351 : 36 : MultiSortSupport mss = multi_sort_init(numattrs);
352 : :
353 : : /* prepare the sort functions for all the attributes */
354 [ + + ]: 135 : for (i = 0; i < numattrs; i++)
355 : : {
356 : 99 : VacAttrStats *colstat = data->stats[i];
357 : 99 : TypeCacheEntry *type;
358 : :
359 : 99 : type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
360 [ + - ]: 99 : if (type->lt_opr == InvalidOid) /* shouldn't happen */
361 [ # # # # ]: 0 : elog(ERROR, "cache lookup failed for ordering operator for type %u",
362 : : colstat->attrtypid);
363 : :
364 : 99 : multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
365 : 99 : }
366 : :
367 : 72 : return mss;
368 : 36 : }
369 : :
370 : : /*
371 : : * count_distinct_groups
372 : : * Count distinct combinations of SortItems in the array.
373 : : *
374 : : * The array is assumed to be sorted according to the MultiSortSupport.
375 : : */
376 : : static int
377 : 36 : count_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss)
378 : : {
379 : 36 : int i;
380 : 36 : int ndistinct;
381 : :
382 : 36 : ndistinct = 1;
383 [ + + ]: 80523 : for (i = 1; i < numrows; i++)
384 : : {
385 : : /* make sure the array really is sorted */
386 [ + - ]: 80487 : Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
387 : :
388 [ + + ]: 80487 : if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
389 : 14968 : ndistinct += 1;
390 : 80487 : }
391 : :
392 : 72 : return ndistinct;
393 : 36 : }
394 : :
395 : : /*
396 : : * compare_sort_item_count
397 : : * Comparator for sorting items by count (frequencies) in descending
398 : : * order.
399 : : */
400 : : static int
401 : 16153 : compare_sort_item_count(const void *a, const void *b, void *arg)
402 : : {
403 : 16153 : const SortItem *ia = a;
404 : 16153 : const SortItem *ib = b;
405 : :
406 [ + + ]: 16153 : if (ia->count == ib->count)
407 : 15991 : return 0;
408 [ + + ]: 162 : else if (ia->count > ib->count)
409 : 107 : return -1;
410 : :
411 : 55 : return 1;
412 : 16153 : }
413 : :
414 : : /*
415 : : * build_distinct_groups
416 : : * Build an array of SortItems for distinct groups and counts matching
417 : : * items.
418 : : *
419 : : * The 'items' array is assumed to be sorted.
420 : : */
421 : : static SortItem *
422 : 36 : build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss,
423 : : int *ndistinct)
424 : : {
425 : 36 : int i,
426 : : j;
427 : 36 : int ngroups = count_distinct_groups(numrows, items, mss);
428 : :
429 : 36 : SortItem *groups = (SortItem *) palloc(ngroups * sizeof(SortItem));
430 : :
431 : 36 : j = 0;
432 : 36 : groups[0] = items[0];
433 : 36 : groups[0].count = 1;
434 : :
435 [ + + ]: 80523 : for (i = 1; i < numrows; i++)
436 : : {
437 : : /* Assume sorted in ascending order. */
438 [ + - ]: 80487 : Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
439 : :
440 : : /* New distinct group detected. */
441 [ + + ]: 80487 : if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
442 : : {
443 : 14968 : groups[++j] = items[i];
444 : 14968 : groups[j].count = 0;
445 : 14968 : }
446 : :
447 : 80487 : groups[j].count++;
448 : 80487 : }
449 : :
450 : : /* ensure we filled the expected number of distinct groups */
451 [ + - ]: 36 : Assert(j + 1 == ngroups);
452 : :
453 : : /* Sort the distinct groups by frequency (in descending order). */
454 : 36 : qsort_interruptible(groups, ngroups, sizeof(SortItem),
455 : : compare_sort_item_count, NULL);
456 : :
457 : 36 : *ndistinct = ngroups;
458 : 72 : return groups;
459 : 36 : }
460 : :
461 : : /* compare sort items (single dimension) */
462 : : static int
463 : 171592 : sort_item_compare(const void *a, const void *b, void *arg)
464 : : {
465 : 171592 : SortSupport ssup = (SortSupport) arg;
466 : 171592 : const SortItem *ia = a;
467 : 171592 : const SortItem *ib = b;
468 : :
469 : 514776 : return ApplySortComparator(ia->values[0], ia->isnull[0],
470 : 171592 : ib->values[0], ib->isnull[0],
471 : 171592 : ssup);
472 : 171592 : }
473 : :
474 : : /*
475 : : * build_column_frequencies
476 : : * Compute frequencies of values in each column.
477 : : *
478 : : * This returns an array of SortItems for each attribute the MCV is built
479 : : * on, with a frequency (number of occurrences) for each value. This is
480 : : * then used to compute "base" frequency of MCV items.
481 : : *
482 : : * All the memory is allocated in a single chunk, so that a single pfree
483 : : * is enough to release it. We do not allocate space for values/isnull
484 : : * arrays in the SortItems, because we can simply point into the input
485 : : * groups directly.
486 : : */
487 : : static SortItem **
488 : 36 : build_column_frequencies(SortItem *groups, int ngroups,
489 : : MultiSortSupport mss, int *ncounts)
490 : : {
491 : 36 : int i,
492 : : dim;
493 : 36 : SortItem **result;
494 : 36 : char *ptr;
495 : :
496 [ + - ]: 36 : Assert(groups);
497 [ + - ]: 36 : Assert(ncounts);
498 : :
499 : : /* allocate arrays for all columns as a single chunk */
500 : 72 : ptr = palloc(MAXALIGN(sizeof(SortItem *) * mss->ndims) +
501 : 36 : mss->ndims * MAXALIGN(sizeof(SortItem) * ngroups));
502 : :
503 : : /* initial array of pointers */
504 : 36 : result = (SortItem **) ptr;
505 : 36 : ptr += MAXALIGN(sizeof(SortItem *) * mss->ndims);
506 : :
507 [ + + ]: 135 : for (dim = 0; dim < mss->ndims; dim++)
508 : : {
509 : 99 : SortSupport ssup = &mss->ssup[dim];
510 : :
511 : : /* array of values for a single column */
512 : 99 : result[dim] = (SortItem *) ptr;
513 : 99 : ptr += MAXALIGN(sizeof(SortItem) * ngroups);
514 : :
515 : : /* extract data for the dimension */
516 [ + + ]: 41894 : for (i = 0; i < ngroups; i++)
517 : : {
518 : : /* point into the input groups */
519 : 41795 : result[dim][i].values = &groups[i].values[dim];
520 : 41795 : result[dim][i].isnull = &groups[i].isnull[dim];
521 : 41795 : result[dim][i].count = groups[i].count;
522 : 41795 : }
523 : :
524 : : /* sort the values, deduplicate */
525 : 198 : qsort_interruptible(result[dim], ngroups, sizeof(SortItem),
526 : 99 : sort_item_compare, ssup);
527 : :
528 : : /*
529 : : * Identify distinct values, compute frequency (there might be
530 : : * multiple MCV items containing this value, so we need to sum counts
531 : : * from all of them.
532 : : */
533 : 99 : ncounts[dim] = 1;
534 [ + + ]: 41795 : for (i = 1; i < ngroups; i++)
535 : : {
536 [ + + ]: 41696 : if (sort_item_compare(&result[dim][i - 1], &result[dim][i], ssup) == 0)
537 : : {
538 : 24710 : result[dim][ncounts[dim] - 1].count += result[dim][i].count;
539 : 24710 : continue;
540 : : }
541 : :
542 : 16986 : result[dim][ncounts[dim]] = result[dim][i];
543 : :
544 : 16986 : ncounts[dim]++;
545 : 16986 : }
546 : 99 : }
547 : :
548 : 72 : return result;
549 : 36 : }
550 : :
551 : : /*
552 : : * statext_mcv_load
553 : : * Load the MCV list for the indicated pg_statistic_ext_data tuple.
554 : : */
555 : : MCVList *
556 : 101 : statext_mcv_load(Oid mvoid, bool inh)
557 : : {
558 : 101 : MCVList *result;
559 : 101 : bool isnull;
560 : 101 : Datum mcvlist;
561 : 202 : HeapTuple htup = SearchSysCache2(STATEXTDATASTXOID,
562 : 101 : ObjectIdGetDatum(mvoid), BoolGetDatum(inh));
563 : :
564 [ + - ]: 101 : if (!HeapTupleIsValid(htup))
565 [ # # # # ]: 0 : elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
566 : :
567 : 101 : mcvlist = SysCacheGetAttr(STATEXTDATASTXOID, htup,
568 : : Anum_pg_statistic_ext_data_stxdmcv, &isnull);
569 : :
570 [ + - ]: 101 : if (isnull)
571 [ # # # # ]: 0 : elog(ERROR,
572 : : "requested statistics kind \"%c\" is not yet built for statistics object %u",
573 : : STATS_EXT_MCV, mvoid);
574 : :
575 : 101 : result = statext_mcv_deserialize(DatumGetByteaP(mcvlist));
576 : :
577 : 101 : ReleaseSysCache(htup);
578 : :
579 : 202 : return result;
580 : 101 : }
581 : :
582 : :
583 : : /*
584 : : * statext_mcv_serialize
585 : : * Serialize MCV list into a pg_mcv_list value.
586 : : *
587 : : * The MCV items may include values of various data types, and it's reasonable
588 : : * to expect redundancy (values for a given attribute, repeated for multiple
589 : : * MCV list items). So we deduplicate the values into arrays, and then replace
590 : : * the values by indexes into those arrays.
591 : : *
592 : : * The overall structure of the serialized representation looks like this:
593 : : *
594 : : * +---------------+----------------+---------------------+-------+
595 : : * | header fields | dimension info | deduplicated values | items |
596 : : * +---------------+----------------+---------------------+-------+
597 : : *
598 : : * Where dimension info stores information about the type of the K-th
599 : : * attribute (e.g. typlen, typbyval and length of deduplicated values).
600 : : * Deduplicated values store deduplicated values for each attribute. And
601 : : * items store the actual MCV list items, with values replaced by indexes into
602 : : * the arrays.
603 : : *
604 : : * When serializing the items, we use uint16 indexes. The number of MCV items
605 : : * is limited by the statistics target (which is capped to 10k at the moment).
606 : : * We might increase this to 65k and still fit into uint16, so there's a bit of
607 : : * slack. Furthermore, this limit is on the number of distinct values per column,
608 : : * and we usually have few of those (and various combinations of them for the
609 : : * those MCV list). So uint16 seems fine for now.
610 : : *
611 : : * We don't really expect the serialization to save as much space as for
612 : : * histograms, as we are not doing any bucket splits (which is the source
613 : : * of high redundancy in histograms).
614 : : *
615 : : * TODO: Consider packing boolean flags (NULL) for each item into a single char
616 : : * (or a longer type) instead of using an array of bool items.
617 : : */
618 : : bytea *
619 : 36 : statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats)
620 : : {
621 : 36 : int i;
622 : 36 : int dim;
623 : 36 : int ndims = mcvlist->ndimensions;
624 : :
625 : 36 : SortSupport ssup;
626 : 36 : DimensionInfo *info;
627 : :
628 : 36 : Size total_length;
629 : :
630 : : /* serialized items (indexes into arrays, etc.) */
631 : 36 : bytea *raw;
632 : 36 : char *ptr;
633 : 36 : char *endptr PG_USED_FOR_ASSERTS_ONLY;
634 : :
635 : : /* values per dimension (and number of non-NULL values) */
636 : 36 : Datum **values = palloc0_array(Datum *, ndims);
637 : 36 : int *counts = palloc0_array(int, ndims);
638 : :
639 : : /*
640 : : * We'll include some rudimentary information about the attribute types
641 : : * (length, by-val flag), so that we don't have to look them up while
642 : : * deserializing the MCV list (we already have the type OID in the
643 : : * header). This is safe because when changing the type of the attribute
644 : : * the statistics gets dropped automatically. We need to store the info
645 : : * about the arrays of deduplicated values anyway.
646 : : */
647 : 36 : info = palloc0_array(DimensionInfo, ndims);
648 : :
649 : : /* sort support data for all attributes included in the MCV list */
650 : 36 : ssup = palloc0_array(SortSupportData, ndims);
651 : :
652 : : /* collect and deduplicate values for each dimension (attribute) */
653 [ + + ]: 135 : for (dim = 0; dim < ndims; dim++)
654 : : {
655 : 99 : int ndistinct;
656 : 99 : TypeCacheEntry *typentry;
657 : :
658 : : /*
659 : : * Lookup the LT operator (can't get it from stats extra_data, as we
660 : : * don't know how to interpret that - scalar vs. array etc.).
661 : : */
662 : 99 : typentry = lookup_type_cache(stats[dim]->attrtypid, TYPECACHE_LT_OPR);
663 : :
664 : : /* copy important info about the data type (length, by-value) */
665 : 99 : info[dim].typlen = stats[dim]->attrtype->typlen;
666 : 99 : info[dim].typbyval = stats[dim]->attrtype->typbyval;
667 : :
668 : : /* allocate space for values in the attribute and collect them */
669 : 99 : values[dim] = palloc0_array(Datum, mcvlist->nitems);
670 : :
671 [ + + ]: 4590 : for (i = 0; i < mcvlist->nitems; i++)
672 : : {
673 : : /* skip NULL values - we don't need to deduplicate those */
674 [ + + ]: 4491 : if (mcvlist->items[i].isnull[dim])
675 : 13 : continue;
676 : :
677 : : /* append the value at the end */
678 : 4478 : values[dim][counts[dim]] = mcvlist->items[i].values[dim];
679 : 4478 : counts[dim] += 1;
680 : 4478 : }
681 : :
682 : : /* if there are just NULL values in this dimension, we're done */
683 [ + + ]: 99 : if (counts[dim] == 0)
684 : 1 : continue;
685 : :
686 : : /* sort and deduplicate the data */
687 : 98 : ssup[dim].ssup_cxt = CurrentMemoryContext;
688 : 98 : ssup[dim].ssup_collation = stats[dim]->attrcollid;
689 : 98 : ssup[dim].ssup_nulls_first = false;
690 : :
691 : 98 : PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]);
692 : :
693 : 196 : qsort_interruptible(values[dim], counts[dim], sizeof(Datum),
694 : 98 : compare_scalars_simple, &ssup[dim]);
695 : :
696 : : /*
697 : : * Walk through the array and eliminate duplicate values, but keep the
698 : : * ordering (so that we can do a binary search later). We know there's
699 : : * at least one item as (counts[dim] != 0), so we can skip the first
700 : : * element.
701 : : */
702 : 98 : ndistinct = 1; /* number of distinct values */
703 [ + + ]: 4478 : for (i = 1; i < counts[dim]; i++)
704 : : {
705 : : /* expect sorted array */
706 [ + - ]: 4380 : Assert(compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]) <= 0);
707 : :
708 : : /* if the value is the same as the previous one, we can skip it */
709 [ + + ]: 4380 : if (!compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]))
710 : 1870 : continue;
711 : :
712 : 2510 : values[dim][ndistinct] = values[dim][i];
713 : 2510 : ndistinct += 1;
714 : 2510 : }
715 : :
716 : : /* we must not exceed PG_UINT16_MAX, as we use uint16 indexes */
717 [ + - ]: 98 : Assert(ndistinct <= PG_UINT16_MAX);
718 : :
719 : : /*
720 : : * Store additional info about the attribute - number of deduplicated
721 : : * values, and also size of the serialized data. For fixed-length data
722 : : * types this is trivial to compute, for varwidth types we need to
723 : : * actually walk the array and sum the sizes.
724 : : */
725 : 98 : info[dim].nvalues = ndistinct;
726 : :
727 [ + + ]: 98 : if (info[dim].typbyval) /* by-value data types */
728 : : {
729 : 72 : info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
730 : :
731 : : /*
732 : : * We copy the data into the MCV item during deserialization, so
733 : : * we don't need to allocate any extra space.
734 : : */
735 : 72 : info[dim].nbytes_aligned = 0;
736 : 72 : }
737 [ + + ]: 26 : else if (info[dim].typlen > 0) /* fixed-length by-ref */
738 : : {
739 : : /*
740 : : * We don't care about alignment in the serialized data, so we
741 : : * pack the data as much as possible. But we also track how much
742 : : * data will be needed after deserialization, and in that case we
743 : : * need to account for alignment of each item.
744 : : *
745 : : * Note: As the items are fixed-length, we could easily compute
746 : : * this during deserialization, but we do it here anyway.
747 : : */
748 : 4 : info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
749 : 4 : info[dim].nbytes_aligned = info[dim].nvalues * MAXALIGN(info[dim].typlen);
750 : 4 : }
751 [ - + ]: 22 : else if (info[dim].typlen == -1) /* varlena */
752 : : {
753 : 22 : info[dim].nbytes = 0;
754 : 22 : info[dim].nbytes_aligned = 0;
755 [ + + ]: 494 : for (i = 0; i < info[dim].nvalues; i++)
756 : : {
757 : 472 : Size len;
758 : :
759 : : /*
760 : : * For varlena values, we detoast the values and store the
761 : : * length and data separately. We don't bother with alignment
762 : : * here, which means that during deserialization we need to
763 : : * copy the fields and only access the copies.
764 : : */
765 : 472 : values[dim][i] = PointerGetDatum(PG_DETOAST_DATUM(values[dim][i]));
766 : :
767 : : /* serialized length (uint32 length + data) */
768 : 472 : len = VARSIZE_ANY_EXHDR(DatumGetPointer(values[dim][i]));
769 : 472 : info[dim].nbytes += sizeof(uint32); /* length */
770 : 472 : info[dim].nbytes += len; /* value (no header) */
771 : :
772 : : /*
773 : : * During deserialization we'll build regular varlena values
774 : : * with full headers, and we need to align them properly.
775 : : */
776 : 472 : info[dim].nbytes_aligned += MAXALIGN(VARHDRSZ + len);
777 : 472 : }
778 : 22 : }
779 [ # # ]: 0 : else if (info[dim].typlen == -2) /* cstring */
780 : : {
781 : 0 : info[dim].nbytes = 0;
782 : 0 : info[dim].nbytes_aligned = 0;
783 [ # # ]: 0 : for (i = 0; i < info[dim].nvalues; i++)
784 : : {
785 : 0 : Size len;
786 : :
787 : : /*
788 : : * cstring is handled similar to varlena - first we store the
789 : : * length as uint32 and then the data. We don't care about
790 : : * alignment, which means that during deserialization we need
791 : : * to copy the fields and only access the copies.
792 : : */
793 : :
794 : : /* c-strings include terminator, so +1 byte */
795 : 0 : len = strlen(DatumGetCString(values[dim][i])) + 1;
796 : 0 : info[dim].nbytes += sizeof(uint32); /* length */
797 : 0 : info[dim].nbytes += len; /* value */
798 : :
799 : : /* space needed for properly aligned deserialized copies */
800 : 0 : info[dim].nbytes_aligned += MAXALIGN(len);
801 : 0 : }
802 : 0 : }
803 : :
804 : : /* we know (count>0) so there must be some data */
805 [ + - ]: 98 : Assert(info[dim].nbytes > 0);
806 [ - + + ]: 99 : }
807 : :
808 : : /*
809 : : * Now we can finally compute how much space we'll actually need for the
810 : : * whole serialized MCV list (varlena header, MCV header, dimension info
811 : : * for each attribute, deduplicated values and items).
812 : : */
813 : 36 : total_length = (3 * sizeof(uint32)) /* magic + type + nitems */
814 : : + sizeof(AttrNumber) /* ndimensions */
815 : 36 : + (ndims * sizeof(Oid)); /* attribute types */
816 : :
817 : : /* dimension info */
818 : 36 : total_length += ndims * sizeof(DimensionInfo);
819 : :
820 : : /* add space for the arrays of deduplicated values */
821 [ + + ]: 135 : for (i = 0; i < ndims; i++)
822 : 99 : total_length += info[i].nbytes;
823 : :
824 : : /*
825 : : * And finally account for the items (those are fixed-length, thanks to
826 : : * replacing values with uint16 indexes into the deduplicated arrays).
827 : : */
828 : 36 : total_length += mcvlist->nitems * ITEM_SIZE(dim);
829 : :
830 : : /*
831 : : * Allocate space for the whole serialized MCV list (we'll skip bytes, so
832 : : * we set them to zero to make the result more compressible).
833 : : */
834 : 36 : raw = (bytea *) palloc0(VARHDRSZ + total_length);
835 : 36 : SET_VARSIZE(raw, VARHDRSZ + total_length);
836 : :
837 : 36 : ptr = VARDATA(raw);
838 : 36 : endptr = ptr + total_length;
839 : :
840 : : /* copy the MCV list header fields, one by one */
841 : 36 : memcpy(ptr, &mcvlist->magic, sizeof(uint32));
842 : 36 : ptr += sizeof(uint32);
843 : :
844 : 36 : memcpy(ptr, &mcvlist->type, sizeof(uint32));
845 : 36 : ptr += sizeof(uint32);
846 : :
847 : 36 : memcpy(ptr, &mcvlist->nitems, sizeof(uint32));
848 : 36 : ptr += sizeof(uint32);
849 : :
850 : 36 : memcpy(ptr, &mcvlist->ndimensions, sizeof(AttrNumber));
851 : 36 : ptr += sizeof(AttrNumber);
852 : :
853 : 36 : memcpy(ptr, mcvlist->types, sizeof(Oid) * ndims);
854 : 36 : ptr += (sizeof(Oid) * ndims);
855 : :
856 : : /* store information about the attributes (data amounts, ...) */
857 : 36 : memcpy(ptr, info, sizeof(DimensionInfo) * ndims);
858 : 36 : ptr += sizeof(DimensionInfo) * ndims;
859 : :
860 : : /* Copy the deduplicated values for all attributes to the output. */
861 [ + + ]: 135 : for (dim = 0; dim < ndims; dim++)
862 : : {
863 : : /* remember the starting point for Asserts later */
864 : 99 : char *start PG_USED_FOR_ASSERTS_ONLY = ptr;
865 : :
866 [ + + ]: 2707 : for (i = 0; i < info[dim].nvalues; i++)
867 : : {
868 : 2608 : Datum value = values[dim][i];
869 : :
870 [ + + ]: 2608 : if (info[dim].typbyval) /* passed by value */
871 : : {
872 : 1951 : Datum tmp;
873 : :
874 : : /*
875 : : * For byval types, we need to copy just the significant bytes
876 : : * - we can't use memcpy directly, as that assumes
877 : : * little-endian behavior. store_att_byval does almost what
878 : : * we need, but it requires a properly aligned buffer - the
879 : : * output buffer does not guarantee that. So we simply use a
880 : : * local Datum variable (which guarantees proper alignment),
881 : : * and then copy the value from it.
882 : : */
883 : 1951 : store_att_byval(&tmp, value, info[dim].typlen);
884 : :
885 : 1951 : memcpy(ptr, &tmp, info[dim].typlen);
886 : 1951 : ptr += info[dim].typlen;
887 : 1951 : }
888 [ + + ]: 657 : else if (info[dim].typlen > 0) /* passed by reference */
889 : : {
890 : : /* no special alignment needed, treated as char array */
891 : 185 : memcpy(ptr, DatumGetPointer(value), info[dim].typlen);
892 : 185 : ptr += info[dim].typlen;
893 : 185 : }
894 [ - + ]: 472 : else if (info[dim].typlen == -1) /* varlena */
895 : : {
896 : 472 : uint32 len = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
897 : :
898 : : /* copy the length */
899 : 472 : memcpy(ptr, &len, sizeof(uint32));
900 : 472 : ptr += sizeof(uint32);
901 : :
902 : : /* data from the varlena value (without the header) */
903 : 472 : memcpy(ptr, VARDATA_ANY(DatumGetPointer(value)), len);
904 : 472 : ptr += len;
905 : 472 : }
906 [ # # ]: 0 : else if (info[dim].typlen == -2) /* cstring */
907 : : {
908 : 0 : uint32 len = (uint32) strlen(DatumGetCString(value)) + 1;
909 : :
910 : : /* copy the length */
911 : 0 : memcpy(ptr, &len, sizeof(uint32));
912 : 0 : ptr += sizeof(uint32);
913 : :
914 : : /* value */
915 : 0 : memcpy(ptr, DatumGetCString(value), len);
916 : 0 : ptr += len;
917 : 0 : }
918 : :
919 : : /* no underflows or overflows */
920 [ + - ]: 2608 : Assert((ptr > start) && ((ptr - start) <= info[dim].nbytes));
921 : 2608 : }
922 : :
923 : : /* we should get exactly nbytes of data for this dimension */
924 [ + - ]: 99 : Assert((ptr - start) == info[dim].nbytes);
925 : 99 : }
926 : :
927 : : /* Serialize the items, with uint16 indexes instead of the values. */
928 [ + + ]: 1688 : for (i = 0; i < mcvlist->nitems; i++)
929 : : {
930 : 1652 : MCVItem *mcvitem = &mcvlist->items[i];
931 : :
932 : : /* don't write beyond the allocated space */
933 [ + - ]: 1652 : Assert(ptr <= (endptr - ITEM_SIZE(dim)));
934 : :
935 : : /* copy NULL and frequency flags into the serialized MCV */
936 : 1652 : memcpy(ptr, mcvitem->isnull, sizeof(bool) * ndims);
937 : 1652 : ptr += sizeof(bool) * ndims;
938 : :
939 : 1652 : memcpy(ptr, &mcvitem->frequency, sizeof(double));
940 : 1652 : ptr += sizeof(double);
941 : :
942 : 1652 : memcpy(ptr, &mcvitem->base_frequency, sizeof(double));
943 : 1652 : ptr += sizeof(double);
944 : :
945 : : /* store the indexes last */
946 [ + + ]: 6143 : for (dim = 0; dim < ndims; dim++)
947 : : {
948 : 4491 : uint16 index = 0;
949 : 4491 : Datum *value;
950 : :
951 : : /* do the lookup only for non-NULL values */
952 [ + + ]: 4491 : if (!mcvitem->isnull[dim])
953 : : {
954 : 8956 : value = (Datum *) bsearch_arg(&mcvitem->values[dim], values[dim],
955 : 4478 : info[dim].nvalues, sizeof(Datum),
956 : 4478 : compare_scalars_simple, &ssup[dim]);
957 : :
958 [ + - ]: 4478 : Assert(value != NULL); /* serialization or deduplication
959 : : * error */
960 : :
961 : : /* compute index within the deduplicated array */
962 : 4478 : index = (uint16) (value - values[dim]);
963 : :
964 : : /* check the index is within expected bounds */
965 [ + - ]: 4478 : Assert(index < info[dim].nvalues);
966 : 4478 : }
967 : :
968 : : /* copy the index into the serialized MCV */
969 : 4491 : memcpy(ptr, &index, sizeof(uint16));
970 : 4491 : ptr += sizeof(uint16);
971 : 4491 : }
972 : :
973 : : /* make sure we don't overflow the allocated value */
974 [ + - ]: 1652 : Assert(ptr <= endptr);
975 : 1652 : }
976 : :
977 : : /* at this point we expect to match the total_length exactly */
978 [ + - ]: 36 : Assert(ptr == endptr);
979 : :
980 : 36 : pfree(values);
981 : 36 : pfree(counts);
982 : :
983 : 72 : return raw;
984 : 36 : }
985 : :
986 : : /*
987 : : * statext_mcv_deserialize
988 : : * Reads serialized MCV list into MCVList structure.
989 : : *
990 : : * All the memory needed by the MCV list is allocated as a single chunk, so
991 : : * it's possible to simply pfree() it at once.
992 : : */
993 : : MCVList *
994 : 105 : statext_mcv_deserialize(bytea *data)
995 : : {
996 : 105 : int dim,
997 : : i;
998 : 105 : Size expected_size;
999 : 105 : MCVList *mcvlist;
1000 : 105 : char *raw;
1001 : 105 : char *ptr;
1002 : 105 : char *endptr PG_USED_FOR_ASSERTS_ONLY;
1003 : :
1004 : 105 : int ndims,
1005 : : nitems;
1006 : 105 : DimensionInfo *info = NULL;
1007 : :
1008 : : /* local allocation buffer (used only for deserialization) */
1009 : 105 : Datum **map = NULL;
1010 : :
1011 : : /* MCV list */
1012 : 105 : Size mcvlen;
1013 : :
1014 : : /* buffer used for the result */
1015 : 105 : Size datalen;
1016 : 105 : char *dataptr;
1017 : 105 : char *valuesptr;
1018 : 105 : char *isnullptr;
1019 : :
1020 [ + - ]: 105 : if (data == NULL)
1021 : 0 : return NULL;
1022 : :
1023 : : /*
1024 : : * We can't possibly deserialize a MCV list if there's not even a complete
1025 : : * header. We need an explicit formula here, because we serialize the
1026 : : * header fields one by one, so we need to ignore struct alignment.
1027 : : */
1028 [ + - ]: 105 : if (VARSIZE_ANY(data) < MinSizeOfMCVList)
1029 [ # # # # ]: 0 : elog(ERROR, "invalid MCV size %zu (expected at least %zu)",
1030 : : VARSIZE_ANY(data), MinSizeOfMCVList);
1031 : :
1032 : : /* read the MCV list header */
1033 : 105 : mcvlist = (MCVList *) palloc0(offsetof(MCVList, items));
1034 : :
1035 : : /* pointer to the data part (skip the varlena header) */
1036 : 105 : raw = (char *) data;
1037 : 105 : ptr = VARDATA_ANY(raw);
1038 : 105 : endptr = raw + VARSIZE_ANY(data);
1039 : :
1040 : : /* get the header and perform further sanity checks */
1041 : 105 : memcpy(&mcvlist->magic, ptr, sizeof(uint32));
1042 : 105 : ptr += sizeof(uint32);
1043 : :
1044 : 105 : memcpy(&mcvlist->type, ptr, sizeof(uint32));
1045 : 105 : ptr += sizeof(uint32);
1046 : :
1047 : 105 : memcpy(&mcvlist->nitems, ptr, sizeof(uint32));
1048 : 105 : ptr += sizeof(uint32);
1049 : :
1050 : 105 : memcpy(&mcvlist->ndimensions, ptr, sizeof(AttrNumber));
1051 : 105 : ptr += sizeof(AttrNumber);
1052 : :
1053 [ + - ]: 105 : if (mcvlist->magic != STATS_MCV_MAGIC)
1054 [ # # # # ]: 0 : elog(ERROR, "invalid MCV magic %u (expected %u)",
1055 : : mcvlist->magic, STATS_MCV_MAGIC);
1056 : :
1057 [ + - ]: 105 : if (mcvlist->type != STATS_MCV_TYPE_BASIC)
1058 [ # # # # ]: 0 : elog(ERROR, "invalid MCV type %u (expected %u)",
1059 : : mcvlist->type, STATS_MCV_TYPE_BASIC);
1060 : :
1061 [ + - ]: 105 : if (mcvlist->ndimensions == 0)
1062 [ # # # # ]: 0 : elog(ERROR, "invalid zero-length dimension array in MCVList");
1063 [ + - ]: 105 : else if ((mcvlist->ndimensions > STATS_MAX_DIMENSIONS) ||
1064 : 105 : (mcvlist->ndimensions < 0))
1065 [ # # # # ]: 0 : elog(ERROR, "invalid length (%d) dimension array in MCVList",
1066 : : mcvlist->ndimensions);
1067 : :
1068 [ + - ]: 105 : if (mcvlist->nitems == 0)
1069 [ # # # # ]: 0 : elog(ERROR, "invalid zero-length item array in MCVList");
1070 [ + - ]: 105 : else if (mcvlist->nitems > STATS_MCVLIST_MAX_ITEMS)
1071 [ # # # # ]: 0 : elog(ERROR, "invalid length (%u) item array in MCVList",
1072 : : mcvlist->nitems);
1073 : :
1074 : 105 : nitems = mcvlist->nitems;
1075 : 105 : ndims = mcvlist->ndimensions;
1076 : :
1077 : : /*
1078 : : * Check amount of data including DimensionInfo for all dimensions and
1079 : : * also the serialized items (including uint16 indexes). Also, walk
1080 : : * through the dimension information and add it to the sum.
1081 : : */
1082 : 105 : expected_size = SizeOfMCVList(ndims, nitems);
1083 : :
1084 : : /*
1085 : : * Check that we have at least the dimension and info records, along with
1086 : : * the items. We don't know the size of the serialized values yet. We need
1087 : : * to do this check first, before accessing the dimension info.
1088 : : */
1089 [ + - ]: 105 : if (VARSIZE_ANY(data) < expected_size)
1090 [ # # # # ]: 0 : elog(ERROR, "invalid MCV size %zu (expected %zu)",
1091 : : VARSIZE_ANY(data), expected_size);
1092 : :
1093 : : /* Now copy the array of type Oids. */
1094 : 105 : memcpy(mcvlist->types, ptr, sizeof(Oid) * ndims);
1095 : 105 : ptr += (sizeof(Oid) * ndims);
1096 : :
1097 : : /* Now it's safe to access the dimension info. */
1098 : 105 : info = palloc(ndims * sizeof(DimensionInfo));
1099 : :
1100 : 105 : memcpy(info, ptr, ndims * sizeof(DimensionInfo));
1101 : 105 : ptr += (ndims * sizeof(DimensionInfo));
1102 : :
1103 : : /* account for the value arrays */
1104 [ + + ]: 422 : for (dim = 0; dim < ndims; dim++)
1105 : : {
1106 : : /*
1107 : : * XXX I wonder if we can/should rely on asserts here. Maybe those
1108 : : * checks should be done every time?
1109 : : */
1110 [ + - ]: 317 : Assert(info[dim].nvalues >= 0);
1111 [ + - ]: 317 : Assert(info[dim].nbytes >= 0);
1112 : :
1113 : 317 : expected_size += info[dim].nbytes;
1114 : 317 : }
1115 : :
1116 : : /*
1117 : : * Now we know the total expected MCV size, including all the pieces
1118 : : * (header, dimension info. items and deduplicated data). So do the final
1119 : : * check on size.
1120 : : */
1121 [ + - ]: 105 : if (VARSIZE_ANY(data) != expected_size)
1122 [ # # # # ]: 0 : elog(ERROR, "invalid MCV size %zu (expected %zu)",
1123 : : VARSIZE_ANY(data), expected_size);
1124 : :
1125 : : /*
1126 : : * We need an array of Datum values for each dimension, so that we can
1127 : : * easily translate the uint16 indexes later. We also need a top-level
1128 : : * array of pointers to those per-dimension arrays.
1129 : : *
1130 : : * While allocating the arrays for dimensions, compute how much space we
1131 : : * need for a copy of the by-ref data, as we can't simply point to the
1132 : : * original values (it might go away).
1133 : : */
1134 : 105 : datalen = 0; /* space for by-ref data */
1135 : 105 : map = palloc_array(Datum *, ndims);
1136 : :
1137 [ + + ]: 422 : for (dim = 0; dim < ndims; dim++)
1138 : : {
1139 : 317 : map[dim] = palloc_array(Datum, info[dim].nvalues);
1140 : :
1141 : : /* space needed for a copy of data for by-ref types */
1142 : 317 : datalen += info[dim].nbytes_aligned;
1143 : 317 : }
1144 : :
1145 : : /*
1146 : : * Now resize the MCV list so that the allocation includes all the data.
1147 : : *
1148 : : * Allocate space for a copy of the data, as we can't simply reference the
1149 : : * serialized data - it's not aligned properly, and it may disappear while
1150 : : * we're still using the MCV list, e.g. due to catcache release.
1151 : : *
1152 : : * We do care about alignment here, because we will allocate all the
1153 : : * pieces at once, but then use pointers to different parts.
1154 : : */
1155 : 105 : mcvlen = MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1156 : :
1157 : : /* arrays of values and isnull flags for all MCV items */
1158 : 105 : mcvlen += nitems * MAXALIGN(sizeof(Datum) * ndims);
1159 : 105 : mcvlen += nitems * MAXALIGN(sizeof(bool) * ndims);
1160 : :
1161 : : /* we don't quite need to align this, but it makes some asserts easier */
1162 : 105 : mcvlen += MAXALIGN(datalen);
1163 : :
1164 : : /* now resize the deserialized MCV list, and compute pointers to parts */
1165 : 105 : mcvlist = repalloc(mcvlist, mcvlen);
1166 : :
1167 : : /* pointer to the beginning of values/isnull arrays */
1168 : 210 : valuesptr = (char *) mcvlist
1169 : 105 : + MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1170 : :
1171 : 105 : isnullptr = valuesptr + (nitems * MAXALIGN(sizeof(Datum) * ndims));
1172 : :
1173 : 105 : dataptr = isnullptr + (nitems * MAXALIGN(sizeof(bool) * ndims));
1174 : :
1175 : : /*
1176 : : * Build mapping (index => value) for translating the serialized data into
1177 : : * the in-memory representation.
1178 : : */
1179 [ + + ]: 422 : for (dim = 0; dim < ndims; dim++)
1180 : : {
1181 : : /* remember start position in the input array */
1182 : 317 : char *start PG_USED_FOR_ASSERTS_ONLY = ptr;
1183 : :
1184 [ + + ]: 317 : if (info[dim].typbyval)
1185 : : {
1186 : : /* for by-val types we simply copy data into the mapping */
1187 [ + + ]: 9491 : for (i = 0; i < info[dim].nvalues; i++)
1188 : : {
1189 : 9269 : Datum v = 0;
1190 : :
1191 : 9269 : memcpy(&v, ptr, info[dim].typlen);
1192 : 9269 : ptr += info[dim].typlen;
1193 : :
1194 : 9269 : map[dim][i] = fetch_att(&v, true, info[dim].typlen);
1195 : :
1196 : : /* no under/overflow of input array */
1197 [ + - ]: 9269 : Assert(ptr <= (start + info[dim].nbytes));
1198 : 9269 : }
1199 : 222 : }
1200 : : else
1201 : : {
1202 : : /* for by-ref types we need to also make a copy of the data */
1203 : :
1204 : : /* passed by reference, but fixed length (name, tid, ...) */
1205 [ + + ]: 95 : if (info[dim].typlen > 0)
1206 : : {
1207 [ + + ]: 367 : for (i = 0; i < info[dim].nvalues; i++)
1208 : : {
1209 : 360 : memcpy(dataptr, ptr, info[dim].typlen);
1210 : 360 : ptr += info[dim].typlen;
1211 : :
1212 : : /* just point into the array */
1213 : 360 : map[dim][i] = PointerGetDatum(dataptr);
1214 : 360 : dataptr += MAXALIGN(info[dim].typlen);
1215 : 360 : }
1216 : 7 : }
1217 [ - + ]: 88 : else if (info[dim].typlen == -1)
1218 : : {
1219 : : /* varlena */
1220 [ + + ]: 2504 : for (i = 0; i < info[dim].nvalues; i++)
1221 : : {
1222 : 2416 : uint32 len;
1223 : :
1224 : : /* read the uint32 length */
1225 : 2416 : memcpy(&len, ptr, sizeof(uint32));
1226 : 2416 : ptr += sizeof(uint32);
1227 : :
1228 : : /* the length is data-only */
1229 : 2416 : SET_VARSIZE(dataptr, len + VARHDRSZ);
1230 : 2416 : memcpy(VARDATA(dataptr), ptr, len);
1231 : 2416 : ptr += len;
1232 : :
1233 : : /* just point into the array */
1234 : 2416 : map[dim][i] = PointerGetDatum(dataptr);
1235 : :
1236 : : /* skip to place of the next deserialized value */
1237 : 2416 : dataptr += MAXALIGN(len + VARHDRSZ);
1238 : 2416 : }
1239 : 88 : }
1240 [ # # ]: 0 : else if (info[dim].typlen == -2)
1241 : : {
1242 : : /* cstring */
1243 [ # # ]: 0 : for (i = 0; i < info[dim].nvalues; i++)
1244 : : {
1245 : 0 : uint32 len;
1246 : :
1247 : 0 : memcpy(&len, ptr, sizeof(uint32));
1248 : 0 : ptr += sizeof(uint32);
1249 : :
1250 : 0 : memcpy(dataptr, ptr, len);
1251 : 0 : ptr += len;
1252 : :
1253 : : /* just point into the array */
1254 : 0 : map[dim][i] = PointerGetDatum(dataptr);
1255 : 0 : dataptr += MAXALIGN(len);
1256 : 0 : }
1257 : 0 : }
1258 : :
1259 : : /* no under/overflow of input array */
1260 [ - + ]: 95 : Assert(ptr <= (start + info[dim].nbytes));
1261 : :
1262 : : /* no overflow of the output mcv value */
1263 [ - + ]: 95 : Assert(dataptr <= ((char *) mcvlist + mcvlen));
1264 : : }
1265 : :
1266 : : /* check we consumed input data for this dimension exactly */
1267 [ - + ]: 317 : Assert(ptr == (start + info[dim].nbytes));
1268 : 317 : }
1269 : :
1270 : : /* we should have also filled the MCV list exactly */
1271 [ + - ]: 105 : Assert(dataptr == ((char *) mcvlist + mcvlen));
1272 : :
1273 : : /* deserialize the MCV items and translate the indexes to Datums */
1274 [ + + ]: 7202 : for (i = 0; i < nitems; i++)
1275 : : {
1276 : 7097 : MCVItem *item = &mcvlist->items[i];
1277 : :
1278 : 7097 : item->values = (Datum *) valuesptr;
1279 : 7097 : valuesptr += MAXALIGN(sizeof(Datum) * ndims);
1280 : :
1281 : 7097 : item->isnull = (bool *) isnullptr;
1282 : 7097 : isnullptr += MAXALIGN(sizeof(bool) * ndims);
1283 : :
1284 : 7097 : memcpy(item->isnull, ptr, sizeof(bool) * ndims);
1285 : 7097 : ptr += sizeof(bool) * ndims;
1286 : :
1287 : 7097 : memcpy(&item->frequency, ptr, sizeof(double));
1288 : 7097 : ptr += sizeof(double);
1289 : :
1290 : 7097 : memcpy(&item->base_frequency, ptr, sizeof(double));
1291 : 7097 : ptr += sizeof(double);
1292 : :
1293 : : /* finally translate the indexes (for non-NULL only) */
1294 [ + + ]: 28616 : for (dim = 0; dim < ndims; dim++)
1295 : : {
1296 : 21519 : uint16 index;
1297 : :
1298 : 21519 : memcpy(&index, ptr, sizeof(uint16));
1299 : 21519 : ptr += sizeof(uint16);
1300 : :
1301 [ + + ]: 21519 : if (item->isnull[dim])
1302 : 51 : continue;
1303 : :
1304 : 21468 : item->values[dim] = map[dim][index];
1305 [ - + + ]: 21519 : }
1306 : :
1307 : : /* check we're not overflowing the input */
1308 [ + - ]: 7097 : Assert(ptr <= endptr);
1309 : 7097 : }
1310 : :
1311 : : /* check that we processed all the data */
1312 [ + - ]: 105 : Assert(ptr == endptr);
1313 : :
1314 : : /* release the buffers used for mapping */
1315 [ + + ]: 422 : for (dim = 0; dim < ndims; dim++)
1316 : 317 : pfree(map[dim]);
1317 : :
1318 : 105 : pfree(map);
1319 : :
1320 : 105 : return mcvlist;
1321 : 105 : }
1322 : :
1323 : : /*
1324 : : * SRF with details about buckets of a histogram:
1325 : : *
1326 : : * - item ID (0...nitems)
1327 : : * - values (string array)
1328 : : * - nulls only (boolean array)
1329 : : * - frequency (double precision)
1330 : : * - base_frequency (double precision)
1331 : : *
1332 : : * The input is the OID of the statistics, and there are no rows returned if
1333 : : * the statistics contains no histogram.
1334 : : */
1335 : : Datum
1336 : 13 : pg_stats_ext_mcvlist_items(PG_FUNCTION_ARGS)
1337 : : {
1338 : 13 : FuncCallContext *funcctx;
1339 : :
1340 : : /* stuff done only on the first call of the function */
1341 [ + + ]: 13 : if (SRF_IS_FIRSTCALL())
1342 : : {
1343 : 4 : MemoryContext oldcontext;
1344 : 4 : MCVList *mcvlist;
1345 : 4 : TupleDesc tupdesc;
1346 : :
1347 : : /* create a function context for cross-call persistence */
1348 : 4 : funcctx = SRF_FIRSTCALL_INIT();
1349 : :
1350 : : /* switch to memory context appropriate for multiple function calls */
1351 : 4 : oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
1352 : :
1353 : 4 : mcvlist = statext_mcv_deserialize(PG_GETARG_BYTEA_P(0));
1354 : :
1355 : 4 : funcctx->user_fctx = mcvlist;
1356 : :
1357 : : /* total number of tuples to be returned */
1358 : 4 : funcctx->max_calls = 0;
1359 [ - + ]: 4 : if (funcctx->user_fctx != NULL)
1360 : 4 : funcctx->max_calls = mcvlist->nitems;
1361 : :
1362 : : /* Build a tuple descriptor for our result type */
1363 [ + - ]: 4 : if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1364 [ # # # # ]: 0 : ereport(ERROR,
1365 : : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1366 : : errmsg("function returning record called in context "
1367 : : "that cannot accept type record")));
1368 : 4 : tupdesc = BlessTupleDesc(tupdesc);
1369 : :
1370 : : /*
1371 : : * generate attribute metadata needed later to produce tuples from raw
1372 : : * C strings
1373 : : */
1374 : 4 : funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
1375 : :
1376 : 4 : MemoryContextSwitchTo(oldcontext);
1377 : 4 : }
1378 : :
1379 : : /* stuff done on every call of the function */
1380 : 13 : funcctx = SRF_PERCALL_SETUP();
1381 : :
1382 [ + + ]: 13 : if (funcctx->call_cntr < funcctx->max_calls) /* do when there is more
1383 : : * left to send */
1384 : : {
1385 : 9 : Datum values[5];
1386 : 9 : bool nulls[5];
1387 : 9 : HeapTuple tuple;
1388 : 9 : Datum result;
1389 : 9 : ArrayBuildState *astate_values = NULL;
1390 : 9 : ArrayBuildState *astate_nulls = NULL;
1391 : :
1392 : 9 : int i;
1393 : 9 : MCVList *mcvlist;
1394 : 9 : MCVItem *item;
1395 : :
1396 : 9 : mcvlist = (MCVList *) funcctx->user_fctx;
1397 : :
1398 [ + - ]: 9 : Assert(funcctx->call_cntr < mcvlist->nitems);
1399 : :
1400 : 9 : item = &mcvlist->items[funcctx->call_cntr];
1401 : :
1402 [ + + ]: 30 : for (i = 0; i < mcvlist->ndimensions; i++)
1403 : : {
1404 : :
1405 : 42 : astate_nulls = accumArrayResult(astate_nulls,
1406 : 21 : BoolGetDatum(item->isnull[i]),
1407 : : false,
1408 : : BOOLOID,
1409 : 21 : CurrentMemoryContext);
1410 : :
1411 [ + + ]: 21 : if (!item->isnull[i])
1412 : : {
1413 : 17 : bool isvarlena;
1414 : 17 : Oid outfunc;
1415 : 17 : FmgrInfo fmgrinfo;
1416 : 17 : Datum val;
1417 : 17 : text *txt;
1418 : :
1419 : : /* lookup output func for the type */
1420 : 17 : getTypeOutputInfo(mcvlist->types[i], &outfunc, &isvarlena);
1421 : 17 : fmgr_info(outfunc, &fmgrinfo);
1422 : :
1423 : 17 : val = FunctionCall1(&fmgrinfo, item->values[i]);
1424 : 17 : txt = cstring_to_text(DatumGetPointer(val));
1425 : :
1426 : 34 : astate_values = accumArrayResult(astate_values,
1427 : 17 : PointerGetDatum(txt),
1428 : : false,
1429 : : TEXTOID,
1430 : 17 : CurrentMemoryContext);
1431 : 17 : }
1432 : : else
1433 : 8 : astate_values = accumArrayResult(astate_values,
1434 : : (Datum) 0,
1435 : : true,
1436 : : TEXTOID,
1437 : 4 : CurrentMemoryContext);
1438 : 21 : }
1439 : :
1440 : 9 : values[0] = Int32GetDatum(funcctx->call_cntr);
1441 : 9 : values[1] = makeArrayResult(astate_values, CurrentMemoryContext);
1442 : 9 : values[2] = makeArrayResult(astate_nulls, CurrentMemoryContext);
1443 : 9 : values[3] = Float8GetDatum(item->frequency);
1444 : 9 : values[4] = Float8GetDatum(item->base_frequency);
1445 : :
1446 : : /* no NULLs in the tuple */
1447 : 9 : memset(nulls, 0, sizeof(nulls));
1448 : :
1449 : : /* build a tuple */
1450 : 9 : tuple = heap_form_tuple(funcctx->attinmeta->tupdesc, values, nulls);
1451 : :
1452 : : /* make the tuple into a datum */
1453 : 9 : result = HeapTupleGetDatum(tuple);
1454 : :
1455 : 9 : SRF_RETURN_NEXT(funcctx, result);
1456 [ + - ]: 9 : }
1457 : : else /* do when there is no more left */
1458 : : {
1459 [ + - ]: 4 : SRF_RETURN_DONE(funcctx);
1460 : : }
1461 [ - + ]: 13 : }
1462 : :
1463 : : /*
1464 : : * pg_mcv_list_in - input routine for type pg_mcv_list.
1465 : : *
1466 : : * pg_mcv_list is real enough to be a table column, but it has no operations
1467 : : * of its own, and disallows input too
1468 : : */
1469 : : Datum
1470 : 0 : pg_mcv_list_in(PG_FUNCTION_ARGS)
1471 : : {
1472 : : /*
1473 : : * pg_mcv_list stores the data in binary form and parsing text input is
1474 : : * not needed, so disallow this.
1475 : : */
1476 [ # # # # ]: 0 : ereport(ERROR,
1477 : : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1478 : : errmsg("cannot accept a value of type %s", "pg_mcv_list")));
1479 : :
1480 : 0 : PG_RETURN_VOID(); /* keep compiler quiet */
1481 : : }
1482 : :
1483 : :
1484 : : /*
1485 : : * pg_mcv_list_out - output routine for type pg_mcv_list.
1486 : : *
1487 : : * MCV lists are serialized into a bytea value, so we simply call byteaout()
1488 : : * to serialize the value into text. But it'd be nice to serialize that into
1489 : : * a meaningful representation (e.g. for inspection by people).
1490 : : *
1491 : : * XXX This should probably return something meaningful, similar to what
1492 : : * pg_dependencies_out does. Not sure how to deal with the deduplicated
1493 : : * values, though - do we want to expand that or not?
1494 : : */
1495 : : Datum
1496 : 0 : pg_mcv_list_out(PG_FUNCTION_ARGS)
1497 : : {
1498 : 0 : return byteaout(fcinfo);
1499 : : }
1500 : :
1501 : : /*
1502 : : * pg_mcv_list_recv - binary input routine for type pg_mcv_list.
1503 : : */
1504 : : Datum
1505 : 0 : pg_mcv_list_recv(PG_FUNCTION_ARGS)
1506 : : {
1507 [ # # # # ]: 0 : ereport(ERROR,
1508 : : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1509 : : errmsg("cannot accept a value of type %s", "pg_mcv_list")));
1510 : :
1511 : 0 : PG_RETURN_VOID(); /* keep compiler quiet */
1512 : : }
1513 : :
1514 : : /*
1515 : : * pg_mcv_list_send - binary output routine for type pg_mcv_list.
1516 : : *
1517 : : * MCV lists are serialized in a bytea value (although the type is named
1518 : : * differently), so let's just send that.
1519 : : */
1520 : : Datum
1521 : 0 : pg_mcv_list_send(PG_FUNCTION_ARGS)
1522 : : {
1523 : 0 : return byteasend(fcinfo);
1524 : : }
1525 : :
1526 : : /*
1527 : : * match the attribute/expression to a dimension of the statistic
1528 : : *
1529 : : * Returns the zero-based index of the matching statistics dimension.
1530 : : * Optionally determines the collation.
1531 : : */
1532 : : static int
1533 : 235 : mcv_match_expression(Node *expr, Bitmapset *keys, List *exprs, Oid *collid)
1534 : : {
1535 : 235 : int idx;
1536 : :
1537 [ + + ]: 235 : if (IsA(expr, Var))
1538 : : {
1539 : : /* simple Var, so just lookup using varattno */
1540 : 194 : Var *var = (Var *) expr;
1541 : :
1542 [ + + ]: 194 : if (collid)
1543 : 183 : *collid = var->varcollid;
1544 : :
1545 : 194 : idx = bms_member_index(keys, var->varattno);
1546 : :
1547 [ + - ]: 194 : if (idx < 0)
1548 [ # # # # ]: 0 : elog(ERROR, "variable not found in statistics object");
1549 : 194 : }
1550 : : else
1551 : : {
1552 : : /* expression - lookup in stats expressions */
1553 : 41 : ListCell *lc;
1554 : :
1555 [ + + ]: 41 : if (collid)
1556 : 40 : *collid = exprCollation(expr);
1557 : :
1558 : : /* expressions are stored after the simple columns */
1559 : 41 : idx = bms_num_members(keys);
1560 [ + - - + : 117 : foreach(lc, exprs)
+ - ]
1561 : : {
1562 : 76 : Node *stat_expr = (Node *) lfirst(lc);
1563 : :
1564 [ + + ]: 76 : if (equal(expr, stat_expr))
1565 : 41 : break;
1566 : :
1567 : 35 : idx++;
1568 [ + + ]: 76 : }
1569 : :
1570 [ + - ]: 41 : if (lc == NULL)
1571 [ # # # # ]: 0 : elog(ERROR, "expression not found in statistics object");
1572 : 41 : }
1573 : :
1574 : 470 : return idx;
1575 : 235 : }
1576 : :
1577 : : /*
1578 : : * mcv_get_match_bitmap
1579 : : * Evaluate clauses using the MCV list, and update the match bitmap.
1580 : : *
1581 : : * A match bitmap keeps match/mismatch status for each MCV item, and we
1582 : : * update it based on additional clauses. We also use it to skip items
1583 : : * that can't possibly match (e.g. item marked as "mismatch" can't change
1584 : : * to "match" when evaluating AND clause list).
1585 : : *
1586 : : * The function also returns a flag indicating whether there was an
1587 : : * equality condition for all attributes, the minimum frequency in the MCV
1588 : : * list, and a total MCV frequency (sum of frequencies for all items).
1589 : : *
1590 : : * XXX Currently the match bitmap uses a bool for each MCV item, which is
1591 : : * somewhat wasteful as we could do with just a single bit, thus reducing
1592 : : * the size to ~1/8. It would also allow us to combine bitmaps simply using
1593 : : * & and |, which should be faster than min/max. The bitmaps are fairly
1594 : : * small, though (thanks to the cap on the MCV list size).
1595 : : */
1596 : : static bool *
1597 : 141 : mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
1598 : : Bitmapset *keys, List *exprs,
1599 : : MCVList *mcvlist, bool is_or)
1600 : : {
1601 : 141 : ListCell *l;
1602 : 141 : bool *matches;
1603 : :
1604 : : /* The bitmap may be partially built. */
1605 [ + - ]: 141 : Assert(clauses != NIL);
1606 [ + - ]: 141 : Assert(mcvlist != NULL);
1607 [ + - ]: 141 : Assert(mcvlist->nitems > 0);
1608 [ + - ]: 141 : Assert(mcvlist->nitems <= STATS_MCVLIST_MAX_ITEMS);
1609 : :
1610 : 141 : matches = palloc_array(bool, mcvlist->nitems);
1611 : 141 : memset(matches, !is_or, sizeof(bool) * mcvlist->nitems);
1612 : :
1613 : : /*
1614 : : * Loop through the list of clauses, and for each of them evaluate all the
1615 : : * MCV items not yet eliminated by the preceding clauses.
1616 : : */
1617 [ + - + + : 405 : foreach(l, clauses)
+ + ]
1618 : : {
1619 : 13856 : Node *clause = (Node *) lfirst(l);
1620 : :
1621 : : /* if it's a RestrictInfo, then extract the clause */
1622 [ + + ]: 13856 : if (IsA(clause, RestrictInfo))
1623 : 245 : clause = (Node *) ((RestrictInfo *) clause)->clause;
1624 : :
1625 : : /*
1626 : : * Handle the various types of clauses - OpClause, NullTest and
1627 : : * AND/OR/NOT
1628 : : */
1629 [ + + ]: 13856 : if (is_opclause(clause))
1630 : : {
1631 : 9931 : OpExpr *expr = (OpExpr *) clause;
1632 : 9931 : FmgrInfo opproc;
1633 : :
1634 : : /* valid only after examine_opclause_args returns true */
1635 : 9931 : Node *clause_expr;
1636 : 9931 : Const *cst;
1637 : 9931 : bool expronleft;
1638 : 9931 : int idx;
1639 : 9931 : Oid collid;
1640 : :
1641 : 9931 : fmgr_info(get_opcode(expr->opno), &opproc);
1642 : :
1643 : : /* extract the var/expr and const from the expression */
1644 [ + - ]: 9931 : if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
1645 [ # # # # ]: 0 : elog(ERROR, "incompatible clause");
1646 : :
1647 : : /* match the attribute/expression to a dimension of the statistic */
1648 : 9931 : idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
1649 : :
1650 : : /*
1651 : : * Walk through the MCV items and evaluate the current clause. We
1652 : : * can skip items that were already ruled out, and terminate if
1653 : : * there are no remaining MCV items that might possibly match.
1654 : : */
1655 [ + + ]: 22657 : for (int i = 0; i < mcvlist->nitems; i++)
1656 : : {
1657 : 22480 : bool match = true;
1658 : 22480 : MCVItem *item = &mcvlist->items[i];
1659 : :
1660 [ + - ]: 22480 : Assert(idx >= 0);
1661 : :
1662 : : /*
1663 : : * When the MCV item or the Const value is NULL we can treat
1664 : : * this as a mismatch. We must not call the operator because
1665 : : * of strictness.
1666 : : */
1667 [ + + + + ]: 22480 : if (item->isnull[idx] || cst->constisnull)
1668 : : {
1669 [ + + - + : 19538 : matches[i] = RESULT_MERGE(matches[i], is_or, false);
- + ]
1670 : 8 : continue;
1671 : : }
1672 : :
1673 : : /*
1674 : : * Skip MCV items that can't change result in the bitmap. Once
1675 : : * the value gets false for AND-lists, or true for OR-lists,
1676 : : * we don't need to look at more clauses.
1677 : : */
1678 [ + + + + ]: 22488 : if (RESULT_IS_FINAL(matches[i], is_or))
1679 : 5087 : continue;
1680 : :
1681 : : /*
1682 : : * First check whether the constant is below the lower
1683 : : * boundary (in that case we can skip the bucket, because
1684 : : * there's no overlap).
1685 : : *
1686 : : * We don't store collations used to build the statistics, but
1687 : : * we can use the collation for the attribute itself, as
1688 : : * stored in varcollid. We do reset the statistics after a
1689 : : * type change (including collation change), so this is OK.
1690 : : * For expressions, we use the collation extracted from the
1691 : : * expression itself.
1692 : : */
1693 [ + + ]: 17401 : if (expronleft)
1694 : 7195 : match = DatumGetBool(FunctionCall2Coll(&opproc,
1695 : 7195 : collid,
1696 : 7195 : item->values[idx],
1697 : 7195 : cst->constvalue));
1698 : : else
1699 : 436 : match = DatumGetBool(FunctionCall2Coll(&opproc,
1700 : 436 : collid,
1701 : 436 : cst->constvalue,
1702 : 436 : item->values[idx]));
1703 : :
1704 : : /* update the match bitmap with the result */
1705 [ + + - + : 7631 : matches[i] = RESULT_MERGE(matches[i], is_or, match);
- + ]
1706 [ + + ]: 12726 : }
1707 : 177 : }
1708 [ + + ]: 3925 : else if (IsA(clause, ScalarArrayOpExpr))
1709 : : {
1710 : 3884 : ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause;
1711 : 3884 : FmgrInfo opproc;
1712 : :
1713 : : /* valid only after examine_opclause_args returns true */
1714 : 3884 : Node *clause_expr;
1715 : 3884 : Const *cst;
1716 : 3884 : bool expronleft;
1717 : 3884 : Oid collid;
1718 : 3884 : int idx;
1719 : :
1720 : : /* array evaluation */
1721 : 3884 : ArrayType *arrayval;
1722 : 3884 : int16 elmlen;
1723 : 3884 : bool elmbyval;
1724 : 3884 : char elmalign;
1725 : 3884 : int num_elems;
1726 : 3884 : Datum *elem_values;
1727 : 3884 : bool *elem_nulls;
1728 : :
1729 : 3884 : fmgr_info(get_opcode(expr->opno), &opproc);
1730 : :
1731 : : /* extract the var/expr and const from the expression */
1732 [ + - ]: 3884 : if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
1733 [ # # # # ]: 0 : elog(ERROR, "incompatible clause");
1734 : :
1735 : : /* We expect Var on left */
1736 [ + - ]: 3884 : if (!expronleft)
1737 [ # # # # ]: 0 : elog(ERROR, "incompatible clause");
1738 : :
1739 : : /*
1740 : : * Deconstruct the array constant, unless it's NULL (we'll cover
1741 : : * that case below)
1742 : : */
1743 [ + + ]: 3884 : if (!cst->constisnull)
1744 : : {
1745 : 46 : arrayval = DatumGetArrayTypeP(cst->constvalue);
1746 : 46 : get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
1747 : : &elmlen, &elmbyval, &elmalign);
1748 : 92 : deconstruct_array(arrayval,
1749 : 46 : ARR_ELEMTYPE(arrayval),
1750 : 46 : elmlen, elmbyval, elmalign,
1751 : : &elem_values, &elem_nulls, &num_elems);
1752 : 46 : }
1753 : :
1754 : : /* match the attribute/expression to a dimension of the statistic */
1755 : 3884 : idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
1756 : :
1757 : : /*
1758 : : * Walk through the MCV items and evaluate the current clause. We
1759 : : * can skip items that were already ruled out, and terminate if
1760 : : * there are no remaining MCV items that might possibly match.
1761 : : */
1762 [ + + ]: 7666 : for (int i = 0; i < mcvlist->nitems; i++)
1763 : : {
1764 : 7620 : int j;
1765 : 7620 : bool match = !expr->useOr;
1766 : 7620 : MCVItem *item = &mcvlist->items[i];
1767 : :
1768 : : /*
1769 : : * When the MCV item or the Const value is NULL we can treat
1770 : : * this as a mismatch. We must not call the operator because
1771 : : * of strictness.
1772 : : */
1773 [ + + + + ]: 7620 : if (item->isnull[idx] || cst->constisnull)
1774 : : {
1775 [ - + # # : 7686 : matches[i] = RESULT_MERGE(matches[i], is_or, false);
+ + ]
1776 : 3 : continue;
1777 : : }
1778 : :
1779 : : /*
1780 : : * Skip MCV items that can't change result in the bitmap. Once
1781 : : * the value gets false for AND-lists, or true for OR-lists,
1782 : : * we don't need to look at more clauses.
1783 : : */
1784 [ + + + + ]: 7623 : if (RESULT_IS_FINAL(matches[i], is_or))
1785 : 1924 : continue;
1786 : :
1787 [ + + ]: 9803 : for (j = 0; j < num_elems; j++)
1788 : : {
1789 : 5163 : Datum elem_value = elem_values[j];
1790 : 5163 : bool elem_isnull = elem_nulls[j];
1791 : 5163 : bool elem_match;
1792 : :
1793 : : /* NULL values always evaluate as not matching. */
1794 [ + + ]: 5163 : if (elem_isnull)
1795 : : {
1796 [ + - + + : 352 : match = RESULT_MERGE(match, expr->useOr, false);
# # ]
1797 : 352 : continue;
1798 : : }
1799 : :
1800 : : /*
1801 : : * Stop evaluating the array elements once we reach a
1802 : : * matching value that can't change - ALL() is the same as
1803 : : * AND-list, ANY() is the same as OR-list.
1804 : : */
1805 [ + + + + ]: 4811 : if (RESULT_IS_FINAL(match, expr->useOr))
1806 : 1059 : break;
1807 : :
1808 : 3752 : elem_match = DatumGetBool(FunctionCall2Coll(&opproc,
1809 : 3752 : collid,
1810 : 3752 : item->values[idx],
1811 : 3752 : elem_value));
1812 : :
1813 [ + + - + : 3752 : match = RESULT_MERGE(match, expr->useOr, elem_match);
- + ]
1814 [ + + + ]: 5163 : }
1815 : :
1816 : : /* update the match bitmap with the result */
1817 [ + + - + : 1855 : matches[i] = RESULT_MERGE(matches[i], is_or, match);
- + ]
1818 [ + + ]: 3782 : }
1819 : 46 : }
1820 [ + + ]: 41 : else if (IsA(clause, NullTest))
1821 : : {
1822 : 11 : NullTest *expr = (NullTest *) clause;
1823 : 11 : Node *clause_expr = (Node *) (expr->arg);
1824 : :
1825 : : /* match the attribute/expression to a dimension of the statistic */
1826 : 11 : int idx = mcv_match_expression(clause_expr, keys, exprs, NULL);
1827 : :
1828 : : /*
1829 : : * Walk through the MCV items and evaluate the current clause. We
1830 : : * can skip items that were already ruled out, and terminate if
1831 : : * there are no remaining MCV items that might possibly match.
1832 : : */
1833 [ + + ]: 1013 : for (int i = 0; i < mcvlist->nitems; i++)
1834 : : {
1835 : 1002 : bool match = false; /* assume mismatch */
1836 : 1002 : MCVItem *item = &mcvlist->items[i];
1837 : :
1838 : : /* if the clause mismatches the MCV item, update the bitmap */
1839 [ - + + ]: 1002 : switch (expr->nulltesttype)
1840 : : {
1841 : : case IS_NULL:
1842 [ + + ]: 702 : match = (item->isnull[idx]) ? true : match;
1843 : 702 : break;
1844 : :
1845 : : case IS_NOT_NULL:
1846 [ + + ]: 300 : match = (!item->isnull[idx]) ? true : match;
1847 : 300 : break;
1848 : : }
1849 : :
1850 : : /* now, update the match bitmap, depending on OR/AND type */
1851 [ - + # # : 1002 : matches[i] = RESULT_MERGE(matches[i], is_or, match);
+ + ]
1852 : 1002 : }
1853 : 11 : }
1854 [ + + + + ]: 30 : else if (is_orclause(clause) || is_andclause(clause))
1855 : : {
1856 : : /* AND/OR clause, with all subclauses being compatible */
1857 : :
1858 : 11 : int i;
1859 : 11 : BoolExpr *bool_clause = ((BoolExpr *) clause);
1860 : 11 : List *bool_clauses = bool_clause->args;
1861 : :
1862 : : /* match/mismatch bitmap for each MCV item */
1863 : 11 : bool *bool_matches = NULL;
1864 : :
1865 [ - + ]: 11 : Assert(bool_clauses != NIL);
1866 [ - + ]: 11 : Assert(list_length(bool_clauses) >= 2);
1867 : :
1868 : : /* build the match bitmap for the OR-clauses */
1869 : 22 : bool_matches = mcv_get_match_bitmap(root, bool_clauses, keys, exprs,
1870 : 11 : mcvlist, is_orclause(clause));
1871 : :
1872 : : /*
1873 : : * Merge the bitmap produced by mcv_get_match_bitmap into the
1874 : : * current one. We need to consider if we're evaluating AND or OR
1875 : : * condition when merging the results.
1876 : : */
1877 [ + + ]: 727 : for (i = 0; i < mcvlist->nitems; i++)
1878 [ - + # # : 716 : matches[i] = RESULT_MERGE(matches[i], is_or, bool_matches[i]);
- + ]
1879 : :
1880 : 11 : pfree(bool_matches);
1881 : 11 : }
1882 [ + + ]: 19 : else if (is_notclause(clause))
1883 : : {
1884 : : /* NOT clause, with all subclauses compatible */
1885 : :
1886 : 5 : int i;
1887 : 5 : BoolExpr *not_clause = ((BoolExpr *) clause);
1888 : 5 : List *not_args = not_clause->args;
1889 : :
1890 : : /* match/mismatch bitmap for each MCV item */
1891 : 5 : bool *not_matches = NULL;
1892 : :
1893 [ - + ]: 5 : Assert(not_args != NIL);
1894 [ - + ]: 5 : Assert(list_length(not_args) == 1);
1895 : :
1896 : : /* build the match bitmap for the NOT-clause */
1897 : 10 : not_matches = mcv_get_match_bitmap(root, not_args, keys, exprs,
1898 : 5 : mcvlist, false);
1899 : :
1900 : : /*
1901 : : * Merge the bitmap produced by mcv_get_match_bitmap into the
1902 : : * current one. We're handling a NOT clause, so invert the result
1903 : : * before merging it into the global bitmap.
1904 : : */
1905 [ + + ]: 25 : for (i = 0; i < mcvlist->nitems; i++)
1906 [ - + # # : 20 : matches[i] = RESULT_MERGE(matches[i], is_or, !not_matches[i]);
+ + ]
1907 : :
1908 : 5 : pfree(not_matches);
1909 : 5 : }
1910 [ + + ]: 14 : else if (IsA(clause, Var))
1911 : : {
1912 : : /* Var (has to be a boolean Var, possibly from below NOT) */
1913 : :
1914 : 13 : Var *var = (Var *) (clause);
1915 : :
1916 : : /* match the attribute to a dimension of the statistic */
1917 : 13 : int idx = bms_member_index(keys, var->varattno);
1918 : :
1919 [ + - ]: 13 : Assert(var->vartype == BOOLOID);
1920 : :
1921 : : /*
1922 : : * Walk through the MCV items and evaluate the current clause. We
1923 : : * can skip items that were already ruled out, and terminate if
1924 : : * there are no remaining MCV items that might possibly match.
1925 : : */
1926 [ + + ]: 63 : for (int i = 0; i < mcvlist->nitems; i++)
1927 : : {
1928 : 50 : MCVItem *item = &mcvlist->items[i];
1929 : 50 : bool match = false;
1930 : :
1931 : : /* if the item is NULL, it's a mismatch */
1932 [ + - + + ]: 50 : if (!item->isnull[idx] && DatumGetBool(item->values[idx]))
1933 : 25 : match = true;
1934 : :
1935 : : /* update the result bitmap */
1936 [ + + - + : 50 : matches[i] = RESULT_MERGE(matches[i], is_or, match);
+ + ]
1937 : 50 : }
1938 : 13 : }
1939 : : else
1940 : : {
1941 : : /* Otherwise, it must be a bare boolean-returning expression */
1942 : 1 : int idx;
1943 : :
1944 : : /* match the expression to a dimension of the statistic */
1945 : 1 : idx = mcv_match_expression(clause, keys, exprs, NULL);
1946 : :
1947 : : /*
1948 : : * Walk through the MCV items and evaluate the current clause. We
1949 : : * can skip items that were already ruled out, and terminate if
1950 : : * there are no remaining MCV items that might possibly match.
1951 : : */
1952 [ + + ]: 37 : for (int i = 0; i < mcvlist->nitems; i++)
1953 : : {
1954 : 36 : bool match;
1955 : 36 : MCVItem *item = &mcvlist->items[i];
1956 : :
1957 : : /* "match" just means it's bool TRUE */
1958 [ - + ]: 36 : match = !item->isnull[idx] && DatumGetBool(item->values[idx]);
1959 : :
1960 : : /* now, update the match bitmap, depending on OR/AND type */
1961 [ - + # # : 36 : matches[i] = RESULT_MERGE(matches[i], is_or, match);
- + ]
1962 : 36 : }
1963 : 1 : }
1964 : 264 : }
1965 : :
1966 : 26902 : return matches;
1967 : 13451 : }
1968 : :
1969 : :
1970 : : /*
1971 : : * mcv_combine_selectivities
1972 : : * Combine per-column and multi-column MCV selectivity estimates.
1973 : : *
1974 : : * simple_sel is a "simple" selectivity estimate (produced without using any
1975 : : * extended statistics, essentially assuming independence of columns/clauses).
1976 : : *
1977 : : * mcv_sel and mcv_basesel are sums of the frequencies and base frequencies of
1978 : : * all matching MCV items. The difference (mcv_sel - mcv_basesel) is then
1979 : : * essentially interpreted as a correction to be added to simple_sel, as
1980 : : * described below.
1981 : : *
1982 : : * mcv_totalsel is the sum of the frequencies of all MCV items (not just the
1983 : : * matching ones). This is used as an upper bound on the portion of the
1984 : : * selectivity estimates not covered by the MCV statistics.
1985 : : *
1986 : : * Note: While simple and base selectivities are defined in a quite similar
1987 : : * way, the values are computed differently and are not therefore equal. The
1988 : : * simple selectivity is computed as a product of per-clause estimates, while
1989 : : * the base selectivity is computed by adding up base frequencies of matching
1990 : : * items of the multi-column MCV list. So the values may differ for two main
1991 : : * reasons - (a) the MCV list may not cover 100% of the data and (b) some of
1992 : : * the MCV items did not match the estimated clauses.
1993 : : *
1994 : : * As both (a) and (b) reduce the base selectivity value, it generally holds
1995 : : * that (simple_sel >= mcv_basesel). If the MCV list covers all the data, the
1996 : : * values may be equal.
1997 : : *
1998 : : * So, other_sel = (simple_sel - mcv_basesel) is an estimate for the part not
1999 : : * covered by the MCV list, and (mcv_sel - mcv_basesel) may be seen as a
2000 : : * correction for the part covered by the MCV list. Those two statements are
2001 : : * actually equivalent.
2002 : : */
2003 : : Selectivity
2004 : 133 : mcv_combine_selectivities(Selectivity simple_sel,
2005 : : Selectivity mcv_sel,
2006 : : Selectivity mcv_basesel,
2007 : : Selectivity mcv_totalsel)
2008 : : {
2009 : 133 : Selectivity other_sel;
2010 : 133 : Selectivity sel;
2011 : :
2012 : : /* estimated selectivity of values not covered by MCV matches */
2013 : 133 : other_sel = simple_sel - mcv_basesel;
2014 [ + + + - ]: 254 : CLAMP_PROBABILITY(other_sel);
2015 : :
2016 : : /* this non-MCV selectivity cannot exceed 1 - mcv_totalsel */
2017 [ + + ]: 133 : if (other_sel > 1.0 - mcv_totalsel)
2018 : 74 : other_sel = 1.0 - mcv_totalsel;
2019 : :
2020 : : /* overall selectivity is the sum of the MCV and non-MCV parts */
2021 : 133 : sel = mcv_sel + other_sel;
2022 [ + + + - ]: 258 : CLAMP_PROBABILITY(sel);
2023 : :
2024 : 266 : return sel;
2025 : 133 : }
2026 : :
2027 : :
2028 : : /*
2029 : : * mcv_clauselist_selectivity
2030 : : * Use MCV statistics to estimate the selectivity of an implicitly-ANDed
2031 : : * list of clauses.
2032 : : *
2033 : : * This determines which MCV items match every clause in the list and returns
2034 : : * the sum of the frequencies of those items.
2035 : : *
2036 : : * In addition, it returns the sum of the base frequencies of each of those
2037 : : * items (that is the sum of the selectivities that each item would have if
2038 : : * the columns were independent of one another), and the total selectivity of
2039 : : * all the MCV items (not just the matching ones). These are expected to be
2040 : : * used together with a "simple" selectivity estimate (one based only on
2041 : : * per-column statistics) to produce an overall selectivity estimate that
2042 : : * makes use of both per-column and multi-column statistics --- see
2043 : : * mcv_combine_selectivities().
2044 : : */
2045 : : Selectivity
2046 : 85 : mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat,
2047 : : List *clauses, int varRelid,
2048 : : JoinType jointype, SpecialJoinInfo *sjinfo,
2049 : : RelOptInfo *rel,
2050 : : Selectivity *basesel, Selectivity *totalsel)
2051 : : {
2052 : 85 : int i;
2053 : 85 : MCVList *mcv;
2054 : 85 : Selectivity s = 0.0;
2055 : 85 : RangeTblEntry *rte = root->simple_rte_array[rel->relid];
2056 : :
2057 : : /* match/mismatch bitmap for each MCV item */
2058 : 85 : bool *matches = NULL;
2059 : :
2060 : : /* load the MCV list stored in the statistics object */
2061 : 85 : mcv = statext_mcv_load(stat->statOid, rte->inh);
2062 : :
2063 : : /* build a match bitmap for the clauses */
2064 : 170 : matches = mcv_get_match_bitmap(root, clauses, stat->keys, stat->exprs,
2065 : 85 : mcv, false);
2066 : :
2067 : : /* sum frequencies for all the matching MCV items */
2068 : 85 : *basesel = 0.0;
2069 : 85 : *totalsel = 0.0;
2070 [ + + ]: 6305 : for (i = 0; i < mcv->nitems; i++)
2071 : : {
2072 : 6220 : *totalsel += mcv->items[i].frequency;
2073 : :
2074 [ + + ]: 6220 : if (matches[i] != false)
2075 : : {
2076 : 111 : *basesel += mcv->items[i].base_frequency;
2077 : 111 : s += mcv->items[i].frequency;
2078 : 111 : }
2079 : 6220 : }
2080 : :
2081 : 170 : return s;
2082 : 85 : }
2083 : :
2084 : :
2085 : : /*
2086 : : * mcv_clause_selectivity_or
2087 : : * Use MCV statistics to estimate the selectivity of a clause that
2088 : : * appears in an ORed list of clauses.
2089 : : *
2090 : : * As with mcv_clauselist_selectivity() this determines which MCV items match
2091 : : * the clause and returns both the sum of the frequencies and the sum of the
2092 : : * base frequencies of those items, as well as the sum of the frequencies of
2093 : : * all MCV items (not just the matching ones) so that this information can be
2094 : : * used by mcv_combine_selectivities() to produce a selectivity estimate that
2095 : : * makes use of both per-column and multi-column statistics.
2096 : : *
2097 : : * Additionally, we return information to help compute the overall selectivity
2098 : : * of the ORed list of clauses assumed to contain this clause. This function
2099 : : * is intended to be called for each clause in the ORed list of clauses,
2100 : : * allowing the overall selectivity to be computed using the following
2101 : : * algorithm:
2102 : : *
2103 : : * Suppose P[n] = P(C[1] OR C[2] OR ... OR C[n]) is the combined selectivity
2104 : : * of the first n clauses in the list. Then the combined selectivity taking
2105 : : * into account the next clause C[n+1] can be written as
2106 : : *
2107 : : * P[n+1] = P[n] + P(C[n+1]) - P((C[1] OR ... OR C[n]) AND C[n+1])
2108 : : *
2109 : : * The final term above represents the overlap between the clauses examined so
2110 : : * far and the (n+1)'th clause. To estimate its selectivity, we track the
2111 : : * match bitmap for the ORed list of clauses examined so far and examine its
2112 : : * intersection with the match bitmap for the (n+1)'th clause.
2113 : : *
2114 : : * We then also return the sums of the MCV item frequencies and base
2115 : : * frequencies for the match bitmap intersection corresponding to the overlap
2116 : : * term above, so that they can be combined with a simple selectivity estimate
2117 : : * for that term.
2118 : : *
2119 : : * The parameter "or_matches" is an in/out parameter tracking the match bitmap
2120 : : * for the clauses examined so far. The caller is expected to set it to NULL
2121 : : * the first time it calls this function.
2122 : : */
2123 : : Selectivity
2124 : 40 : mcv_clause_selectivity_or(PlannerInfo *root, StatisticExtInfo *stat,
2125 : : MCVList *mcv, Node *clause, bool **or_matches,
2126 : : Selectivity *basesel, Selectivity *overlap_mcvsel,
2127 : : Selectivity *overlap_basesel, Selectivity *totalsel)
2128 : : {
2129 : 40 : Selectivity s = 0.0;
2130 : 40 : bool *new_matches;
2131 : 40 : int i;
2132 : :
2133 : : /* build the OR-matches bitmap, if not built already */
2134 [ + + ]: 40 : if (*or_matches == NULL)
2135 : 16 : *or_matches = palloc0_array(bool, mcv->nitems);
2136 : :
2137 : : /* build the match bitmap for the new clause */
2138 : 80 : new_matches = mcv_get_match_bitmap(root, list_make1(clause), stat->keys,
2139 : 40 : stat->exprs, mcv, false);
2140 : :
2141 : : /*
2142 : : * Sum the frequencies for all the MCV items matching this clause and also
2143 : : * those matching the overlap between this clause and any of the preceding
2144 : : * clauses as described above.
2145 : : */
2146 : 40 : *basesel = 0.0;
2147 : 40 : *overlap_mcvsel = 0.0;
2148 : 40 : *overlap_basesel = 0.0;
2149 : 40 : *totalsel = 0.0;
2150 [ + + ]: 2586 : for (i = 0; i < mcv->nitems; i++)
2151 : : {
2152 : 2546 : *totalsel += mcv->items[i].frequency;
2153 : :
2154 [ + + ]: 2546 : if (new_matches[i])
2155 : : {
2156 : 56 : s += mcv->items[i].frequency;
2157 : 56 : *basesel += mcv->items[i].base_frequency;
2158 : :
2159 [ + + ]: 56 : if ((*or_matches)[i])
2160 : : {
2161 : 24 : *overlap_mcvsel += mcv->items[i].frequency;
2162 : 24 : *overlap_basesel += mcv->items[i].base_frequency;
2163 : 24 : }
2164 : 56 : }
2165 : :
2166 : : /* update the OR-matches bitmap for the next clause */
2167 [ + + ]: 2546 : (*or_matches)[i] = (*or_matches)[i] || new_matches[i];
2168 : 2546 : }
2169 : :
2170 : 40 : pfree(new_matches);
2171 : :
2172 : 80 : return s;
2173 : 40 : }
2174 : :
2175 : : /*
2176 : : * Free allocations of a MCVList.
2177 : : */
2178 : : void
2179 : 0 : statext_mcv_free(MCVList *mcvlist)
2180 : : {
2181 [ # # ]: 0 : for (int i = 0; i < mcvlist->nitems; i++)
2182 : : {
2183 : 0 : MCVItem *item = &mcvlist->items[i];
2184 : :
2185 : 0 : pfree(item->values);
2186 : 0 : pfree(item->isnull);
2187 : 0 : }
2188 : 0 : pfree(mcvlist);
2189 : 0 : }
|