Line data Source code
1 : /*--------------------------------------------------------------------------
2 : *
3 : * test_integerset.c
4 : * Test integer set data structure.
5 : *
6 : * Copyright (c) 2019-2026, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/test/modules/test_integerset/test_integerset.c
10 : *
11 : * -------------------------------------------------------------------------
12 : */
13 : #include "postgres.h"
14 :
15 : #include "common/pg_prng.h"
16 : #include "fmgr.h"
17 : #include "lib/integerset.h"
18 : #include "utils/memutils.h"
19 : #include "utils/timestamp.h"
20 :
21 : /*
22 : * If you enable this, the "pattern" tests will print information about
23 : * how long populating, probing, and iterating the test set takes, and
24 : * how much memory the test set consumed. That can be used as
25 : * micro-benchmark of various operations and input patterns (you might
26 : * want to increase the number of values used in each of the test, if
27 : * you do that, to reduce noise).
28 : *
29 : * The information is printed to the server's stderr, mostly because
30 : * that's where MemoryContextStats() output goes.
31 : */
32 : static const bool intset_test_stats = false;
33 :
34 0 : PG_MODULE_MAGIC;
35 :
36 0 : PG_FUNCTION_INFO_V1(test_integerset);
37 :
38 : /*
39 : * A struct to define a pattern of integers, for use with the test_pattern()
40 : * function.
41 : */
42 : typedef struct
43 : {
44 : char *test_name; /* short name of the test, for humans */
45 : char *pattern_str; /* a bit pattern */
46 : uint64 spacing; /* pattern repeats at this interval */
47 : uint64 num_values; /* number of integers to set in total */
48 : } test_spec;
49 :
50 : static const test_spec test_specs[] = {
51 : {
52 : "all ones", "1111111111",
53 : 10, 10000000
54 : },
55 : {
56 : "alternating bits", "0101010101",
57 : 10, 10000000
58 : },
59 : {
60 : "clusters of ten", "1111111111",
61 : 10000, 10000000
62 : },
63 : {
64 : "clusters of hundred",
65 : "1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111",
66 : 10000, 100000000
67 : },
68 : {
69 : "one-every-64k", "1",
70 : 65536, 10000000
71 : },
72 : {
73 : "sparse", "100000000000000000000000000000001",
74 : 10000000, 10000000
75 : },
76 : {
77 : "single values, distance > 2^32", "1",
78 : UINT64CONST(10000000000), 1000000
79 : },
80 : {
81 : "clusters, distance > 2^32", "10101010",
82 : UINT64CONST(10000000000), 10000000
83 : },
84 : {
85 : "clusters, distance > 2^60", "10101010",
86 : UINT64CONST(2000000000000000000),
87 : 23 /* can't be much higher than this, or we
88 : * overflow uint64 */
89 : }
90 : };
91 :
92 : static void test_pattern(const test_spec *spec);
93 : static void test_empty(void);
94 : static void test_single_value(uint64 value);
95 : static void check_with_filler(IntegerSet *intset, uint64 x, uint64 value, uint64 filler_min, uint64 filler_max);
96 : static void test_single_value_and_filler(uint64 value, uint64 filler_min, uint64 filler_max);
97 : static void test_huge_distances(void);
98 :
99 : /*
100 : * SQL-callable entry point to perform all tests.
101 : */
102 : Datum
103 0 : test_integerset(PG_FUNCTION_ARGS)
104 : {
105 : /* Tests for various corner cases */
106 0 : test_empty();
107 0 : test_huge_distances();
108 0 : test_single_value(0);
109 0 : test_single_value(1);
110 0 : test_single_value(PG_UINT64_MAX - 1);
111 0 : test_single_value(PG_UINT64_MAX);
112 0 : test_single_value_and_filler(0, 1000, 2000);
113 0 : test_single_value_and_filler(1, 1000, 2000);
114 0 : test_single_value_and_filler(1, 1000, 2000000);
115 0 : test_single_value_and_filler(PG_UINT64_MAX - 1, 1000, 2000);
116 0 : test_single_value_and_filler(PG_UINT64_MAX, 1000, 2000);
117 :
118 : /* Test different test patterns, with lots of entries */
119 0 : for (int i = 0; i < lengthof(test_specs); i++)
120 : {
121 0 : test_pattern(&test_specs[i]);
122 0 : }
123 :
124 0 : PG_RETURN_VOID();
125 : }
126 :
127 : /*
128 : * Test with a repeating pattern, defined by the 'spec'.
129 : */
130 : static void
131 0 : test_pattern(const test_spec *spec)
132 : {
133 0 : IntegerSet *intset;
134 0 : MemoryContext intset_ctx;
135 0 : MemoryContext old_ctx;
136 0 : TimestampTz starttime;
137 0 : TimestampTz endtime;
138 0 : uint64 n;
139 0 : uint64 last_int;
140 0 : int patternlen;
141 0 : uint64 *pattern_values;
142 0 : uint64 pattern_num_values;
143 :
144 0 : elog(NOTICE, "testing intset with pattern \"%s\"", spec->test_name);
145 : if (intset_test_stats)
146 : fprintf(stderr, "-----\ntesting intset with pattern \"%s\"\n", spec->test_name);
147 :
148 : /* Pre-process the pattern, creating an array of integers from it. */
149 0 : patternlen = strlen(spec->pattern_str);
150 0 : pattern_values = palloc(patternlen * sizeof(uint64));
151 0 : pattern_num_values = 0;
152 0 : for (int i = 0; i < patternlen; i++)
153 : {
154 0 : if (spec->pattern_str[i] == '1')
155 0 : pattern_values[pattern_num_values++] = i;
156 0 : }
157 :
158 : /*
159 : * Allocate the integer set.
160 : *
161 : * Allocate it in a separate memory context, so that we can print its
162 : * memory usage easily. (intset_create() creates a memory context of its
163 : * own, too, but we don't have direct access to it, so we cannot call
164 : * MemoryContextStats() on it directly).
165 : */
166 0 : intset_ctx = AllocSetContextCreate(CurrentMemoryContext,
167 : "intset test",
168 : ALLOCSET_SMALL_SIZES);
169 0 : MemoryContextSetIdentifier(intset_ctx, spec->test_name);
170 0 : old_ctx = MemoryContextSwitchTo(intset_ctx);
171 0 : intset = intset_create();
172 0 : MemoryContextSwitchTo(old_ctx);
173 :
174 : /*
175 : * Add values to the set.
176 : */
177 0 : starttime = GetCurrentTimestamp();
178 :
179 0 : n = 0;
180 0 : last_int = 0;
181 0 : while (n < spec->num_values)
182 : {
183 0 : uint64 x = 0;
184 :
185 0 : for (int i = 0; i < pattern_num_values && n < spec->num_values; i++)
186 : {
187 0 : x = last_int + pattern_values[i];
188 :
189 0 : intset_add_member(intset, x);
190 0 : n++;
191 0 : }
192 0 : last_int += spec->spacing;
193 0 : }
194 :
195 0 : endtime = GetCurrentTimestamp();
196 :
197 : if (intset_test_stats)
198 : fprintf(stderr, "added " UINT64_FORMAT " values in %d ms\n",
199 : spec->num_values, (int) (endtime - starttime) / 1000);
200 :
201 : /*
202 : * Print stats on the amount of memory used.
203 : *
204 : * We print the usage reported by intset_memory_usage(), as well as the
205 : * stats from the memory context. They should be in the same ballpark,
206 : * but it's hard to automate testing that, so if you're making changes to
207 : * the implementation, just observe that manually.
208 : */
209 : if (intset_test_stats)
210 : {
211 : uint64 mem_usage;
212 :
213 : /*
214 : * Also print memory usage as reported by intset_memory_usage(). It
215 : * should be in the same ballpark as the usage reported by
216 : * MemoryContextStats().
217 : */
218 : mem_usage = intset_memory_usage(intset);
219 : fprintf(stderr, "intset_memory_usage() reported " UINT64_FORMAT " (%0.2f bytes / integer)\n",
220 : mem_usage, (double) mem_usage / spec->num_values);
221 :
222 : MemoryContextStats(intset_ctx);
223 : }
224 :
225 : /* Check that intset_get_num_entries works */
226 0 : n = intset_num_entries(intset);
227 0 : if (n != spec->num_values)
228 0 : elog(ERROR, "intset_num_entries returned " UINT64_FORMAT ", expected " UINT64_FORMAT, n, spec->num_values);
229 :
230 : /*
231 : * Test random-access probes with intset_is_member()
232 : */
233 0 : starttime = GetCurrentTimestamp();
234 :
235 0 : for (n = 0; n < 100000; n++)
236 : {
237 0 : bool b;
238 0 : bool expected;
239 0 : uint64 x;
240 :
241 : /*
242 : * Pick next value to probe at random. We limit the probes to the
243 : * last integer that we added to the set, plus an arbitrary constant
244 : * (1000). There's no point in probing the whole 0 - 2^64 range, if
245 : * only a small part of the integer space is used. We would very
246 : * rarely hit values that are actually in the set.
247 : */
248 0 : x = pg_prng_uint64_range(&pg_global_prng_state, 0, last_int + 1000);
249 :
250 : /* Do we expect this value to be present in the set? */
251 0 : if (x >= last_int)
252 0 : expected = false;
253 : else
254 : {
255 0 : uint64 idx = x % spec->spacing;
256 :
257 0 : if (idx >= patternlen)
258 0 : expected = false;
259 0 : else if (spec->pattern_str[idx] == '1')
260 0 : expected = true;
261 : else
262 0 : expected = false;
263 0 : }
264 :
265 : /* Is it present according to intset_is_member() ? */
266 0 : b = intset_is_member(intset, x);
267 :
268 0 : if (b != expected)
269 0 : elog(ERROR, "mismatch at " UINT64_FORMAT ": %d vs %d", x, b, expected);
270 0 : }
271 0 : endtime = GetCurrentTimestamp();
272 : if (intset_test_stats)
273 : fprintf(stderr, "probed " UINT64_FORMAT " values in %d ms\n",
274 : n, (int) (endtime - starttime) / 1000);
275 :
276 : /*
277 : * Test iterator
278 : */
279 0 : starttime = GetCurrentTimestamp();
280 :
281 0 : intset_begin_iterate(intset);
282 0 : n = 0;
283 0 : last_int = 0;
284 0 : while (n < spec->num_values)
285 : {
286 0 : for (int i = 0; i < pattern_num_values && n < spec->num_values; i++)
287 : {
288 0 : uint64 expected = last_int + pattern_values[i];
289 0 : uint64 x;
290 :
291 0 : if (!intset_iterate_next(intset, &x))
292 0 : break;
293 :
294 0 : if (x != expected)
295 0 : elog(ERROR, "iterate returned wrong value; got " UINT64_FORMAT ", expected " UINT64_FORMAT, x, expected);
296 0 : n++;
297 0 : }
298 0 : last_int += spec->spacing;
299 : }
300 0 : endtime = GetCurrentTimestamp();
301 : if (intset_test_stats)
302 : fprintf(stderr, "iterated " UINT64_FORMAT " values in %d ms\n",
303 : n, (int) (endtime - starttime) / 1000);
304 :
305 0 : if (n < spec->num_values)
306 0 : elog(ERROR, "iterator stopped short after " UINT64_FORMAT " entries, expected " UINT64_FORMAT, n, spec->num_values);
307 0 : if (n > spec->num_values)
308 0 : elog(ERROR, "iterator returned " UINT64_FORMAT " entries, " UINT64_FORMAT " was expected", n, spec->num_values);
309 :
310 0 : MemoryContextDelete(intset_ctx);
311 0 : }
312 :
313 : /*
314 : * Test with a set containing a single integer.
315 : */
316 : static void
317 0 : test_single_value(uint64 value)
318 : {
319 0 : IntegerSet *intset;
320 0 : uint64 x;
321 0 : uint64 num_entries;
322 0 : bool found;
323 :
324 0 : elog(NOTICE, "testing intset with single value " UINT64_FORMAT, value);
325 :
326 : /* Create the set. */
327 0 : intset = intset_create();
328 0 : intset_add_member(intset, value);
329 :
330 : /* Test intset_get_num_entries() */
331 0 : num_entries = intset_num_entries(intset);
332 0 : if (num_entries != 1)
333 0 : elog(ERROR, "intset_num_entries returned " UINT64_FORMAT ", expected 1", num_entries);
334 :
335 : /*
336 : * Test intset_is_member() at various special values, like 0 and maximum
337 : * possible 64-bit integer, as well as the value itself.
338 : */
339 0 : if (intset_is_member(intset, 0) != (value == 0))
340 0 : elog(ERROR, "intset_is_member failed for 0");
341 0 : if (intset_is_member(intset, 1) != (value == 1))
342 0 : elog(ERROR, "intset_is_member failed for 1");
343 0 : if (intset_is_member(intset, PG_UINT64_MAX) != (value == PG_UINT64_MAX))
344 0 : elog(ERROR, "intset_is_member failed for PG_UINT64_MAX");
345 0 : if (intset_is_member(intset, value) != true)
346 0 : elog(ERROR, "intset_is_member failed for the tested value");
347 :
348 : /*
349 : * Test iterator
350 : */
351 0 : intset_begin_iterate(intset);
352 0 : found = intset_iterate_next(intset, &x);
353 0 : if (!found || x != value)
354 0 : elog(ERROR, "intset_iterate_next failed for " UINT64_FORMAT, x);
355 :
356 0 : found = intset_iterate_next(intset, &x);
357 0 : if (found)
358 0 : elog(ERROR, "intset_iterate_next failed " UINT64_FORMAT, x);
359 0 : }
360 :
361 : /*
362 : * Test with an integer set that contains:
363 : *
364 : * - a given single 'value', and
365 : * - all integers between 'filler_min' and 'filler_max'.
366 : *
367 : * This exercises different codepaths than testing just with a single value,
368 : * because the implementation buffers newly-added values. If we add just a
369 : * single value to the set, we won't test the internal B-tree code at all,
370 : * just the code that deals with the buffer.
371 : */
372 : static void
373 0 : test_single_value_and_filler(uint64 value, uint64 filler_min, uint64 filler_max)
374 : {
375 0 : IntegerSet *intset;
376 0 : uint64 x;
377 0 : bool found;
378 0 : uint64 *iter_expected;
379 0 : uint64 n = 0;
380 0 : uint64 num_entries = 0;
381 0 : uint64 mem_usage;
382 :
383 0 : elog(NOTICE, "testing intset with value " UINT64_FORMAT ", and all between " UINT64_FORMAT " and " UINT64_FORMAT,
384 : value, filler_min, filler_max);
385 :
386 0 : intset = intset_create();
387 :
388 0 : iter_expected = palloc_array(uint64, filler_max - filler_min + 1);
389 0 : if (value < filler_min)
390 : {
391 0 : intset_add_member(intset, value);
392 0 : iter_expected[n++] = value;
393 0 : }
394 :
395 0 : for (x = filler_min; x < filler_max; x++)
396 : {
397 0 : intset_add_member(intset, x);
398 0 : iter_expected[n++] = x;
399 0 : }
400 :
401 0 : if (value >= filler_max)
402 : {
403 0 : intset_add_member(intset, value);
404 0 : iter_expected[n++] = value;
405 0 : }
406 :
407 : /* Test intset_get_num_entries() */
408 0 : num_entries = intset_num_entries(intset);
409 0 : if (num_entries != n)
410 0 : elog(ERROR, "intset_num_entries returned " UINT64_FORMAT ", expected " UINT64_FORMAT, num_entries, n);
411 :
412 : /*
413 : * Test intset_is_member() at various spots, at and around the values that
414 : * we expect to be set, as well as 0 and the maximum possible value.
415 : */
416 0 : check_with_filler(intset, 0,
417 0 : value, filler_min, filler_max);
418 0 : check_with_filler(intset, 1,
419 0 : value, filler_min, filler_max);
420 0 : check_with_filler(intset, filler_min - 1,
421 0 : value, filler_min, filler_max);
422 0 : check_with_filler(intset, filler_min,
423 0 : value, filler_min, filler_max);
424 0 : check_with_filler(intset, filler_min + 1,
425 0 : value, filler_min, filler_max);
426 0 : check_with_filler(intset, value - 1,
427 0 : value, filler_min, filler_max);
428 0 : check_with_filler(intset, value,
429 0 : value, filler_min, filler_max);
430 0 : check_with_filler(intset, value + 1,
431 0 : value, filler_min, filler_max);
432 0 : check_with_filler(intset, filler_max - 1,
433 0 : value, filler_min, filler_max);
434 0 : check_with_filler(intset, filler_max,
435 0 : value, filler_min, filler_max);
436 0 : check_with_filler(intset, filler_max + 1,
437 0 : value, filler_min, filler_max);
438 0 : check_with_filler(intset, PG_UINT64_MAX - 1,
439 0 : value, filler_min, filler_max);
440 0 : check_with_filler(intset, PG_UINT64_MAX,
441 0 : value, filler_min, filler_max);
442 :
443 0 : intset_begin_iterate(intset);
444 0 : for (uint64 i = 0; i < n; i++)
445 : {
446 0 : found = intset_iterate_next(intset, &x);
447 0 : if (!found || x != iter_expected[i])
448 0 : elog(ERROR, "intset_iterate_next failed for " UINT64_FORMAT, x);
449 0 : }
450 0 : found = intset_iterate_next(intset, &x);
451 0 : if (found)
452 0 : elog(ERROR, "intset_iterate_next failed " UINT64_FORMAT, x);
453 :
454 0 : mem_usage = intset_memory_usage(intset);
455 0 : if (mem_usage < 5000 || mem_usage > 500000000)
456 0 : elog(ERROR, "intset_memory_usage() reported suspicious value: " UINT64_FORMAT, mem_usage);
457 0 : }
458 :
459 : /*
460 : * Helper function for test_single_value_and_filler.
461 : *
462 : * Calls intset_is_member() for value 'x', and checks that the result is what
463 : * we expect.
464 : */
465 : static void
466 0 : check_with_filler(IntegerSet *intset, uint64 x,
467 : uint64 value, uint64 filler_min, uint64 filler_max)
468 : {
469 0 : bool expected;
470 0 : bool actual;
471 :
472 0 : expected = (x == value || (filler_min <= x && x < filler_max));
473 :
474 0 : actual = intset_is_member(intset, x);
475 :
476 0 : if (actual != expected)
477 0 : elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, x);
478 0 : }
479 :
480 : /*
481 : * Test empty set
482 : */
483 : static void
484 0 : test_empty(void)
485 : {
486 0 : IntegerSet *intset;
487 0 : uint64 x;
488 :
489 0 : elog(NOTICE, "testing intset with empty set");
490 :
491 0 : intset = intset_create();
492 :
493 : /* Test intset_is_member() */
494 0 : if (intset_is_member(intset, 0) != false)
495 0 : elog(ERROR, "intset_is_member on empty set returned true");
496 0 : if (intset_is_member(intset, 1) != false)
497 0 : elog(ERROR, "intset_is_member on empty set returned true");
498 0 : if (intset_is_member(intset, PG_UINT64_MAX) != false)
499 0 : elog(ERROR, "intset_is_member on empty set returned true");
500 :
501 : /* Test iterator */
502 0 : intset_begin_iterate(intset);
503 0 : if (intset_iterate_next(intset, &x))
504 0 : elog(ERROR, "intset_iterate_next on empty set returned a value (" UINT64_FORMAT ")", x);
505 0 : }
506 :
507 : /*
508 : * Test with integers that are more than 2^60 apart.
509 : *
510 : * The Simple-8b encoding used by the set implementation can only encode
511 : * values up to 2^60. That makes large differences like this interesting
512 : * to test.
513 : */
514 : static void
515 0 : test_huge_distances(void)
516 : {
517 0 : IntegerSet *intset;
518 0 : uint64 values[1000];
519 0 : int num_values = 0;
520 0 : uint64 val = 0;
521 0 : bool found;
522 0 : uint64 x;
523 :
524 0 : elog(NOTICE, "testing intset with distances > 2^60 between values");
525 :
526 0 : val = 0;
527 0 : values[num_values++] = val;
528 :
529 : /* Test differences on both sides of the 2^60 boundary. */
530 0 : val += UINT64CONST(1152921504606846976) - 1; /* 2^60 - 1 */
531 0 : values[num_values++] = val;
532 :
533 0 : val += UINT64CONST(1152921504606846976) - 1; /* 2^60 - 1 */
534 0 : values[num_values++] = val;
535 :
536 0 : val += UINT64CONST(1152921504606846976); /* 2^60 */
537 0 : values[num_values++] = val;
538 :
539 0 : val += UINT64CONST(1152921504606846976); /* 2^60 */
540 0 : values[num_values++] = val;
541 :
542 0 : val += UINT64CONST(1152921504606846976); /* 2^60 */
543 0 : values[num_values++] = val;
544 :
545 0 : val += UINT64CONST(1152921504606846976) + 1; /* 2^60 + 1 */
546 0 : values[num_values++] = val;
547 :
548 0 : val += UINT64CONST(1152921504606846976) + 1; /* 2^60 + 1 */
549 0 : values[num_values++] = val;
550 :
551 0 : val += UINT64CONST(1152921504606846976) + 1; /* 2^60 + 1 */
552 0 : values[num_values++] = val;
553 :
554 0 : val += UINT64CONST(1152921504606846976) + 2; /* 2^60 + 2 */
555 0 : values[num_values++] = val;
556 :
557 0 : val += UINT64CONST(1152921504606846976) + 2; /* 2^60 + 2 */
558 0 : values[num_values++] = val;
559 :
560 0 : val += UINT64CONST(1152921504606846976); /* 2^60 */
561 0 : values[num_values++] = val;
562 :
563 : /*
564 : * We're now very close to 2^64, so can't add large values anymore. But
565 : * add more smaller values to the end, to make sure that all the above
566 : * values get flushed and packed into the tree structure.
567 : */
568 0 : while (num_values < 1000)
569 : {
570 0 : val += pg_prng_uint32(&pg_global_prng_state);
571 0 : values[num_values++] = val;
572 : }
573 :
574 : /* Create an IntegerSet using these values */
575 0 : intset = intset_create();
576 0 : for (int i = 0; i < num_values; i++)
577 0 : intset_add_member(intset, values[i]);
578 :
579 : /*
580 : * Test intset_is_member() around each of these values
581 : */
582 0 : for (int i = 0; i < num_values; i++)
583 : {
584 0 : uint64 y = values[i];
585 0 : bool expected;
586 0 : bool result;
587 :
588 0 : if (y > 0)
589 : {
590 0 : expected = (values[i - 1] == y - 1);
591 0 : result = intset_is_member(intset, y - 1);
592 0 : if (result != expected)
593 0 : elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, y - 1);
594 0 : }
595 :
596 0 : result = intset_is_member(intset, y);
597 0 : if (result != true)
598 0 : elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, y);
599 :
600 0 : expected = (i != num_values - 1) ? (values[i + 1] == y + 1) : false;
601 0 : result = intset_is_member(intset, y + 1);
602 0 : if (result != expected)
603 0 : elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, y + 1);
604 0 : }
605 :
606 : /*
607 : * Test iterator
608 : */
609 0 : intset_begin_iterate(intset);
610 0 : for (int i = 0; i < num_values; i++)
611 : {
612 0 : found = intset_iterate_next(intset, &x);
613 0 : if (!found || x != values[i])
614 0 : elog(ERROR, "intset_iterate_next failed for " UINT64_FORMAT, x);
615 0 : }
616 0 : found = intset_iterate_next(intset, &x);
617 0 : if (found)
618 0 : elog(ERROR, "intset_iterate_next failed " UINT64_FORMAT, x);
619 0 : }
|