Line data Source code
1 : /*--------------------------------------------------------------------------
2 : *
3 : * spgist_name_ops.c
4 : * Test opclass for SP-GiST
5 : *
6 : * This indexes input values of type "name", but the index storage is "text",
7 : * with the same choices as made in the core SP-GiST text_ops opclass.
8 : * Much of the code is identical to src/backend/access/spgist/spgtextproc.c,
9 : * which see for a more detailed header comment.
10 : *
11 : * Unlike spgtextproc.c, we don't bother with collation-aware logic.
12 : *
13 : *
14 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
15 : * Portions Copyright (c) 1994, Regents of the University of California
16 : *
17 : * IDENTIFICATION
18 : * src/test/modules/spgist_name_ops/spgist_name_ops.c
19 : *
20 : * -------------------------------------------------------------------------
21 : */
22 : #include "postgres.h"
23 :
24 : #include "access/spgist.h"
25 : #include "catalog/pg_type.h"
26 : #include "utils/datum.h"
27 : #include "varatt.h"
28 :
29 0 : PG_MODULE_MAGIC;
30 :
31 :
32 0 : PG_FUNCTION_INFO_V1(spgist_name_config);
33 : Datum
34 0 : spgist_name_config(PG_FUNCTION_ARGS)
35 : {
36 : #ifdef NOT_USED
37 : spgConfigIn *cfgin = (spgConfigIn *) PG_GETARG_POINTER(0);
38 : #endif
39 0 : spgConfigOut *cfg = (spgConfigOut *) PG_GETARG_POINTER(1);
40 :
41 0 : cfg->prefixType = TEXTOID;
42 0 : cfg->labelType = INT2OID;
43 0 : cfg->leafType = TEXTOID;
44 0 : cfg->canReturnData = true;
45 0 : cfg->longValuesOK = true; /* suffixing will shorten long values */
46 0 : PG_RETURN_VOID();
47 0 : }
48 :
49 : /*
50 : * Form a text datum from the given not-necessarily-null-terminated string,
51 : * using short varlena header format if possible
52 : */
53 : static Datum
54 0 : formTextDatum(const char *data, int datalen)
55 : {
56 0 : char *p;
57 :
58 0 : p = (char *) palloc(datalen + VARHDRSZ);
59 :
60 0 : if (datalen + VARHDRSZ_SHORT <= VARATT_SHORT_MAX)
61 : {
62 0 : SET_VARSIZE_SHORT(p, datalen + VARHDRSZ_SHORT);
63 0 : if (datalen)
64 0 : memcpy(p + VARHDRSZ_SHORT, data, datalen);
65 0 : }
66 : else
67 : {
68 0 : SET_VARSIZE(p, datalen + VARHDRSZ);
69 0 : memcpy(p + VARHDRSZ, data, datalen);
70 : }
71 :
72 0 : return PointerGetDatum(p);
73 0 : }
74 :
75 : /*
76 : * Find the length of the common prefix of a and b
77 : */
78 : static int
79 0 : commonPrefix(const char *a, const char *b, int lena, int lenb)
80 : {
81 0 : int i = 0;
82 :
83 0 : while (i < lena && i < lenb && *a == *b)
84 : {
85 0 : a++;
86 0 : b++;
87 0 : i++;
88 : }
89 :
90 0 : return i;
91 0 : }
92 :
93 : /*
94 : * Binary search an array of int16 datums for a match to c
95 : *
96 : * On success, *i gets the match location; on failure, it gets where to insert
97 : */
98 : static bool
99 0 : searchChar(const Datum *nodeLabels, int nNodes, int16 c, int *i)
100 : {
101 0 : int StopLow = 0,
102 0 : StopHigh = nNodes;
103 :
104 0 : while (StopLow < StopHigh)
105 : {
106 0 : int StopMiddle = (StopLow + StopHigh) >> 1;
107 0 : int16 middle = DatumGetInt16(nodeLabels[StopMiddle]);
108 :
109 0 : if (c < middle)
110 0 : StopHigh = StopMiddle;
111 0 : else if (c > middle)
112 0 : StopLow = StopMiddle + 1;
113 : else
114 : {
115 0 : *i = StopMiddle;
116 0 : return true;
117 : }
118 0 : }
119 :
120 0 : *i = StopHigh;
121 0 : return false;
122 0 : }
123 :
124 0 : PG_FUNCTION_INFO_V1(spgist_name_choose);
125 : Datum
126 0 : spgist_name_choose(PG_FUNCTION_ARGS)
127 : {
128 0 : spgChooseIn *in = (spgChooseIn *) PG_GETARG_POINTER(0);
129 0 : spgChooseOut *out = (spgChooseOut *) PG_GETARG_POINTER(1);
130 0 : Name inName = DatumGetName(in->datum);
131 0 : char *inStr = NameStr(*inName);
132 0 : int inSize = strlen(inStr);
133 0 : char *prefixStr = NULL;
134 0 : int prefixSize = 0;
135 0 : int commonLen = 0;
136 0 : int16 nodeChar = 0;
137 0 : int i = 0;
138 :
139 : /* Check for prefix match, set nodeChar to first byte after prefix */
140 0 : if (in->hasPrefix)
141 : {
142 0 : text *prefixText = DatumGetTextPP(in->prefixDatum);
143 :
144 0 : prefixStr = VARDATA_ANY(prefixText);
145 0 : prefixSize = VARSIZE_ANY_EXHDR(prefixText);
146 :
147 0 : commonLen = commonPrefix(inStr + in->level,
148 0 : prefixStr,
149 0 : inSize - in->level,
150 0 : prefixSize);
151 :
152 0 : if (commonLen == prefixSize)
153 : {
154 0 : if (inSize - in->level > commonLen)
155 0 : nodeChar = *(unsigned char *) (inStr + in->level + commonLen);
156 : else
157 0 : nodeChar = -1;
158 0 : }
159 : else
160 : {
161 : /* Must split tuple because incoming value doesn't match prefix */
162 0 : out->resultType = spgSplitTuple;
163 :
164 0 : if (commonLen == 0)
165 : {
166 0 : out->result.splitTuple.prefixHasPrefix = false;
167 0 : }
168 : else
169 : {
170 0 : out->result.splitTuple.prefixHasPrefix = true;
171 0 : out->result.splitTuple.prefixPrefixDatum =
172 0 : formTextDatum(prefixStr, commonLen);
173 : }
174 0 : out->result.splitTuple.prefixNNodes = 1;
175 0 : out->result.splitTuple.prefixNodeLabels =
176 0 : palloc_object(Datum);
177 0 : out->result.splitTuple.prefixNodeLabels[0] =
178 0 : Int16GetDatum(*(unsigned char *) (prefixStr + commonLen));
179 :
180 0 : out->result.splitTuple.childNodeN = 0;
181 :
182 0 : if (prefixSize - commonLen == 1)
183 : {
184 0 : out->result.splitTuple.postfixHasPrefix = false;
185 0 : }
186 : else
187 : {
188 0 : out->result.splitTuple.postfixHasPrefix = true;
189 0 : out->result.splitTuple.postfixPrefixDatum =
190 0 : formTextDatum(prefixStr + commonLen + 1,
191 0 : prefixSize - commonLen - 1);
192 : }
193 :
194 0 : PG_RETURN_VOID();
195 : }
196 0 : }
197 0 : else if (inSize > in->level)
198 : {
199 0 : nodeChar = *(unsigned char *) (inStr + in->level);
200 0 : }
201 : else
202 : {
203 0 : nodeChar = -1;
204 : }
205 :
206 : /* Look up nodeChar in the node label array */
207 0 : if (searchChar(in->nodeLabels, in->nNodes, nodeChar, &i))
208 : {
209 : /*
210 : * Descend to existing node. (If in->allTheSame, the core code will
211 : * ignore our nodeN specification here, but that's OK. We still have
212 : * to provide the correct levelAdd and restDatum values, and those are
213 : * the same regardless of which node gets chosen by core.)
214 : */
215 0 : int levelAdd;
216 :
217 0 : out->resultType = spgMatchNode;
218 0 : out->result.matchNode.nodeN = i;
219 0 : levelAdd = commonLen;
220 0 : if (nodeChar >= 0)
221 0 : levelAdd++;
222 0 : out->result.matchNode.levelAdd = levelAdd;
223 0 : if (inSize - in->level - levelAdd > 0)
224 0 : out->result.matchNode.restDatum =
225 0 : formTextDatum(inStr + in->level + levelAdd,
226 0 : inSize - in->level - levelAdd);
227 : else
228 0 : out->result.matchNode.restDatum =
229 0 : formTextDatum(NULL, 0);
230 0 : }
231 0 : else if (in->allTheSame)
232 : {
233 : /*
234 : * Can't use AddNode action, so split the tuple. The upper tuple has
235 : * the same prefix as before and uses a dummy node label -2 for the
236 : * lower tuple. The lower tuple has no prefix and the same node
237 : * labels as the original tuple.
238 : *
239 : * Note: it might seem tempting to shorten the upper tuple's prefix,
240 : * if it has one, then use its last byte as label for the lower tuple.
241 : * But that doesn't win since we know the incoming value matches the
242 : * whole prefix: we'd just end up splitting the lower tuple again.
243 : */
244 0 : out->resultType = spgSplitTuple;
245 0 : out->result.splitTuple.prefixHasPrefix = in->hasPrefix;
246 0 : out->result.splitTuple.prefixPrefixDatum = in->prefixDatum;
247 0 : out->result.splitTuple.prefixNNodes = 1;
248 0 : out->result.splitTuple.prefixNodeLabels = palloc_object(Datum);
249 0 : out->result.splitTuple.prefixNodeLabels[0] = Int16GetDatum(-2);
250 0 : out->result.splitTuple.childNodeN = 0;
251 0 : out->result.splitTuple.postfixHasPrefix = false;
252 0 : }
253 : else
254 : {
255 : /* Add a node for the not-previously-seen nodeChar value */
256 0 : out->resultType = spgAddNode;
257 0 : out->result.addNode.nodeLabel = Int16GetDatum(nodeChar);
258 0 : out->result.addNode.nodeN = i;
259 : }
260 :
261 0 : PG_RETURN_VOID();
262 0 : }
263 :
264 : /* The picksplit function is identical to the core opclass, so just use that */
265 :
266 0 : PG_FUNCTION_INFO_V1(spgist_name_inner_consistent);
267 : Datum
268 0 : spgist_name_inner_consistent(PG_FUNCTION_ARGS)
269 : {
270 0 : spgInnerConsistentIn *in = (spgInnerConsistentIn *) PG_GETARG_POINTER(0);
271 0 : spgInnerConsistentOut *out = (spgInnerConsistentOut *) PG_GETARG_POINTER(1);
272 0 : text *reconstructedValue;
273 0 : text *reconstrText;
274 0 : int maxReconstrLen;
275 0 : text *prefixText = NULL;
276 0 : int prefixSize = 0;
277 0 : int i;
278 :
279 : /*
280 : * Reconstruct values represented at this tuple, including parent data,
281 : * prefix of this tuple if any, and the node label if it's non-dummy.
282 : * in->level should be the length of the previously reconstructed value,
283 : * and the number of bytes added here is prefixSize or prefixSize + 1.
284 : *
285 : * Recall that reconstructedValues are assumed to be the same type as leaf
286 : * datums, so we must use "text" not "name" for them.
287 : *
288 : * Note: we assume that in->reconstructedValue isn't toasted and doesn't
289 : * have a short varlena header. This is okay because it must have been
290 : * created by a previous invocation of this routine, and we always emit
291 : * long-format reconstructed values.
292 : */
293 0 : reconstructedValue = (text *) DatumGetPointer(in->reconstructedValue);
294 0 : Assert(reconstructedValue == NULL ? in->level == 0 :
295 : VARSIZE_ANY_EXHDR(reconstructedValue) == in->level);
296 :
297 0 : maxReconstrLen = in->level + 1;
298 0 : if (in->hasPrefix)
299 : {
300 0 : prefixText = DatumGetTextPP(in->prefixDatum);
301 0 : prefixSize = VARSIZE_ANY_EXHDR(prefixText);
302 0 : maxReconstrLen += prefixSize;
303 0 : }
304 :
305 0 : reconstrText = palloc(VARHDRSZ + maxReconstrLen);
306 0 : SET_VARSIZE(reconstrText, VARHDRSZ + maxReconstrLen);
307 :
308 0 : if (in->level)
309 0 : memcpy(VARDATA(reconstrText),
310 : VARDATA(reconstructedValue),
311 : in->level);
312 0 : if (prefixSize)
313 0 : memcpy(((char *) VARDATA(reconstrText)) + in->level,
314 : VARDATA_ANY(prefixText),
315 : prefixSize);
316 : /* last byte of reconstrText will be filled in below */
317 :
318 : /*
319 : * Scan the child nodes. For each one, complete the reconstructed value
320 : * and see if it's consistent with the query. If so, emit an entry into
321 : * the output arrays.
322 : */
323 0 : out->nodeNumbers = palloc_array(int, in->nNodes);
324 0 : out->levelAdds = palloc_array(int, in->nNodes);
325 0 : out->reconstructedValues = palloc_array(Datum, in->nNodes);
326 0 : out->nNodes = 0;
327 :
328 0 : for (i = 0; i < in->nNodes; i++)
329 : {
330 0 : int16 nodeChar = DatumGetInt16(in->nodeLabels[i]);
331 0 : int thisLen;
332 0 : bool res = true;
333 0 : int j;
334 :
335 : /* If nodeChar is a dummy value, don't include it in data */
336 0 : if (nodeChar <= 0)
337 0 : thisLen = maxReconstrLen - 1;
338 : else
339 : {
340 0 : ((unsigned char *) VARDATA(reconstrText))[maxReconstrLen - 1] = nodeChar;
341 0 : thisLen = maxReconstrLen;
342 : }
343 :
344 0 : for (j = 0; j < in->nkeys; j++)
345 : {
346 0 : StrategyNumber strategy = in->scankeys[j].sk_strategy;
347 0 : Name inName;
348 0 : char *inStr;
349 0 : int inSize;
350 0 : int r;
351 :
352 0 : inName = DatumGetName(in->scankeys[j].sk_argument);
353 0 : inStr = NameStr(*inName);
354 0 : inSize = strlen(inStr);
355 :
356 0 : r = memcmp(VARDATA(reconstrText), inStr,
357 0 : Min(inSize, thisLen));
358 :
359 0 : switch (strategy)
360 : {
361 : case BTLessStrategyNumber:
362 : case BTLessEqualStrategyNumber:
363 0 : if (r > 0)
364 0 : res = false;
365 0 : break;
366 : case BTEqualStrategyNumber:
367 0 : if (r != 0 || inSize < thisLen)
368 0 : res = false;
369 0 : break;
370 : case BTGreaterEqualStrategyNumber:
371 : case BTGreaterStrategyNumber:
372 0 : if (r < 0)
373 0 : res = false;
374 0 : break;
375 : default:
376 0 : elog(ERROR, "unrecognized strategy number: %d",
377 : in->scankeys[j].sk_strategy);
378 0 : break;
379 : }
380 :
381 0 : if (!res)
382 0 : break; /* no need to consider remaining conditions */
383 0 : }
384 :
385 0 : if (res)
386 : {
387 0 : out->nodeNumbers[out->nNodes] = i;
388 0 : out->levelAdds[out->nNodes] = thisLen - in->level;
389 0 : SET_VARSIZE(reconstrText, VARHDRSZ + thisLen);
390 0 : out->reconstructedValues[out->nNodes] =
391 0 : datumCopy(PointerGetDatum(reconstrText), false, -1);
392 0 : out->nNodes++;
393 0 : }
394 0 : }
395 :
396 0 : PG_RETURN_VOID();
397 0 : }
398 :
399 0 : PG_FUNCTION_INFO_V1(spgist_name_leaf_consistent);
400 : Datum
401 0 : spgist_name_leaf_consistent(PG_FUNCTION_ARGS)
402 : {
403 0 : spgLeafConsistentIn *in = (spgLeafConsistentIn *) PG_GETARG_POINTER(0);
404 0 : spgLeafConsistentOut *out = (spgLeafConsistentOut *) PG_GETARG_POINTER(1);
405 0 : int level = in->level;
406 0 : text *leafValue,
407 0 : *reconstrValue = NULL;
408 0 : char *fullValue;
409 0 : int fullLen;
410 0 : bool res;
411 0 : int j;
412 :
413 : /* all tests are exact */
414 0 : out->recheck = false;
415 :
416 0 : leafValue = DatumGetTextPP(in->leafDatum);
417 :
418 : /* As above, in->reconstructedValue isn't toasted or short. */
419 0 : if (DatumGetPointer(in->reconstructedValue))
420 0 : reconstrValue = (text *) DatumGetPointer(in->reconstructedValue);
421 :
422 0 : Assert(reconstrValue == NULL ? level == 0 :
423 : VARSIZE_ANY_EXHDR(reconstrValue) == level);
424 :
425 : /* Reconstruct the Name represented by this leaf tuple */
426 0 : fullValue = palloc0(NAMEDATALEN);
427 0 : fullLen = level + VARSIZE_ANY_EXHDR(leafValue);
428 0 : Assert(fullLen < NAMEDATALEN);
429 0 : if (VARSIZE_ANY_EXHDR(leafValue) == 0 && level > 0)
430 : {
431 0 : memcpy(fullValue, VARDATA(reconstrValue),
432 : VARSIZE_ANY_EXHDR(reconstrValue));
433 0 : }
434 : else
435 : {
436 0 : if (level)
437 0 : memcpy(fullValue, VARDATA(reconstrValue), level);
438 0 : if (VARSIZE_ANY_EXHDR(leafValue) > 0)
439 0 : memcpy(fullValue + level, VARDATA_ANY(leafValue),
440 : VARSIZE_ANY_EXHDR(leafValue));
441 : }
442 0 : out->leafValue = PointerGetDatum(fullValue);
443 :
444 : /* Perform the required comparison(s) */
445 0 : res = true;
446 0 : for (j = 0; j < in->nkeys; j++)
447 : {
448 0 : StrategyNumber strategy = in->scankeys[j].sk_strategy;
449 0 : Name queryName = DatumGetName(in->scankeys[j].sk_argument);
450 0 : char *queryStr = NameStr(*queryName);
451 0 : int queryLen = strlen(queryStr);
452 0 : int r;
453 :
454 : /* Non-collation-aware comparison */
455 0 : r = memcmp(fullValue, queryStr, Min(queryLen, fullLen));
456 :
457 0 : if (r == 0)
458 : {
459 0 : if (queryLen > fullLen)
460 0 : r = -1;
461 0 : else if (queryLen < fullLen)
462 0 : r = 1;
463 0 : }
464 :
465 0 : switch (strategy)
466 : {
467 : case BTLessStrategyNumber:
468 0 : res = (r < 0);
469 0 : break;
470 : case BTLessEqualStrategyNumber:
471 0 : res = (r <= 0);
472 0 : break;
473 : case BTEqualStrategyNumber:
474 0 : res = (r == 0);
475 0 : break;
476 : case BTGreaterEqualStrategyNumber:
477 0 : res = (r >= 0);
478 0 : break;
479 : case BTGreaterStrategyNumber:
480 0 : res = (r > 0);
481 0 : break;
482 : default:
483 0 : elog(ERROR, "unrecognized strategy number: %d",
484 : in->scankeys[j].sk_strategy);
485 0 : res = false;
486 0 : break;
487 : }
488 :
489 0 : if (!res)
490 0 : break; /* no need to consider remaining conditions */
491 0 : }
492 :
493 0 : PG_RETURN_BOOL(res);
494 0 : }
495 :
496 0 : PG_FUNCTION_INFO_V1(spgist_name_compress);
497 : Datum
498 0 : spgist_name_compress(PG_FUNCTION_ARGS)
499 : {
500 0 : Name inName = PG_GETARG_NAME(0);
501 0 : char *inStr = NameStr(*inName);
502 :
503 0 : PG_RETURN_DATUM(formTextDatum(inStr, strlen(inStr)));
504 0 : }
|