Line data Source code
1 : /*
2 : * contrib/pg_trgm/trgm_gin.c
3 : */
4 : #include "postgres.h"
5 :
6 : #include "access/gin.h"
7 : #include "access/stratnum.h"
8 : #include "fmgr.h"
9 : #include "trgm.h"
10 : #include "varatt.h"
11 :
/* Declare the SQL-callable functions defined in this file via PG_FUNCTION_INFO_V1. */
12 0 : PG_FUNCTION_INFO_V1(gin_extract_trgm);
13 0 : PG_FUNCTION_INFO_V1(gin_extract_value_trgm);
14 0 : PG_FUNCTION_INFO_V1(gin_extract_query_trgm);
15 0 : PG_FUNCTION_INFO_V1(gin_trgm_consistent);
16 0 : PG_FUNCTION_INFO_V1(gin_trgm_triconsistent);
17 :
18 : /*
19 : * This function can only be called if a pre-9.1 version of the GIN operator
20 : * class definition is present in the catalogs (probably as a consequence
21 : * of upgrade-in-place). Cope.
22 : */
23 : Datum
24 0 : gin_extract_trgm(PG_FUNCTION_ARGS)
25 : {
26 0 : if (PG_NARGS() == 3)
27 0 : return gin_extract_value_trgm(fcinfo);
28 0 : if (PG_NARGS() == 7)
29 0 : return gin_extract_query_trgm(fcinfo);
30 0 : elog(ERROR, "unexpected number of arguments to gin_extract_trgm");
31 0 : PG_RETURN_NULL();
32 0 : }
33 :
34 : Datum
35 0 : gin_extract_value_trgm(PG_FUNCTION_ARGS)
36 : {
37 0 : text *val = (text *) PG_GETARG_TEXT_PP(0);
38 0 : int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
39 0 : Datum *entries = NULL;
40 0 : TRGM *trg;
41 0 : int32 trglen;
42 :
43 0 : *nentries = 0;
44 :
45 0 : trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
46 0 : trglen = ARRNELEM(trg);
47 :
48 0 : if (trglen > 0)
49 : {
50 0 : trgm *ptr;
51 0 : int32 i;
52 :
53 0 : *nentries = trglen;
54 0 : entries = palloc_array(Datum, trglen);
55 :
56 0 : ptr = GETARR(trg);
57 0 : for (i = 0; i < trglen; i++)
58 : {
59 0 : int32 item = trgm2int(ptr);
60 :
61 0 : entries[i] = Int32GetDatum(item);
62 0 : ptr++;
63 0 : }
64 0 : }
65 :
66 0 : PG_RETURN_POINTER(entries);
67 0 : }
68 :
69 : Datum
70 0 : gin_extract_query_trgm(PG_FUNCTION_ARGS)
71 : {
72 0 : text *val = (text *) PG_GETARG_TEXT_PP(0);
73 0 : int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
74 0 : StrategyNumber strategy = PG_GETARG_UINT16(2);
75 : #ifdef NOT_USED
76 : bool **pmatch = (bool **) PG_GETARG_POINTER(3);
77 : #endif
78 0 : Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4);
79 : #ifdef NOT_USED
80 : bool **nullFlags = (bool **) PG_GETARG_POINTER(5);
81 : #endif
82 0 : int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
83 0 : Datum *entries = NULL;
84 0 : TRGM *trg;
85 0 : int32 trglen;
86 0 : trgm *ptr;
87 0 : TrgmPackedGraph *graph;
88 0 : int32 i;
89 :
90 0 : switch (strategy)
91 : {
92 : case SimilarityStrategyNumber:
93 : case WordSimilarityStrategyNumber:
94 : case StrictWordSimilarityStrategyNumber:
95 : case EqualStrategyNumber:
96 0 : trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
97 0 : break;
98 : case ILikeStrategyNumber:
99 : #ifndef IGNORECASE
100 : elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
101 : #endif
102 : /* FALL THRU */
103 : case LikeStrategyNumber:
104 :
105 : /*
106 : * For wildcard search we extract all the trigrams that every
107 : * potentially-matching string must include.
108 : */
109 0 : trg = generate_wildcard_trgm(VARDATA_ANY(val),
110 0 : VARSIZE_ANY_EXHDR(val));
111 0 : break;
112 : case RegExpICaseStrategyNumber:
113 : #ifndef IGNORECASE
114 : elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
115 : #endif
116 : /* FALL THRU */
117 : case RegExpStrategyNumber:
118 0 : trg = createTrgmNFA(val, PG_GET_COLLATION(),
119 0 : &graph, CurrentMemoryContext);
120 0 : if (trg && ARRNELEM(trg) > 0)
121 : {
122 : /*
123 : * Successful regex processing: store NFA-like graph as
124 : * extra_data. GIN API requires an array of nentries
125 : * Pointers, but we just put the same value in each element.
126 : */
127 0 : trglen = ARRNELEM(trg);
128 0 : *extra_data = palloc_array(Pointer, trglen);
129 0 : for (i = 0; i < trglen; i++)
130 0 : (*extra_data)[i] = (Pointer) graph;
131 0 : }
132 : else
133 : {
134 : /* No result: have to do full index scan. */
135 0 : *nentries = 0;
136 0 : *searchMode = GIN_SEARCH_MODE_ALL;
137 0 : PG_RETURN_POINTER(entries);
138 : }
139 0 : break;
140 : default:
141 0 : elog(ERROR, "unrecognized strategy number: %d", strategy);
142 0 : trg = NULL; /* keep compiler quiet */
143 0 : break;
144 : }
145 :
146 0 : trglen = ARRNELEM(trg);
147 0 : *nentries = trglen;
148 :
149 0 : if (trglen > 0)
150 : {
151 0 : entries = palloc_array(Datum, trglen);
152 0 : ptr = GETARR(trg);
153 0 : for (i = 0; i < trglen; i++)
154 : {
155 0 : int32 item = trgm2int(ptr);
156 :
157 0 : entries[i] = Int32GetDatum(item);
158 0 : ptr++;
159 0 : }
160 0 : }
161 :
162 : /*
163 : * If no trigram was extracted then we have to scan all the index.
164 : */
165 0 : if (trglen == 0)
166 0 : *searchMode = GIN_SEARCH_MODE_ALL;
167 :
168 0 : PG_RETURN_POINTER(entries);
169 0 : }
170 :
171 : Datum
172 0 : gin_trgm_consistent(PG_FUNCTION_ARGS)
173 : {
174 0 : bool *check = (bool *) PG_GETARG_POINTER(0);
175 0 : StrategyNumber strategy = PG_GETARG_UINT16(1);
176 : #ifdef NOT_USED
177 : text *query = PG_GETARG_TEXT_PP(2);
178 : #endif
179 0 : int32 nkeys = PG_GETARG_INT32(3);
180 0 : Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
181 0 : bool *recheck = (bool *) PG_GETARG_POINTER(5);
182 0 : bool res;
183 0 : int32 i,
184 : ntrue;
185 0 : double nlimit;
186 :
187 : /* All cases served by this function are inexact */
188 0 : *recheck = true;
189 :
190 0 : switch (strategy)
191 : {
192 : case SimilarityStrategyNumber:
193 : case WordSimilarityStrategyNumber:
194 : case StrictWordSimilarityStrategyNumber:
195 0 : nlimit = index_strategy_get_limit(strategy);
196 :
197 : /* Count the matches */
198 0 : ntrue = 0;
199 0 : for (i = 0; i < nkeys; i++)
200 : {
201 0 : if (check[i])
202 0 : ntrue++;
203 0 : }
204 :
205 : /*--------------------
206 : * If DIVUNION is defined then similarity formula is:
207 : * c / (len1 + len2 - c)
208 : * where c is number of common trigrams and it stands as ntrue in
209 : * this code. Here we don't know value of len2 but we can assume
210 : * that c (ntrue) is a lower bound of len2, so upper bound of
211 : * similarity is:
212 : * c / (len1 + c - c) => c / len1
213 : * If DIVUNION is not defined then similarity formula is:
214 : * c / max(len1, len2)
215 : * And again, c (ntrue) is a lower bound of len2, but c <= len1
216 : * just by definition and, consequently, upper bound of
217 : * similarity is just c / len1.
218 : * So, independently on DIVUNION the upper bound formula is the same.
219 : */
220 0 : res = (nkeys == 0) ? false :
221 0 : (((((float4) ntrue) / ((float4) nkeys))) >= nlimit);
222 0 : break;
223 : case ILikeStrategyNumber:
224 : #ifndef IGNORECASE
225 : elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
226 : #endif
227 : /* FALL THRU */
228 : case LikeStrategyNumber:
229 : case EqualStrategyNumber:
230 : /* Check if all extracted trigrams are presented. */
231 0 : res = true;
232 0 : for (i = 0; i < nkeys; i++)
233 : {
234 0 : if (!check[i])
235 : {
236 0 : res = false;
237 0 : break;
238 : }
239 0 : }
240 0 : break;
241 : case RegExpICaseStrategyNumber:
242 : #ifndef IGNORECASE
243 : elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
244 : #endif
245 : /* FALL THRU */
246 : case RegExpStrategyNumber:
247 0 : if (nkeys < 1)
248 : {
249 : /* Regex processing gave no result: do full index scan */
250 0 : res = true;
251 0 : }
252 : else
253 0 : res = trigramsMatchGraph(extra_data[0], check);
254 0 : break;
255 : default:
256 0 : elog(ERROR, "unrecognized strategy number: %d", strategy);
257 0 : res = false; /* keep compiler quiet */
258 0 : break;
259 : }
260 :
261 0 : PG_RETURN_BOOL(res);
262 0 : }
263 :
264 : /*
265 : * In all cases, GIN_TRUE is at least as favorable to inclusion as
266 : * GIN_MAYBE. If no better option is available, simply treat
267 : * GIN_MAYBE as if it were GIN_TRUE and apply the same test as the binary
268 : * consistent function.
269 : */
270 : Datum
271 0 : gin_trgm_triconsistent(PG_FUNCTION_ARGS)
272 : {
273 0 : GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
274 0 : StrategyNumber strategy = PG_GETARG_UINT16(1);
275 : #ifdef NOT_USED
276 : text *query = PG_GETARG_TEXT_PP(2);
277 : #endif
278 0 : int32 nkeys = PG_GETARG_INT32(3);
279 0 : Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
280 0 : GinTernaryValue res = GIN_MAYBE;
281 0 : int32 i,
282 : ntrue;
283 0 : bool *boolcheck;
284 0 : double nlimit;
285 :
286 0 : switch (strategy)
287 : {
288 : case SimilarityStrategyNumber:
289 : case WordSimilarityStrategyNumber:
290 : case StrictWordSimilarityStrategyNumber:
291 0 : nlimit = index_strategy_get_limit(strategy);
292 :
293 : /* Count the matches */
294 0 : ntrue = 0;
295 0 : for (i = 0; i < nkeys; i++)
296 : {
297 0 : if (check[i] != GIN_FALSE)
298 0 : ntrue++;
299 0 : }
300 :
301 : /*
302 : * See comment in gin_trgm_consistent() about * upper bound
303 : * formula
304 : */
305 0 : res = (nkeys == 0)
306 0 : ? GIN_FALSE : (((((float4) ntrue) / ((float4) nkeys)) >= nlimit)
307 : ? GIN_MAYBE : GIN_FALSE);
308 0 : break;
309 : case ILikeStrategyNumber:
310 : #ifndef IGNORECASE
311 : elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
312 : #endif
313 : /* FALL THRU */
314 : case LikeStrategyNumber:
315 : case EqualStrategyNumber:
316 : /* Check if all extracted trigrams are presented. */
317 0 : res = GIN_MAYBE;
318 0 : for (i = 0; i < nkeys; i++)
319 : {
320 0 : if (check[i] == GIN_FALSE)
321 : {
322 0 : res = GIN_FALSE;
323 0 : break;
324 : }
325 0 : }
326 0 : break;
327 : case RegExpICaseStrategyNumber:
328 : #ifndef IGNORECASE
329 : elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
330 : #endif
331 : /* FALL THRU */
332 : case RegExpStrategyNumber:
333 0 : if (nkeys < 1)
334 : {
335 : /* Regex processing gave no result: do full index scan */
336 0 : res = GIN_MAYBE;
337 0 : }
338 : else
339 : {
340 : /*
341 : * As trigramsMatchGraph implements a monotonic boolean
342 : * function, promoting all GIN_MAYBE keys to GIN_TRUE will
343 : * give a conservative result.
344 : */
345 0 : boolcheck = palloc_array(bool, nkeys);
346 0 : for (i = 0; i < nkeys; i++)
347 0 : boolcheck[i] = (check[i] != GIN_FALSE);
348 0 : if (!trigramsMatchGraph(extra_data[0], boolcheck))
349 0 : res = GIN_FALSE;
350 0 : pfree(boolcheck);
351 : }
352 0 : break;
353 : default:
354 0 : elog(ERROR, "unrecognized strategy number: %d", strategy);
355 0 : res = GIN_FALSE; /* keep compiler quiet */
356 0 : break;
357 : }
358 :
359 : /* All cases served by this function are inexact */
360 0 : Assert(res != GIN_TRUE);
361 0 : PG_RETURN_GIN_TERNARY_VALUE(res);
362 0 : }
|