LCOV - code coverage report
Current view: top level - contrib/pg_trgm - trgm_op.c (source / functions) Coverage Total Hit
Test: Code coverage Lines: 0.0 % 624 0
Test Date: 2026-01-26 10:56:24 Functions: 0.0 % 55 0
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*
       2              :  * contrib/pg_trgm/trgm_op.c
       3              :  */
       4              : #include "postgres.h"
       5              : 
       6              : #include <ctype.h>
       7              : 
       8              : #include "catalog/pg_collation_d.h"
       9              : #include "catalog/pg_type.h"
      10              : #include "common/int.h"
      11              : #include "lib/qunique.h"
      12              : #include "miscadmin.h"
      13              : #include "trgm.h"
      14              : #include "tsearch/ts_locale.h"
      15              : #include "utils/formatting.h"
      16              : #include "utils/guc.h"
      17              : #include "utils/lsyscache.h"
      18              : #include "utils/memutils.h"
      19              : #include "utils/pg_crc.h"
      20              : 
      21            0 : PG_MODULE_MAGIC_EXT(
      22              :                                         .name = "pg_trgm",
      23              :                                         .version = PG_VERSION
      24              : );
      25              : 
      26              : /* GUC variables */
      27              : double          similarity_threshold = 0.3f;
      28              : double          word_similarity_threshold = 0.6f;
      29              : double          strict_word_similarity_threshold = 0.5f;
      30              : 
      31            0 : PG_FUNCTION_INFO_V1(set_limit);
      32            0 : PG_FUNCTION_INFO_V1(show_limit);
      33            0 : PG_FUNCTION_INFO_V1(show_trgm);
      34            0 : PG_FUNCTION_INFO_V1(similarity);
      35            0 : PG_FUNCTION_INFO_V1(word_similarity);
      36            0 : PG_FUNCTION_INFO_V1(strict_word_similarity);
      37            0 : PG_FUNCTION_INFO_V1(similarity_dist);
      38            0 : PG_FUNCTION_INFO_V1(similarity_op);
      39            0 : PG_FUNCTION_INFO_V1(word_similarity_op);
      40            0 : PG_FUNCTION_INFO_V1(word_similarity_commutator_op);
      41            0 : PG_FUNCTION_INFO_V1(word_similarity_dist_op);
      42            0 : PG_FUNCTION_INFO_V1(word_similarity_dist_commutator_op);
      43            0 : PG_FUNCTION_INFO_V1(strict_word_similarity_op);
      44            0 : PG_FUNCTION_INFO_V1(strict_word_similarity_commutator_op);
      45            0 : PG_FUNCTION_INFO_V1(strict_word_similarity_dist_op);
      46            0 : PG_FUNCTION_INFO_V1(strict_word_similarity_dist_commutator_op);
      47              : 
      48              : static int      CMPTRGM_CHOOSE(const void *a, const void *b);
      49              : int                     (*CMPTRGM) (const void *a, const void *b) = CMPTRGM_CHOOSE;
      50              : 
      51              : /* Trigram with position */
      52              : typedef struct
      53              : {
      54              :         trgm            trg;
      55              :         int                     index;
      56              : } pos_trgm;
      57              : 
      58              : /* Trigram bound type */
      59              : typedef uint8 TrgmBound;
      60              : #define TRGM_BOUND_LEFT                         0x01    /* trigram is left bound of word */
      61              : #define TRGM_BOUND_RIGHT                        0x02    /* trigram is right bound of word */
      62              : 
      63              : /* Word similarity flags */
      64              : #define WORD_SIMILARITY_CHECK_ONLY      0x01    /* only check existence of similar
      65              :                                                                                          * search pattern in text */
      66              : #define WORD_SIMILARITY_STRICT          0x02    /* force bounds of extent to match
      67              :                                                                                          * word bounds */
      68              : 
      69              : /*
      70              :  * Module load callback
      71              :  */
      72              : void
      73            0 : _PG_init(void)
      74              : {
      75              :         /* Define custom GUC variables. */
      76            0 :         DefineCustomRealVariable("pg_trgm.similarity_threshold",
      77              :                                                          "Sets the threshold used by the % operator.",
      78              :                                                          "Valid range is 0.0 .. 1.0.",
      79              :                                                          &similarity_threshold,
      80              :                                                          0.3f,
      81              :                                                          0.0,
      82              :                                                          1.0,
      83              :                                                          PGC_USERSET,
      84              :                                                          0,
      85              :                                                          NULL,
      86              :                                                          NULL,
      87              :                                                          NULL);
      88            0 :         DefineCustomRealVariable("pg_trgm.word_similarity_threshold",
      89              :                                                          "Sets the threshold used by the <% operator.",
      90              :                                                          "Valid range is 0.0 .. 1.0.",
      91              :                                                          &word_similarity_threshold,
      92              :                                                          0.6f,
      93              :                                                          0.0,
      94              :                                                          1.0,
      95              :                                                          PGC_USERSET,
      96              :                                                          0,
      97              :                                                          NULL,
      98              :                                                          NULL,
      99              :                                                          NULL);
     100            0 :         DefineCustomRealVariable("pg_trgm.strict_word_similarity_threshold",
     101              :                                                          "Sets the threshold used by the <<% operator.",
     102              :                                                          "Valid range is 0.0 .. 1.0.",
     103              :                                                          &strict_word_similarity_threshold,
     104              :                                                          0.5f,
     105              :                                                          0.0,
     106              :                                                          1.0,
     107              :                                                          PGC_USERSET,
     108              :                                                          0,
     109              :                                                          NULL,
     110              :                                                          NULL,
     111              :                                                          NULL);
     112              : 
     113            0 :         MarkGUCPrefixReserved("pg_trgm");
     114            0 : }
     115              : 
     116              : #define CMPCHAR(a,b) ( ((a)==(b)) ? 0 : ( ((a)<(b)) ? -1 : 1 ) )
     117              : 
     118              : /*
     119              :  * Functions for comparing two trgms while treating each char as "signed char" or
     120              :  * "unsigned char".
     121              :  */
     122              : static inline int
     123            0 : CMPTRGM_SIGNED(const void *a, const void *b)
     124              : {
     125              : #define CMPPCHAR_S(a,b,i)  CMPCHAR( *(((const signed char*)(a))+i), *(((const signed char*)(b))+i) )
     126              : 
     127            0 :         return CMPPCHAR_S(a, b, 0) ? CMPPCHAR_S(a, b, 0)
     128            0 :                 : (CMPPCHAR_S(a, b, 1) ? CMPPCHAR_S(a, b, 1)
     129            0 :                    : CMPPCHAR_S(a, b, 2));
     130              : }
     131              : 
     132              : static inline int
     133            0 : CMPTRGM_UNSIGNED(const void *a, const void *b)
     134              : {
     135              : #define CMPPCHAR_UNS(a,b,i)  CMPCHAR( *(((const unsigned char*)(a))+i), *(((const unsigned char*)(b))+i) )
     136              : 
     137            0 :         return CMPPCHAR_UNS(a, b, 0) ? CMPPCHAR_UNS(a, b, 0)
     138            0 :                 : (CMPPCHAR_UNS(a, b, 1) ? CMPPCHAR_UNS(a, b, 1)
     139            0 :                    : CMPPCHAR_UNS(a, b, 2));
     140              : }
     141              : 
     142              : /*
     143              :  * This gets called on the first call. It replaces the function pointer so
     144              :  * that subsequent calls are routed directly to the chosen implementation.
     145              :  */
     146              : static int
     147            0 : CMPTRGM_CHOOSE(const void *a, const void *b)
     148              : {
     149            0 :         if (GetDefaultCharSignedness())
     150            0 :                 CMPTRGM = CMPTRGM_SIGNED;
     151              :         else
     152            0 :                 CMPTRGM = CMPTRGM_UNSIGNED;
     153              : 
     154            0 :         return CMPTRGM(a, b);
     155              : }
     156              : 
     157              : /*
     158              :  * Deprecated function.
     159              :  * Use "pg_trgm.similarity_threshold" GUC variable instead of this function.
     160              :  */
     161              : Datum
     162            0 : set_limit(PG_FUNCTION_ARGS)
     163              : {
     164            0 :         float4          nlimit = PG_GETARG_FLOAT4(0);
     165            0 :         char       *nlimit_str;
     166            0 :         Oid                     func_out_oid;
     167            0 :         bool            is_varlena;
     168              : 
     169            0 :         getTypeOutputInfo(FLOAT4OID, &func_out_oid, &is_varlena);
     170              : 
     171            0 :         nlimit_str = OidOutputFunctionCall(func_out_oid, Float4GetDatum(nlimit));
     172              : 
     173            0 :         SetConfigOption("pg_trgm.similarity_threshold", nlimit_str,
     174              :                                         PGC_USERSET, PGC_S_SESSION);
     175              : 
     176            0 :         PG_RETURN_FLOAT4(similarity_threshold);
     177            0 : }
     178              : 
     179              : 
     180              : /*
     181              :  * Get similarity threshold for given index scan strategy number.
     182              :  */
     183              : double
     184            0 : index_strategy_get_limit(StrategyNumber strategy)
     185              : {
     186            0 :         switch (strategy)
     187              :         {
     188              :                 case SimilarityStrategyNumber:
     189            0 :                         return similarity_threshold;
     190              :                 case WordSimilarityStrategyNumber:
     191            0 :                         return word_similarity_threshold;
     192              :                 case StrictWordSimilarityStrategyNumber:
     193            0 :                         return strict_word_similarity_threshold;
     194              :                 default:
     195            0 :                         elog(ERROR, "unrecognized strategy number: %d", strategy);
     196            0 :                         break;
     197              :         }
     198              : 
     199            0 :         return 0.0;                                     /* keep compiler quiet */
     200            0 : }
     201              : 
     202              : /*
     203              :  * Deprecated function.
     204              :  * Use "pg_trgm.similarity_threshold" GUC variable instead of this function.
     205              :  */
     206              : Datum
     207            0 : show_limit(PG_FUNCTION_ARGS)
     208              : {
     209            0 :         PG_RETURN_FLOAT4(similarity_threshold);
     210              : }
     211              : 
     212              : static int
     213            0 : comp_trgm(const void *a, const void *b)
     214              : {
     215            0 :         return CMPTRGM(a, b);
     216              : }
     217              : 
     218              : /*
     219              :  * Finds the first word in str and returns a pointer to it (NULL if none);
     220              :  * *endword is set just past the word and *charlen to its character count.
     221              :  */
     222              : static char *
     223            0 : find_word(char *str, int lenstr, char **endword, int *charlen)
     224              : {
     225            0 :         char       *beginword = str;
     226              : 
     227            0 :         while (beginword - str < lenstr && !ISWORDCHR(beginword))
     228            0 :                 beginword += pg_mblen(beginword);
     229              : 
     230            0 :         if (beginword - str >= lenstr)
     231            0 :                 return NULL;
     232              : 
     233            0 :         *endword = beginword;
     234            0 :         *charlen = 0;
     235            0 :         while (*endword - str < lenstr && ISWORDCHR(*endword))
     236              :         {
     237            0 :                 *endword += pg_mblen(*endword);
     238            0 :                 (*charlen)++;
     239              :         }
     240              : 
     241            0 :         return beginword;
     242            0 : }
     243              : 
     244              : /*
     245              :  * Reduce a trigram (three possibly multi-byte characters) to a trgm,
     246              :  * which is always exactly three bytes.  If we have three single-byte
     247              :  * characters, we just use them as-is; otherwise we form a hash value.
     248              :  */
     249              : void
     250            0 : compact_trigram(trgm *tptr, char *str, int bytelen)
     251              : {
     252            0 :         if (bytelen == 3)
     253              :         {
     254            0 :                 CPTRGM(tptr, str);
     255            0 :         }
     256              :         else
     257              :         {
     258            0 :                 pg_crc32        crc;
     259              : 
     260            0 :                 INIT_LEGACY_CRC32(crc);
     261            0 :                 COMP_LEGACY_CRC32(crc, str, bytelen);
     262            0 :                 FIN_LEGACY_CRC32(crc);
     263              : 
     264              :                 /*
     265              :                  * use only 3 upper bytes from crc, hope, it's good enough hashing
     266              :                  */
     267            0 :                 CPTRGM(tptr, &crc);
     268            0 :         }
     269            0 : }
     270              : 
     271              : /*
     272              :  * Adds trigrams from words (already padded).
     273              :  */
     274              : static trgm *
     275            0 : make_trigrams(trgm *tptr, char *str, int bytelen, int charlen)
     276              : {
     277            0 :         char       *ptr = str;
     278              : 
     279            0 :         if (charlen < 3)
     280            0 :                 return tptr;
     281              : 
     282            0 :         if (bytelen > charlen)
     283              :         {
     284              :                 /* Find multibyte character boundaries and apply compact_trigram */
     285            0 :                 int                     lenfirst = pg_mblen(str),
     286            0 :                                         lenmiddle = pg_mblen(str + lenfirst),
     287            0 :                                         lenlast = pg_mblen(str + lenfirst + lenmiddle);
     288              : 
     289            0 :                 while ((ptr - str) + lenfirst + lenmiddle + lenlast <= bytelen)
     290              :                 {
     291            0 :                         compact_trigram(tptr, ptr, lenfirst + lenmiddle + lenlast);
     292              : 
     293            0 :                         ptr += lenfirst;
     294            0 :                         tptr++;
     295              : 
     296            0 :                         lenfirst = lenmiddle;
     297            0 :                         lenmiddle = lenlast;
     298            0 :                         lenlast = pg_mblen(ptr + lenfirst + lenmiddle);
     299              :                 }
     300            0 :         }
     301              :         else
     302              :         {
     303              :                 /* Fast path when there are no multibyte characters */
     304            0 :                 Assert(bytelen == charlen);
     305              : 
     306            0 :                 while (ptr - str < bytelen - 2 /* number of trigrams = strlen - 2 */ )
     307              :                 {
     308            0 :                         CPTRGM(tptr, ptr);
     309            0 :                         ptr++;
     310            0 :                         tptr++;
     311              :                 }
     312              :         }
     313              : 
     314            0 :         return tptr;
     315            0 : }
     316              : 
     317              : /*
     318              :  * Make array of trigrams without sorting or removing duplicate items.
     319              :  *
     320              :  * trg: where to return the array of trigrams.
     321              :  * str: source string, of length slen bytes.
     322              :  * bounds: where to return bounds of trigrams (if needed).
     323              :  *
     324              :  * Returns length of the generated array.
     325              :  */
     326              : static int
     327            0 : generate_trgm_only(trgm *trg, char *str, int slen, TrgmBound *bounds)
     328              : {
     329            0 :         trgm       *tptr;
     330            0 :         char       *buf;
     331            0 :         int                     charlen,
     332              :                                 bytelen;
     333            0 :         char       *bword,
     334              :                            *eword;
     335              : 
     336            0 :         if (slen + LPADDING + RPADDING < 3 || slen == 0)
     337            0 :                 return 0;
     338              : 
     339            0 :         tptr = trg;
     340              : 
     341              :         /* Allocate a buffer for case-folded, blank-padded words */
     342            0 :         buf = (char *) palloc(slen * pg_database_encoding_max_length() + 4);
     343              : 
     344              :         if (LPADDING > 0)
     345              :         {
     346            0 :                 *buf = ' ';
     347              :                 if (LPADDING > 1)
     348            0 :                         *(buf + 1) = ' ';
     349              :         }
     350              : 
     351            0 :         eword = str;
     352            0 :         while ((bword = find_word(eword, slen - (eword - str), &eword, &charlen)) != NULL)
     353              :         {
     354              : #ifdef IGNORECASE
     355            0 :                 bword = str_tolower(bword, eword - bword, DEFAULT_COLLATION_OID);
     356            0 :                 bytelen = strlen(bword);
     357              : #else
     358              :                 bytelen = eword - bword;
     359              : #endif
     360              : 
     361            0 :                 memcpy(buf + LPADDING, bword, bytelen);
     362              : 
     363              : #ifdef IGNORECASE
     364            0 :                 pfree(bword);
     365              : #endif
     366              : 
     367            0 :                 buf[LPADDING + bytelen] = ' ';
     368            0 :                 buf[LPADDING + bytelen + 1] = ' ';
     369              : 
     370              :                 /* Calculate trigrams marking their bounds if needed */
     371            0 :                 if (bounds)
     372            0 :                         bounds[tptr - trg] |= TRGM_BOUND_LEFT;
     373            0 :                 tptr = make_trigrams(tptr, buf, bytelen + LPADDING + RPADDING,
     374            0 :                                                          charlen + LPADDING + RPADDING);
     375            0 :                 if (bounds)
     376            0 :                         bounds[tptr - trg - 1] |= TRGM_BOUND_RIGHT;
     377              :         }
     378              : 
     379            0 :         pfree(buf);
     380              : 
     381            0 :         return tptr - trg;
     382            0 : }
     383              : 
     384              : /*
     385              :  * Guard against possible overflow in the palloc requests below.  (We
     386              :  * don't worry about the additive constants, since palloc can detect
     387              :  * requests that are a little above MaxAllocSize --- we just need to
     388              :  * prevent integer overflow in the multiplications.)
     389              :  */
     390              : static void
     391            0 : protect_out_of_mem(int slen)
     392              : {
     393            0 :         if ((Size) (slen / 2) >= (MaxAllocSize / (sizeof(trgm) * 3)) ||
     394            0 :                 (Size) slen >= (MaxAllocSize / pg_database_encoding_max_length()))
     395            0 :                 ereport(ERROR,
     396              :                                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     397              :                                  errmsg("out of memory")));
     398            0 : }
     399              : 
     400              : /*
     401              :  * Make a sorted array of trigrams with duplicate items removed.
     402              :  *
     403              :  * str: source string, of length slen bytes.
     404              :  *
     405              :  * Returns the sorted array of unique trigrams.
     406              :  */
     407              : TRGM *
     408            0 : generate_trgm(char *str, int slen)
     409              : {
     410            0 :         TRGM       *trg;
     411            0 :         int                     len;
     412              : 
     413            0 :         protect_out_of_mem(slen);
     414              : 
     415            0 :         trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) * 3);
     416            0 :         trg->flag = ARRKEY;
     417              : 
     418            0 :         len = generate_trgm_only(GETARR(trg), str, slen, NULL);
     419            0 :         SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));
     420              : 
     421            0 :         if (len == 0)
     422            0 :                 return trg;
     423              : 
     424              :         /*
     425              :          * Make trigrams unique.
     426              :          */
     427            0 :         if (len > 1)
     428              :         {
     429            0 :                 qsort(GETARR(trg), len, sizeof(trgm), comp_trgm);
     430            0 :                 len = qunique(GETARR(trg), len, sizeof(trgm), comp_trgm);
     431            0 :         }
     432              : 
     433            0 :         SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));
     434              : 
     435            0 :         return trg;
     436            0 : }
     437              : 
     438              : /*
     439              :  * Make array of positional trigrams from two trigram arrays trg1 and trg2.
     440              :  *
     441              :  * trg1: trigram array of search pattern, of length len1. trg1 is the required
     442              :  *               word, whose positions don't matter and are replaced with -1.
     443              :  * trg2: trigram array of text, of length len2. trg2 is the haystack in which
     444              :  *               we search, so its positions have to be stored.
     445              :  *
     446              :  * Returns concatenated trigram array.
     447              :  */
     448              : static pos_trgm *
     449            0 : make_positional_trgm(trgm *trg1, int len1, trgm *trg2, int len2)
     450              : {
     451            0 :         pos_trgm   *result;
     452            0 :         int                     i,
     453            0 :                                 len = len1 + len2;
     454              : 
     455            0 :         result = palloc_array(pos_trgm, len);
     456              : 
     457            0 :         for (i = 0; i < len1; i++)
     458              :         {
     459            0 :                 memcpy(&result[i].trg, &trg1[i], sizeof(trgm));
     460            0 :                 result[i].index = -1;
     461            0 :         }
     462              : 
     463            0 :         for (i = 0; i < len2; i++)
     464              :         {
     465            0 :                 memcpy(&result[i + len1].trg, &trg2[i], sizeof(trgm));
     466            0 :                 result[i + len1].index = i;
     467            0 :         }
     468              : 
     469            0 :         return result;
     470            0 : }
     471              : 
     472              : /*
     473              :  * Compare position trigrams: compare trigrams first and position second.
     474              :  */
     475              : static int
     476            0 : comp_ptrgm(const void *v1, const void *v2)
     477              : {
     478            0 :         const pos_trgm *p1 = (const pos_trgm *) v1;
     479            0 :         const pos_trgm *p2 = (const pos_trgm *) v2;
     480            0 :         int                     cmp;
     481              : 
     482            0 :         cmp = CMPTRGM(p1->trg, p2->trg);
     483            0 :         if (cmp != 0)
     484            0 :                 return cmp;
     485              : 
     486            0 :         return pg_cmp_s32(p1->index, p2->index);
     487            0 : }
     488              : 
     489              : /*
     490              :  * Iterative search function which calculates maximum similarity with word in
     491              :  * the string. Maximum similarity is calculated only if the flag
     492              :  * WORD_SIMILARITY_CHECK_ONLY isn't set.
     493              :  *
     494              :  * trg2indexes: array which stores indexes of the array "found".
     495              :  * found: array which stores true or false values.
     496              :  * ulen1: count of unique trigrams of array "trg1".
     497              :  * len2: length of array "trg2" and array "trg2indexes".
     498              :  * len: length of the array "found".
     499              :  * flags: set of boolean flags parameterizing similarity calculation.
     500              :  * bounds: whether each trigram is left/right bound of word.
     501              :  *
     502              :  * Returns word similarity.
     503              :  */
     504              : static float4
     505            0 : iterate_word_similarity(int *trg2indexes,
     506              :                                                 bool *found,
     507              :                                                 int ulen1,
     508              :                                                 int len2,
     509              :                                                 int len,
     510              :                                                 uint8 flags,
     511              :                                                 TrgmBound *bounds)
     512              : {
     513            0 :         int                *lastpos,
     514              :                                 i,
     515            0 :                                 ulen2 = 0,
     516            0 :                                 count = 0,
     517            0 :                                 upper = -1,
     518              :                                 lower;
     519            0 :         float4          smlr_cur,
     520            0 :                                 smlr_max = 0.0f;
     521            0 :         double          threshold;
     522              : 
     523            0 :         Assert(bounds || !(flags & WORD_SIMILARITY_STRICT));
     524              : 
     525              :         /* Select appropriate threshold */
     526            0 :         threshold = (flags & WORD_SIMILARITY_STRICT) ?
     527            0 :                 strict_word_similarity_threshold :
     528            0 :                 word_similarity_threshold;
     529              : 
     530              :         /*
     531              :          * Consider first trigram as initial lower bound for strict word
     532              :          * similarity, or initialize it later with first trigram present for plain
     533              :          * word similarity.
     534              :          */
     535            0 :         lower = (flags & WORD_SIMILARITY_STRICT) ? 0 : -1;
     536              : 
     537              :         /* Memorise last position of each trigram */
     538            0 :         lastpos = palloc_array(int, len);
     539            0 :         memset(lastpos, -1, sizeof(int) * len);
     540              : 
     541            0 :         for (i = 0; i < len2; i++)
     542              :         {
     543            0 :                 int                     trgindex;
     544              : 
     545            0 :                 CHECK_FOR_INTERRUPTS();
     546              : 
     547              :                 /* Get index of next trigram */
     548            0 :                 trgindex = trg2indexes[i];
     549              : 
     550              :                 /* Update last position of this trigram */
     551            0 :                 if (lower >= 0 || found[trgindex])
     552              :                 {
     553            0 :                         if (lastpos[trgindex] < 0)
     554              :                         {
     555            0 :                                 ulen2++;
     556            0 :                                 if (found[trgindex])
     557            0 :                                         count++;
     558            0 :                         }
     559            0 :                         lastpos[trgindex] = i;
     560            0 :                 }
     561              : 
     562              :                 /*
     563              :                  * Adjust upper bound if trigram is upper bound of word for strict
     564              :                  * word similarity, or if trigram is present in required substring for
     565              :                  * plain word similarity
     566              :                  */
     567            0 :                 if ((flags & WORD_SIMILARITY_STRICT) ? (bounds[i] & TRGM_BOUND_RIGHT)
     568            0 :                         : found[trgindex])
     569              :                 {
     570            0 :                         int                     prev_lower,
     571              :                                                 tmp_ulen2,
     572              :                                                 tmp_lower,
     573              :                                                 tmp_count;
     574              : 
     575            0 :                         upper = i;
     576            0 :                         if (lower == -1)
     577              :                         {
     578            0 :                                 lower = i;
     579            0 :                                 ulen2 = 1;
     580            0 :                         }
     581              : 
     582            0 :                         smlr_cur = CALCSML(count, ulen1, ulen2);
     583              : 
     584              :                         /* Also try to adjust lower bound for greater similarity */
     585            0 :                         tmp_count = count;
     586            0 :                         tmp_ulen2 = ulen2;
     587            0 :                         prev_lower = lower;
     588            0 :                         for (tmp_lower = lower; tmp_lower <= upper; tmp_lower++)
     589              :                         {
     590            0 :                                 float           smlr_tmp;
     591            0 :                                 int                     tmp_trgindex;
     592              : 
     593              :                                 /*
     594              :                                  * Adjust lower bound only if trigram is lower bound of word
     595              :                                  * for strict word similarity, or consider every trigram as
     596              :                                  * lower bound for plain word similarity.
     597              :                                  */
     598            0 :                                 if (!(flags & WORD_SIMILARITY_STRICT)
     599            0 :                                         || (bounds[tmp_lower] & TRGM_BOUND_LEFT))
     600              :                                 {
     601            0 :                                         smlr_tmp = CALCSML(tmp_count, ulen1, tmp_ulen2);
     602            0 :                                         if (smlr_tmp > smlr_cur)
     603              :                                         {
     604            0 :                                                 smlr_cur = smlr_tmp;
     605            0 :                                                 ulen2 = tmp_ulen2;
     606            0 :                                                 lower = tmp_lower;
     607            0 :                                                 count = tmp_count;
     608            0 :                                         }
     609              : 
     610              :                                         /*
     611              :                                          * If we only check that word similarity is greater than
     612              :                                          * threshold we do not need to calculate a maximum
     613              :                                          * similarity.
     614              :                                          */
     615            0 :                                         if ((flags & WORD_SIMILARITY_CHECK_ONLY)
     616            0 :                                                 && smlr_cur >= threshold)
     617            0 :                                                 break;
     618            0 :                                 }
     619              : 
     620            0 :                                 tmp_trgindex = trg2indexes[tmp_lower];
     621            0 :                                 if (lastpos[tmp_trgindex] == tmp_lower)
     622              :                                 {
     623            0 :                                         tmp_ulen2--;
     624            0 :                                         if (found[tmp_trgindex])
     625            0 :                                                 tmp_count--;
     626            0 :                                 }
     627            0 :                         }
     628              : 
     629            0 :                         smlr_max = Max(smlr_max, smlr_cur);
     630              : 
     631              :                         /*
     632              :                          * if we only check that word similarity is greater than threshold
     633              :                          * we do not need to calculate a maximum similarity.
     634              :                          */
     635            0 :                         if ((flags & WORD_SIMILARITY_CHECK_ONLY) && smlr_max >= threshold)
     636            0 :                                 break;
     637              : 
     638            0 :                         for (tmp_lower = prev_lower; tmp_lower < lower; tmp_lower++)
     639              :                         {
     640            0 :                                 int                     tmp_trgindex;
     641              : 
     642            0 :                                 tmp_trgindex = trg2indexes[tmp_lower];
     643            0 :                                 if (lastpos[tmp_trgindex] == tmp_lower)
     644            0 :                                         lastpos[tmp_trgindex] = -1;
     645            0 :                         }
     646            0 :                 }
     647            0 :         }
     648              : 
     649            0 :         pfree(lastpos);
     650              : 
     651            0 :         return smlr_max;
     652            0 : }
     653              : 
     654              : /*
     655              :  * Calculate word similarity.
     656              :  * This function prepares two arrays, "trg2indexes" and "found", which are
     657              :  * then passed to iterate_word_similarity() to compute the word similarity.
     658              :  *
     659              :  * "trg2indexes" is an array which stores indexes into the array "found".
     660              :  * In other words:
     661              :  * trg2indexes[j] = i;
     662              :  * found[i] = true (or false);
     663              :  * If found[i] == true then trigram trg2[j] is present in array "trg1".
     664              :  * If found[i] == false then trigram trg2[j] is not present in array "trg1".
     665              :  *
     666              :  * str1: search pattern string, of length slen1 bytes.
     667              :  * str2: text in which we are looking for a word, of length slen2 bytes.
     668              :  * flags: set of boolean flags parameterizing similarity calculation.
     669              :  *
     670              :  * Returns word similarity.
     671              :  */
     672              : static float4
     673            0 : calc_word_similarity(char *str1, int slen1, char *str2, int slen2,
     674              :                                          uint8 flags)
     675              : {
     676            0 :         bool       *found;
     677            0 :         pos_trgm   *ptrg;
     678            0 :         trgm       *trg1;
     679            0 :         trgm       *trg2;
     680            0 :         int                     len1,
     681              :                                 len2,
     682              :                                 len,
     683              :                                 i,
     684              :                                 j,
     685              :                                 ulen1;
     686            0 :         int                *trg2indexes;
     687            0 :         float4          result;
     688            0 :         TrgmBound  *bounds;
     689              : 
     690            0 :         protect_out_of_mem(slen1 + slen2);
     691              : 
     692              :         /* Extract trigrams of both strings, then make positional trigrams */
     693            0 :         trg1 = (trgm *) palloc(sizeof(trgm) * (slen1 / 2 + 1) * 3);
     694            0 :         trg2 = (trgm *) palloc(sizeof(trgm) * (slen2 / 2 + 1) * 3);
     695            0 :         if (flags & WORD_SIMILARITY_STRICT)
     696            0 :                 bounds = (TrgmBound *) palloc0(sizeof(TrgmBound) * (slen2 / 2 + 1) * 3);
     697              :         else
     698            0 :                 bounds = NULL;  /* bounds are only collected for the strict variant */
     699              : 
     700            0 :         len1 = generate_trgm_only(trg1, str1, slen1, NULL);
     701            0 :         len2 = generate_trgm_only(trg2, str2, slen2, bounds);
     702              : 
     703            0 :         ptrg = make_positional_trgm(trg1, len1, trg2, len2);
     704            0 :         len = len1 + len2;
     705            0 :         qsort(ptrg, len, sizeof(pos_trgm), comp_ptrgm);
     706              : 
     707            0 :         pfree(trg1);
     708            0 :         pfree(trg2);
     709              : 
     710              :         /*
     711              :          * Merge sorted positional trigrams array: number each distinct trigram
     712              :          * (j) and record in found[j] its presence in the required word.
     713              :          */
     714            0 :         trg2indexes = palloc_array(int, len2);
     715            0 :         found = palloc0_array(bool, len);
     716              : 
     717            0 :         ulen1 = 0;              /* counts distinct trigrams whose found[] is set */
     718            0 :         j = 0;
     719            0 :         for (i = 0; i < len; i++)
     720              :         {
     721            0 :                 if (i > 0)
     722              :                 {
     723            0 :                         int                     cmp = CMPTRGM(ptrg[i - 1].trg, ptrg[i].trg);
     724              : 
     725            0 :                         if (cmp != 0)   /* a new distinct trigram starts here */
     726              :                         {
     727            0 :                                 if (found[j])
     728            0 :                                         ulen1++;
     729            0 :                                 j++;
     730            0 :                         }
     731            0 :                 }
     732              : 
     733            0 :                 if (ptrg[i].index >= 0) /* non-negative index: a trg2indexes slot */
     734              :                 {
     735            0 :                         trg2indexes[ptrg[i].index] = j;
     736            0 :                 }
     737              :                 else
     738              :                 {
     739            0 :                         found[j] = true;
     740              :                 }
     741            0 :         }
     742            0 :         if (found[j])           /* the last run is not counted inside the loop */
     743            0 :                 ulen1++;
     744              : 
     745              :         /* Run iterative procedure to find maximum similarity with word */
     746            0 :         result = iterate_word_similarity(trg2indexes, found, ulen1, len2, len,
     747            0 :                                                                          flags, bounds);
     748              : 
     749            0 :         pfree(trg2indexes);
     750            0 :         pfree(found);
     751            0 :         pfree(ptrg);
     752              : 
     753            0 :         return result;
     754            0 : }
     755              : 
     756              : 
     757              : /*
     758              :  * Extract the next non-wildcard part of a search string, i.e. a word bounded
     759              :  * by '_' or '%' meta-characters, non-word characters or string end.
     760              :  *
     761              :  * str: source string, of length lenstr bytes (need not be null-terminated)
     762              :  * buf: where to return the substring (must be long enough; not terminated)
     763              :  * *bytelen: receives byte length of the found substring
     764              :  * *charlen: receives character length of the found substring
     765              :  *
     766              :  * Returns pointer to end+1 of the found substring in the source string.
     767              :  * Returns NULL if no word found (in which case buf, *bytelen and *charlen
     768              :  * are not set).
     769              :  * If the found word is bounded by non-word characters or string boundaries
     770              :  * then this function will include corresponding padding spaces into buf.
     771              :  */
     772              : static const char *
     773            0 : get_wildcard_part(const char *str, int lenstr,
     774              :                                   char *buf, int *bytelen, int *charlen)
     775              : {
     776            0 :         const char *beginword = str;
     777            0 :         const char *endword;
     778            0 :         char       *s = buf;
     779            0 :         bool            in_leading_wildcard_meta = false;
     780            0 :         bool            in_trailing_wildcard_meta = false;
     781            0 :         bool            in_escape = false;
     782            0 :         int                     clen;
     783              : 
     784              :         /*
     785              :          * Find the first word character, remembering whether preceding character
     786              :          * was wildcard meta-character.  Note that the in_escape state persists
     787              :          * from this loop to the next one, since we may exit at a word character
     788              :          * that is in_escape.
     789              :          */
     790            0 :         while (beginword - str < lenstr)
     791              :         {
     792            0 :                 if (in_escape)
     793              :                 {
     794            0 :                         if (ISWORDCHR(beginword))
     795            0 :                                 break;
     796            0 :                         in_escape = false;      /* escaped non-word character: skip it */
     797            0 :                         in_leading_wildcard_meta = false;
     798            0 :                 }
     799              :                 else
     800              :                 {
     801            0 :                         if (ISESCAPECHAR(beginword))
     802            0 :                                 in_escape = true;
     803            0 :                         else if (ISWILDCARDCHAR(beginword))
     804            0 :                                 in_leading_wildcard_meta = true;
     805            0 :                         else if (ISWORDCHR(beginword))
     806            0 :                                 break;
     807              :                         else
     808            0 :                                 in_leading_wildcard_meta = false;
     809              :                 }
     810            0 :                 beginword += pg_mblen(beginword);       /* advance one character */
     811              :         }
     812              : 
     813              :         /*
     814              :          * Handle string end: no word character was found.
     815              :          */
     816            0 :         if (beginword - str >= lenstr)
     817            0 :                 return NULL;
     818              : 
     819              :         /*
     820              :          * Add left padding spaces if preceding character wasn't wildcard
     821              :          * meta-character.
     822              :          */
     823            0 :         *charlen = 0;
     824            0 :         if (!in_leading_wildcard_meta)
     825              :         {
     826              :                 if (LPADDING > 0)
     827              :                 {
     828            0 :                         *s++ = ' ';
     829            0 :                         (*charlen)++;
     830              :                         if (LPADDING > 1)
     831              :                         {
     832            0 :                                 *s++ = ' ';
     833            0 :                                 (*charlen)++;
     834              :                         }
     835              :                 }
     836            0 :         }
     837              : 
     838              :         /*
     839              :          * Copy data into buf until wildcard meta-character, non-word character or
     840              :          * string boundary.  Strip escapes during copy.
     841              :          */
     842            0 :         endword = beginword;
     843            0 :         while (endword - str < lenstr)
     844              :         {
     845            0 :                 clen = pg_mblen(endword);
     846            0 :                 if (in_escape)
     847              :                 {
     848            0 :                         if (ISWORDCHR(endword))
     849              :                         {
     850            0 :                                 memcpy(s, endword, clen);
     851            0 :                                 (*charlen)++;
     852            0 :                                 s += clen;
     853            0 :                         }
     854              :                         else
     855              :                         {
     856              :                                 /*
     857              :                                  * Back up endword to the escape character when stopping at an
     858              :                                  * escaped char, so that subsequent get_wildcard_part will
     859              :                                  * restart from the escape character.  We assume here that
     860              :                                  * escape chars are single-byte.
     861              :                                  */
     862            0 :                                 endword--;
     863            0 :                                 break;
     864              :                         }
     865            0 :                         in_escape = false;      /* the escaped word character was copied */
     866            0 :                 }
     867              :                 else
     868              :                 {
     869            0 :                         if (ISESCAPECHAR(endword))
     870            0 :                                 in_escape = true;       /* the escape itself is not copied */
     871            0 :                         else if (ISWILDCARDCHAR(endword))
     872              :                         {
     873            0 :                                 in_trailing_wildcard_meta = true;
     874            0 :                                 break;
     875              :                         }
     876            0 :                         else if (ISWORDCHR(endword))
     877              :                         {
     878            0 :                                 memcpy(s, endword, clen);
     879            0 :                                 (*charlen)++;
     880            0 :                                 s += clen;
     881            0 :                         }
     882              :                         else
     883            0 :                                 break;
     884              :                 }
     885            0 :                 endword += clen;
     886              :         }
     887              : 
     888              :         /*
     889              :          * Add right padding spaces if next character isn't wildcard
     890              :          * meta-character.
     891              :          */
     892            0 :         if (!in_trailing_wildcard_meta)
     893              :         {
     894              :                 if (RPADDING > 0)
     895              :                 {
     896            0 :                         *s++ = ' ';
     897            0 :                         (*charlen)++;
     898              :                         if (RPADDING > 1)
     899              :                         {
     900              :                                 *s++ = ' ';
     901              :                                 (*charlen)++;
     902              :                         }
     903              :                 }
     904            0 :         }
     905              : 
     906            0 :         *bytelen = s - buf;     /* bytes written to buf, padding included */
     907            0 :         return endword;
     908            0 : }
     909              : 
     910              : /*
     911              :  * Generates trigrams for wildcard search string "str", of length slen bytes.
     912              :  *
     913              :  * Returns a palloc'd, sorted and de-duplicated array of the trigrams that must
     914              :  * occur in any string that matches the wildcard string.  For example, given
     915              :  * pattern "a%bcd%" the trigrams " a", "bcd" would be extracted.
     916              :  */
     917              : TRGM *
     918            0 : generate_wildcard_trgm(const char *str, int slen)
     919              : {
     920            0 :         TRGM       *trg;
     921            0 :         char       *buf,
     922              :                            *buf2;
     923            0 :         trgm       *tptr;
     924            0 :         int                     len,
     925              :                                 charlen,
     926              :                                 bytelen;
     927            0 :         const char *eword;
     928              : 
     929            0 :         protect_out_of_mem(slen);
     930              : 
     931            0 :         trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) * 3);
     932            0 :         trg->flag = ARRKEY;
     933            0 :         SET_VARSIZE(trg, TRGMHDRSIZE);  /* header only: no trigrams yet */
     934              : 
     935            0 :         if (slen + LPADDING + RPADDING < 3 || slen == 0)
     936            0 :                 return trg;             /* return the empty array */
     937              : 
     938            0 :         tptr = GETARR(trg);
     939              : 
     940              :         /* Allocate a buffer for blank-padded, but not yet case-folded, words */
     941            0 :         buf = palloc_array(char, slen + 4);     /* +4: up to two pad spaces per side */
     942              : 
     943              :         /*
     944              :          * Extract trigrams from each substring extracted by get_wildcard_part.
     945              :          */
     946            0 :         eword = str;
     947            0 :         while ((eword = get_wildcard_part(eword, slen - (eword - str),
     948            0 :                                                                           buf, &bytelen, &charlen)) != NULL)
     949              :         {
     950              : #ifdef IGNORECASE
     951            0 :                 buf2 = str_tolower(buf, bytelen, DEFAULT_COLLATION_OID);
     952            0 :                 bytelen = strlen(buf2); /* byte length is re-measured after folding */
     953              : #else
     954              :                 buf2 = buf;
     955              : #endif
     956              : 
     957              :                 /*
     958              :                  * extract trigrams of this part; tptr is advanced past them
     959              :                  */
     960            0 :                 tptr = make_trigrams(tptr, buf2, bytelen, charlen);
     961              : 
     962              : #ifdef IGNORECASE
     963            0 :                 pfree(buf2);
     964              : #endif
     965              :         }
     966              : 
     967            0 :         pfree(buf);
     968              : 
     969            0 :         if ((len = tptr - GETARR(trg)) == 0)
     970            0 :                 return trg;             /* nothing extracted: still the empty array */
     971              : 
     972              :         /*
     973              :          * Make trigrams unique.
     974              :          */
     975            0 :         if (len > 1)
     976              :         {
     977            0 :                 qsort(GETARR(trg), len, sizeof(trgm), comp_trgm);
     978            0 :                 len = qunique(GETARR(trg), len, sizeof(trgm), comp_trgm);
     979            0 :         }
     980              : 
     981            0 :         SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));
     982              : 
     983            0 :         return trg;
     984            0 : }
     985              : /* Pack the three bytes at ptr into a uint32, first byte most significant. */
     986              : uint32
     987            0 : trgm2int(trgm *ptr)
     988              : {
     989            0 :         uint32          val = 0;
     990              : 
     991            0 :         val |= *(((unsigned char *) ptr));      /* byte 0 ends up in bits 16..23 */
     992            0 :         val <<= 8;
     993            0 :         val |= *(((unsigned char *) ptr) + 1);  /* byte 1 ends up in bits 8..15 */
     994            0 :         val <<= 8;
     995            0 :         val |= *(((unsigned char *) ptr) + 2);  /* byte 2 ends up in bits 0..7 */
     996              : 
     997            0 :         return val;
     998            0 : }
     999              : /* SQL-callable: return the trigrams of the text argument as a text array. */
    1000              : Datum
    1001            0 : show_trgm(PG_FUNCTION_ARGS)
    1002              : {
    1003            0 :         text       *in = PG_GETARG_TEXT_PP(0);
    1004            0 :         TRGM       *trg;
    1005            0 :         Datum      *d;
    1006            0 :         ArrayType  *a;
    1007            0 :         trgm       *ptr;
    1008            0 :         int                     i;
    1009              : 
    1010            0 :         trg = generate_trgm(VARDATA_ANY(in), VARSIZE_ANY_EXHDR(in));
    1011            0 :         d = palloc_array(Datum, 1 + ARRNELEM(trg));
    1012              : 
    1013            0 :         for (i = 0, ptr = GETARR(trg); i < ARRNELEM(trg); i++, ptr++)
    1014              :         {
    1015            0 :                 text       *item = (text *) palloc(VARHDRSZ + Max(12, pg_database_encoding_max_length() * 3));
    1016              : 
    1017            0 :                 if (pg_database_encoding_max_length() > 1 && !ISPRINTABLETRGM(ptr))
    1018              :                 {
    1019            0 :                         snprintf(VARDATA(item), 12, "0x%06x", trgm2int(ptr));   /* show as hex number */
    1020            0 :                         SET_VARSIZE(item, VARHDRSZ + strlen(VARDATA(item)));
    1021            0 :                 }
    1022              :                 else
    1023              :                 {
    1024            0 :                         SET_VARSIZE(item, VARHDRSZ + 3);        /* 3-byte payload, set by CPTRGM below */
    1025            0 :                         CPTRGM(VARDATA(item), ptr);
    1026              :                 }
    1027            0 :                 d[i] = PointerGetDatum(item);
    1028            0 :         }
    1029              : 
    1030            0 :         a = construct_array_builtin(d, ARRNELEM(trg), TEXTOID);
    1031              : 
    1032            0 :         for (i = 0; i < ARRNELEM(trg); i++)     /* release the per-item text values */
    1033            0 :                 pfree(DatumGetPointer(d[i]));
    1034              : 
    1035            0 :         pfree(d);
    1036            0 :         pfree(trg);
    1037            0 :         PG_FREE_IF_COPY(in, 0);
    1038              : 
    1039            0 :         PG_RETURN_POINTER(a);
    1040            0 : }
    1041              : /* Similarity of two trigram arrays: CALCSML over their count of common trigrams. */
    1042              : float4
    1043            0 : cnt_sml(TRGM *trg1, TRGM *trg2, bool inexact)
    1044              : {
    1045            0 :         trgm       *ptr1,
    1046              :                            *ptr2;
    1047            0 :         int                     count = 0;
    1048            0 :         int                     len1,
    1049              :                                 len2;
    1050              : 
    1051            0 :         ptr1 = GETARR(trg1);
    1052            0 :         ptr2 = GETARR(trg2);
    1053              : 
    1054            0 :         len1 = ARRNELEM(trg1);
    1055            0 :         len2 = ARRNELEM(trg2);
    1056              : 
    1057              :         /* return 0 if either array is empty; this also avoids 0/0 division when both lengths are 0 */
    1058            0 :         if (len1 <= 0 || len2 <= 0)
    1059            0 :                 return (float4) 0.0;
    1060              : 
    1061            0 :         while (ptr1 - GETARR(trg1) < len1 && ptr2 - GETARR(trg2) < len2)    /* merge-style scan; assumes both arrays are sorted */
    1062              :         {
    1063            0 :                 int                     res = CMPTRGM(ptr1, ptr2);
    1064              : 
    1065            0 :                 if (res < 0)
    1066            0 :                         ptr1++;
    1067            0 :                 else if (res > 0)
    1068            0 :                         ptr2++;
    1069              :                 else
    1070              :                 {
    1071            0 :                         ptr1++;
    1072            0 :                         ptr2++;
    1073            0 :                         count++;        /* trigram common to both arrays */
    1074              :                 }
    1075            0 :         }
    1076              : 
    1077              :         /*
    1078              :          * If inexact then count is used in place of len2, because we don't know
    1079              :          * actual length of second string in inexact search and we can assume that
    1080              :          * count is a lower bound of len2.
    1081              :          */
    1082            0 :         return CALCSML(count, len1, inexact ? count : len2);
    1083            0 : }
    1084              : 
    1085              : 
    1086              : /*
    1087              :  * Returns whether trg2 contains all trigrams in trg1.
    1088              :  * This relies on both trigram arrays being sorted.
    1089              :  */
    1090              : bool
    1091            0 : trgm_contained_by(TRGM *trg1, TRGM *trg2)
    1092              : {
    1093            0 :         trgm       *ptr1,
    1094              :                            *ptr2;
    1095            0 :         int                     len1,
    1096              :                                 len2;
    1097              : 
    1098            0 :         ptr1 = GETARR(trg1);
    1099            0 :         ptr2 = GETARR(trg2);
    1100              : 
    1101            0 :         len1 = ARRNELEM(trg1);
    1102            0 :         len2 = ARRNELEM(trg2);
    1103              : 
    1104            0 :         while (ptr1 - GETARR(trg1) < len1 && ptr2 - GETARR(trg2) < len2)
    1105              :         {
    1106            0 :                 int                     res = CMPTRGM(ptr1, ptr2);
    1107              : 
    1108            0 :                 if (res < 0)
    1109            0 :                         return false;   /* *ptr1 sorts before all that is left of trg2 */
    1110            0 :                 else if (res > 0)
    1111            0 :                         ptr2++;
    1112              :                 else
    1113              :                 {
    1114            0 :                         ptr1++;
    1115            0 :                         ptr2++;
    1116              :                 }
    1117            0 :         }
    1118            0 :         if (ptr1 - GETARR(trg1) < len1) /* trg2 ran out with trg1 trigrams unmatched */
    1119            0 :                 return false;
    1120              :         else
    1121            0 :                 return true;
    1122            0 : }
    1123              : 
    1124              : /*
    1125              :  * Return a palloc'd boolean array showing, for each trigram in "query",
    1126              :  * whether it is present in the trigram array "key".
    1127              :  * This relies on the "key" array being sorted, but "query" need not be.
                         *
                         * The result has ARRNELEM(query) entries; result[i] describes the i-th
                         * trigram of "query", in the order the trigrams are stored there.
    1128              :  */
    1129              : bool *
    1130            0 : trgm_presence_map(TRGM *query, TRGM *key)
    1131              : {
    1132            0 :         bool       *result;
    1133            0 :         trgm       *ptrq = GETARR(query),
    1134            0 :                            *ptrk = GETARR(key);
    1135            0 :         int                     lenq = ARRNELEM(query),
    1136            0 :                                 lenk = ARRNELEM(key),
    1137              :                                 i;
    1138              : 
                                /* zero-filled allocation: every entry starts out as false */
    1139            0 :         result = palloc0_array(bool, lenq);
    1140              : 
    1141              :         /* for each query trigram, do a binary search in the key array */
    1142            0 :         for (i = 0; i < lenq; i++)
    1143              :         {
                                        /* candidates in key are the positions lo .. hi - 1 */
    1144            0 :                 int                     lo = 0;
    1145            0 :                 int                     hi = lenk;
    1146              : 
    1147            0 :                 while (lo < hi)
    1148              :                 {
    1149            0 :                         int                     mid = (lo + hi) / 2;
    1150            0 :                         int                     res = CMPTRGM(ptrq, ptrk + mid);
    1151              : 
                                                /* narrow to the half that can still hold the trigram */
    1152            0 :                         if (res < 0)
    1153            0 :                                 hi = mid;
    1154            0 :                         else if (res > 0)
    1155            0 :                                 lo = mid + 1;
    1156              :                         else
    1157              :                         {
    1158            0 :                                 result[i] = true;
    1159            0 :                                 break;
    1160              :                         }
    1161            0 :                 }
                                        /* on a miss result[i] is left untouched, i.e. false */
    1162            0 :                 ptrq++;
    1163            0 :         }
    1164              : 
    1165            0 :         return result;
    1166            0 : }
    1167              : 
                        /*
                         * similarity
                         *
                         * fmgr (V1) entry point taking two text arguments.  Builds a trigram
                         * array from each argument with generate_trgm() and returns, as float4,
                         * the value cnt_sml() computes for the two arrays.
                         */
    1168              : Datum
    1169            0 : similarity(PG_FUNCTION_ARGS)
    1170              : {
    1171            0 :         text       *in1 = PG_GETARG_TEXT_PP(0);
    1172            0 :         text       *in2 = PG_GETARG_TEXT_PP(1);
    1173            0 :         TRGM       *trg1,
    1174              :                            *trg2;
    1175            0 :         float4          res;
    1176              : 
                                /* trigrams are generated from the payload bytes, header excluded */
    1177            0 :         trg1 = generate_trgm(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1));
    1178            0 :         trg2 = generate_trgm(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2));
    1179              : 
                                /*
                                 * NOTE(review): the third argument is presumably the inexact flag
                                 * of cnt_sml(), so false asks for the exact computation; confirm
                                 * against the definition of cnt_sml().
                                 */
    1180            0 :         res = cnt_sml(trg1, trg2, false);
    1181              : 
                                /* release the trigram arrays and any copies made for the arguments */
    1182            0 :         pfree(trg1);
    1183            0 :         pfree(trg2);
    1184            0 :         PG_FREE_IF_COPY(in1, 0);
    1185            0 :         PG_FREE_IF_COPY(in2, 1);
    1186              : 
    1187            0 :         PG_RETURN_FLOAT4(res);
    1188            0 : }
    1189              : 
                        /*
                         * word_similarity
                         *
                         * fmgr (V1) entry point taking two text arguments.  Returns, as float4,
                         * the result of calc_word_similarity() for the first and second argument
                         * (in that order) with no flag bits set.
                         */
    1190              : Datum
    1191            0 : word_similarity(PG_FUNCTION_ARGS)
    1192              : {
    1193            0 :         text       *in1 = PG_GETARG_TEXT_PP(0);
    1194            0 :         text       *in2 = PG_GETARG_TEXT_PP(1);
    1195            0 :         float4          res;
    1196              : 
                                /* data pointer and byte length of each argument; flags = 0 */
    1197            0 :         res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
    1198            0 :                                                            VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
    1199              :                                                            0);
    1200              : 
    1201            0 :         PG_FREE_IF_COPY(in1, 0);
    1202            0 :         PG_FREE_IF_COPY(in2, 1);
    1203            0 :         PG_RETURN_FLOAT4(res);
    1204            0 : }
    1205              : 
                        /*
                         * strict_word_similarity
                         *
                         * fmgr (V1) entry point taking two text arguments.  Returns, as float4,
                         * the result of calc_word_similarity() for the first and second argument
                         * (in that order) with the WORD_SIMILARITY_STRICT flag set.
                         *
                         * NOTE(review): the meaning of the flag is defined together with
                         * calc_word_similarity(), outside this part of the file.
                         */
    1206              : Datum
    1207            0 : strict_word_similarity(PG_FUNCTION_ARGS)
    1208              : {
    1209            0 :         text       *in1 = PG_GETARG_TEXT_PP(0);
    1210            0 :         text       *in2 = PG_GETARG_TEXT_PP(1);
    1211            0 :         float4          res;
    1212              : 
    1213            0 :         res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
    1214            0 :                                                            VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
    1215              :                                                            WORD_SIMILARITY_STRICT);
    1216              : 
    1217            0 :         PG_FREE_IF_COPY(in1, 0);
    1218            0 :         PG_FREE_IF_COPY(in2, 1);
    1219            0 :         PG_RETURN_FLOAT4(res);
    1220            0 : }
    1221              : 
                        /*
                         * similarity_dist
                         *
                         * Distance form of similarity(): calls similarity() on the same two
                         * arguments through DirectFunctionCall2 and returns 1.0 minus that
                         * value as float4.
                         */
    1222              : Datum
    1223            0 : similarity_dist(PG_FUNCTION_ARGS)
    1224              : {
                                /* arguments are forwarded as raw Datums, untouched */
    1225            0 :         float4          res = DatumGetFloat4(DirectFunctionCall2(similarity,
    1226              :                                                                                                                  PG_GETARG_DATUM(0),
    1227              :                                                                                                                  PG_GETARG_DATUM(1)));
    1228              : 
    1229            0 :         PG_RETURN_FLOAT4(1.0 - res);
    1230            0 : }
    1231              : 
                        /*
                         * similarity_op
                         *
                         * Boolean form of similarity(): calls similarity() on the same two
                         * arguments through DirectFunctionCall2 and returns true when the value
                         * is greater than or equal to the similarity_threshold GUC variable.
                         */
    1232              : Datum
    1233            0 : similarity_op(PG_FUNCTION_ARGS)
    1234              : {
    1235            0 :         float4          res = DatumGetFloat4(DirectFunctionCall2(similarity,
    1236              :                                                                                                                  PG_GETARG_DATUM(0),
    1237              :                                                                                                                  PG_GETARG_DATUM(1)));
    1238              : 
                                /* float4 result compared against the double-typed threshold */
    1239            0 :         PG_RETURN_BOOL(res >= similarity_threshold);
    1240            0 : }
    1241              : 
                        /*
                         * word_similarity_op
                         *
                         * fmgr (V1) entry point taking two text arguments.  Returns true when
                         * calc_word_similarity() for the first and second argument (in that
                         * order) is greater than or equal to the word_similarity_threshold GUC
                         * variable.
                         *
                         * NOTE(review): WORD_SIMILARITY_CHECK_ONLY is defined outside this part
                         * of the file; presumably it tells calc_word_similarity() that the
                         * caller only needs a threshold test -- confirm.
                         */
    1242              : Datum
    1243            0 : word_similarity_op(PG_FUNCTION_ARGS)
    1244              : {
    1245            0 :         text       *in1 = PG_GETARG_TEXT_PP(0);
    1246            0 :         text       *in2 = PG_GETARG_TEXT_PP(1);
    1247            0 :         float4          res;
    1248              : 
    1249            0 :         res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
    1250            0 :                                                            VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
    1251              :                                                            WORD_SIMILARITY_CHECK_ONLY);
    1252              : 
    1253            0 :         PG_FREE_IF_COPY(in1, 0);
    1254            0 :         PG_FREE_IF_COPY(in2, 1);
    1255            0 :         PG_RETURN_BOOL(res >= word_similarity_threshold);
    1256            0 : }
    1257              : 
                        /*
                         * word_similarity_commutator_op
                         *
                         * Same threshold test as word_similarity_op, but the arguments are
                         * handed to calc_word_similarity() in swapped order: the second SQL
                         * argument comes first and the first SQL argument second.
                         */
    1258              : Datum
    1259            0 : word_similarity_commutator_op(PG_FUNCTION_ARGS)
    1260              : {
    1261            0 :         text       *in1 = PG_GETARG_TEXT_PP(0);
    1262            0 :         text       *in2 = PG_GETARG_TEXT_PP(1);
    1263            0 :         float4          res;
    1264              : 
                                /* note the order: in2 first, then in1 */
    1265            0 :         res = calc_word_similarity(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
    1266            0 :                                                            VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
    1267              :                                                            WORD_SIMILARITY_CHECK_ONLY);
    1268              : 
    1269            0 :         PG_FREE_IF_COPY(in1, 0);
    1270            0 :         PG_FREE_IF_COPY(in2, 1);
    1271            0 :         PG_RETURN_BOOL(res >= word_similarity_threshold);
    1272            0 : }
    1273              : 
                        /*
                         * word_similarity_dist_op
                         *
                         * fmgr (V1) entry point taking two text arguments.  Returns, as float4,
                         * 1.0 minus the result of calc_word_similarity() for the first and
                         * second argument (in that order) with no flag bits set.
                         */
    1274              : Datum
    1275            0 : word_similarity_dist_op(PG_FUNCTION_ARGS)
    1276              : {
    1277            0 :         text       *in1 = PG_GETARG_TEXT_PP(0);
    1278            0 :         text       *in2 = PG_GETARG_TEXT_PP(1);
    1279            0 :         float4          res;
    1280              : 
    1281            0 :         res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
    1282            0 :                                                            VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
    1283              :                                                            0);
    1284              : 
    1285            0 :         PG_FREE_IF_COPY(in1, 0);
    1286            0 :         PG_FREE_IF_COPY(in2, 1);
    1287            0 :         PG_RETURN_FLOAT4(1.0 - res);
    1288            0 : }
    1289              : 
                        /*
                         * word_similarity_dist_commutator_op
                         *
                         * Same distance as word_similarity_dist_op, but the arguments are handed
                         * to calc_word_similarity() in swapped order: the second SQL argument
                         * comes first and the first SQL argument second.
                         */
    1290              : Datum
    1291            0 : word_similarity_dist_commutator_op(PG_FUNCTION_ARGS)
    1292              : {
    1293            0 :         text       *in1 = PG_GETARG_TEXT_PP(0);
    1294            0 :         text       *in2 = PG_GETARG_TEXT_PP(1);
    1295            0 :         float4          res;
    1296              : 
                                /* note the order: in2 first, then in1 */
    1297            0 :         res = calc_word_similarity(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
    1298            0 :                                                            VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
    1299              :                                                            0);
    1300              : 
    1301            0 :         PG_FREE_IF_COPY(in1, 0);
    1302            0 :         PG_FREE_IF_COPY(in2, 1);
    1303            0 :         PG_RETURN_FLOAT4(1.0 - res);
    1304            0 : }
    1305              : 
                        /*
                         * strict_word_similarity_op
                         *
                         * fmgr (V1) entry point taking two text arguments.  Returns true when
                         * calc_word_similarity() for the first and second argument (in that
                         * order), called with both WORD_SIMILARITY_CHECK_ONLY and
                         * WORD_SIMILARITY_STRICT set, is greater than or equal to the
                         * strict_word_similarity_threshold GUC variable.
                         *
                         * NOTE(review): both flags are defined outside this part of the file;
                         * check their meaning next to calc_word_similarity().
                         */
    1306              : Datum
    1307            0 : strict_word_similarity_op(PG_FUNCTION_ARGS)
    1308              : {
    1309            0 :         text       *in1 = PG_GETARG_TEXT_PP(0);
    1310            0 :         text       *in2 = PG_GETARG_TEXT_PP(1);
    1311            0 :         float4          res;
    1312              : 
    1313            0 :         res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
    1314            0 :                                                            VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
    1315              :                                                            WORD_SIMILARITY_CHECK_ONLY | WORD_SIMILARITY_STRICT);
    1316              : 
    1317            0 :         PG_FREE_IF_COPY(in1, 0);
    1318            0 :         PG_FREE_IF_COPY(in2, 1);
    1319            0 :         PG_RETURN_BOOL(res >= strict_word_similarity_threshold);
    1320            0 : }
    1321              : 
                        /*
                         * strict_word_similarity_commutator_op
                         *
                         * Same threshold test as strict_word_similarity_op, but the arguments
                         * are handed to calc_word_similarity() in swapped order: the second SQL
                         * argument comes first and the first SQL argument second.
                         */
    1322              : Datum
    1323            0 : strict_word_similarity_commutator_op(PG_FUNCTION_ARGS)
    1324              : {
    1325            0 :         text       *in1 = PG_GETARG_TEXT_PP(0);
    1326            0 :         text       *in2 = PG_GETARG_TEXT_PP(1);
    1327            0 :         float4          res;
    1328              : 
                                /* note the order: in2 first, then in1 */
    1329            0 :         res = calc_word_similarity(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
    1330            0 :                                                            VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
    1331              :                                                            WORD_SIMILARITY_CHECK_ONLY | WORD_SIMILARITY_STRICT);
    1332              : 
    1333            0 :         PG_FREE_IF_COPY(in1, 0);
    1334            0 :         PG_FREE_IF_COPY(in2, 1);
    1335            0 :         PG_RETURN_BOOL(res >= strict_word_similarity_threshold);
    1336            0 : }
    1337              : 
                        /*
                         * strict_word_similarity_dist_op
                         *
                         * fmgr (V1) entry point taking two text arguments.  Returns, as float4,
                         * 1.0 minus the result of calc_word_similarity() for the first and
                         * second argument (in that order) with WORD_SIMILARITY_STRICT set.
                         */
    1338              : Datum
    1339            0 : strict_word_similarity_dist_op(PG_FUNCTION_ARGS)
    1340              : {
    1341            0 :         text       *in1 = PG_GETARG_TEXT_PP(0);
    1342            0 :         text       *in2 = PG_GETARG_TEXT_PP(1);
    1343            0 :         float4          res;
    1344              : 
    1345            0 :         res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
    1346            0 :                                                            VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
    1347              :                                                            WORD_SIMILARITY_STRICT);
    1348              : 
    1349            0 :         PG_FREE_IF_COPY(in1, 0);
    1350            0 :         PG_FREE_IF_COPY(in2, 1);
    1351            0 :         PG_RETURN_FLOAT4(1.0 - res);
    1352            0 : }
    1353              : 
                        /*
                         * strict_word_similarity_dist_commutator_op
                         *
                         * Same distance as strict_word_similarity_dist_op, but the arguments are
                         * handed to calc_word_similarity() in swapped order: the second SQL
                         * argument comes first and the first SQL argument second.
                         */
    1354              : Datum
    1355            0 : strict_word_similarity_dist_commutator_op(PG_FUNCTION_ARGS)
    1356              : {
    1357            0 :         text       *in1 = PG_GETARG_TEXT_PP(0);
    1358            0 :         text       *in2 = PG_GETARG_TEXT_PP(1);
    1359            0 :         float4          res;
    1360              : 
                                /* note the order: in2 first, then in1 */
    1361            0 :         res = calc_word_similarity(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2),
    1362            0 :                                                            VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1),
    1363              :                                                            WORD_SIMILARITY_STRICT);
    1364              : 
    1365            0 :         PG_FREE_IF_COPY(in1, 0);
    1366            0 :         PG_FREE_IF_COPY(in2, 1);
    1367            0 :         PG_RETURN_FLOAT4(1.0 - res);
    1368            0 : }
        

Generated by: LCOV version 2.3.2-1