LCOV - Code coverage - src/backend/tsearch/dict

LCOV - code coverage report

Current view:	top level - src/backend/tsearch - dict_thesaurus.c (source / functions)		Coverage	Total	Hit
Test:	Code coverage	Lines:	87.9 %	455	400
Test Date:	2026-01-26 10:56:24	Functions:	100.0 %	17	17
Legend:	Lines: hit not hit Branches: + taken - not taken # not executed	Branches:	59.3 %	329	195

             Branch data     Line data    Source code

       1                 :             : /*-------------------------------------------------------------------------
       2                 :             :  *
       3                 :             :  * dict_thesaurus.c
       4                 :             :  *              Thesaurus dictionary: phrase to phrase substitution
       5                 :             :  *
       6                 :             :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7                 :             :  *
       8                 :             :  *
       9                 :             :  * IDENTIFICATION
      10                 :             :  *        src/backend/tsearch/dict_thesaurus.c
      11                 :             :  *
      12                 :             :  *-------------------------------------------------------------------------
      13                 :             :  */
      14                 :             : #include "postgres.h"
      15                 :             : 
      16                 :             : #include "catalog/namespace.h"
      17                 :             : #include "commands/defrem.h"
      18                 :             : #include "tsearch/ts_cache.h"
      19                 :             : #include "tsearch/ts_locale.h"
      20                 :             : #include "tsearch/ts_public.h"
      21                 :             : #include "utils/fmgrprotos.h"
      22                 :             : #include "utils/regproc.h"
      23                 :             : 
      24                 :             : 
      25                 :             : /*
      26                 :             :  * Temporarily we use TSLexeme.flags for internal use: addWrd() stores
                                          * DT_USEASIS in the flags of a substitute word that thesaurusRead() saw
                                          * written with a leading '*' in the thesaurus file.
                                          * NOTE(review): how the flag is consumed is outside this view -- confirm.
      27                 :             :  */
      28                 :             : #define DT_USEASIS              0x1000
      29                 :             : 
                                         /*
                                          * LexemeInfo: where one sample lexeme occurs, i.e. which thesaurus rule
                                          * (idsubst) and which position inside that rule's sample phrase
                                          * (posinsubst).
                                          */
      30                 :             : typedef struct LexemeInfo
      31                 :             : {
      32                 :             :         uint32          idsubst;                /* entry's number in DictThesaurus->subst */
      33                 :             :         uint16          posinsubst;             /* pos info in entry */
      34                 :             :         uint16          tnvariant;              /* total num lexemes in one variant */
      35                 :             :         struct LexemeInfo *nextentry;   /* set to NULL on creation; presumably chains further occurrences -- TODO confirm */
      36                 :             :         struct LexemeInfo *nextvariant; /* not assigned by newLexeme()/addCompiledLexeme() -- TODO confirm use */
      37                 :             : } LexemeInfo;
      38                 :             : 
                                         /*
                                          * TheLexeme: one sample word together with its occurrence info.
                                          */
      39                 :             : typedef struct
      40                 :             : {
      41                 :             :         char       *lexeme;     /* NUL-terminated word; addCompiledLexeme() stores NULL when given no lexeme */
      42                 :             :         LexemeInfo *entries;    /* occurrence record allocated together with the word */
      43                 :             : } TheLexeme;
      44                 :             : 
                                         /*
                                          * TheSubstitute: the replacement side of one thesaurus rule.
                                          *
                                          * addWrd() fills lastlexeme with (posinsubst - 1) and builds res as an
                                          * array that always ends with an element whose lexeme is NULL.  reslen is
                                          * not written by the loading code visible here.
                                          */
      45                 :             : typedef struct
      46                 :             : {
      47                 :             :         uint16          lastlexeme;             /* number lexemes to substitute */
      48                 :             :         uint16          reslen;
      49                 :             :         TSLexeme   *res;                        /* prepared substituted result */
      50                 :             : } TheSubstitute;
      51                 :             : 
                                         /*
                                          * DictThesaurus: in-memory state of one thesaurus dictionary: the
                                          * subdictionary used to normalize words, the array of sample words, and
                                          * the array of per-rule substitutions.
                                          */
      52                 :             : typedef struct
      53                 :             : {
      54                 :             :         /* subdictionary to normalize lexemes */
      55                 :             :         Oid                     subdictOid;
      56                 :             :         TSDictionaryCacheEntry *subdict;
      57                 :             : 
      58                 :             :         /* Array to search lexeme by exact match */
      59                 :             :         TheLexeme  *wrds;
      60                 :             :         int                     nwrds;                  /* current number of words */
      61                 :             :         int                     ntwrds;                 /* allocated array length */
      62                 :             : 
      63                 :             :         /*
      64                 :             :          * Storage of substituted result, n-th element is for n-th expression
      65                 :             :          */
      66                 :             :         TheSubstitute *subst;
                                                 /*
                                                  * While addWrd() is loading rules nsubst is the allocated length of
                                                  * subst; thesaurusRead() overwrites it with the number of rules read
                                                  * once the file has been consumed.
                                                  */
      67                 :             :         int                     nsubst;
      68                 :             : } DictThesaurus;
      69                 :             : 
      70                 :             : 
                                         /*
                                          * newLexeme
                                          *              Append one sample word to d->wrds.
                                          *
                                          * The word is the byte range [b, e); it is copied into freshly allocated,
                                          * NUL-terminated storage.  A single LexemeInfo is attached that records
                                          * the rule number (idsubst) and the word's position in the sample phrase
                                          * (posinsubst).  d->wrds starts at 16 slots and doubles when full.
                                          *
                                          * Note that tnvariant and nextvariant of the new LexemeInfo are not
                                          * assigned here.
                                          */
      71                 :             : static void
      72                 :          30 : newLexeme(DictThesaurus *d, char *b, char *e, uint32 idsubst, uint16 posinsubst)
      73                 :             : {
      74                 :          30 :         TheLexeme  *ptr;
      75                 :             : 
                                                 /* make room for one more word */
      76         [ +  + ]:          30 :         if (d->nwrds >= d->ntwrds)
      77                 :             :         {
      78         [ -  + ]:           2 :                 if (d->ntwrds == 0)
      79                 :             :                 {
      80                 :           2 :                         d->ntwrds = 16;
      81                 :           2 :                         d->wrds = palloc_array(TheLexeme, d->ntwrds);
      82                 :           2 :                 }
      83                 :             :                 else
      84                 :             :                 {
      85                 :           0 :                         d->ntwrds *= 2;
      86                 :           0 :                         d->wrds = repalloc_array(d->wrds, TheLexeme, d->ntwrds);
      87                 :             :                 }
      88                 :           2 :         }
      89                 :             : 
      90                 :          30 :         ptr = d->wrds + d->nwrds;
      91                 :          30 :         d->nwrds++;
      92                 :             : 
                                                 /* private NUL-terminated copy of [b, e) */
      93                 :          30 :         ptr->lexeme = palloc(e - b + 1);
      94                 :             : 
      95                 :          30 :         memcpy(ptr->lexeme, b, e - b);
      96                 :          30 :         ptr->lexeme[e - b] = '\0';
      97                 :             : 
      98                 :          30 :         ptr->entries = palloc_object(LexemeInfo);
      99                 :             : 
     100                 :          30 :         ptr->entries->nextentry = NULL;
     101                 :          30 :         ptr->entries->idsubst = idsubst;
     102                 :          30 :         ptr->entries->posinsubst = posinsubst;
     103                 :          30 : }
     104                 :             : 
                                         /*
                                          * addWrd
                                          *              Append one substitute word to rule number idsubst.
                                          *
                                          * The word is the byte range [b, e).  nwrd is the word's index within the
                                          * rule's substitute phrase and is stored as the lexeme's nvariant;
                                          * posinsubst is the number of sample words seen for the rule and is used
                                          * to set lastlexeme.  useasis selects DT_USEASIS in the lexeme's flags.
                                          *
                                          * The fill level and capacity of the current rule's res array live in the
                                          * function-static variables nres/ntres.  They are reset only when
                                          * nwrd == 0, so callers must pass nwrd == 0 for the first word of every
                                          * rule and finish one rule before starting the next.
                                          */
     105                 :             : static void
     106                 :          24 : addWrd(DictThesaurus *d, char *b, char *e, uint32 idsubst, uint16 nwrd, uint16 posinsubst, bool useasis)
     107                 :             : {
     108                 :             :         static int      nres = 0;
     109                 :             :         static int      ntres = 0;
     110                 :          24 :         TheSubstitute *ptr;
     111                 :             : 
                                                 /* first word of a rule: start a fresh result array, grow d->subst */
     112         [ +  + ]:          24 :         if (nwrd == 0)
     113                 :             :         {
     114                 :          16 :                 nres = ntres = 0;
     115                 :             : 
     116         [ +  + ]:          16 :                 if (idsubst >= d->nsubst)
     117                 :             :                 {
     118         [ -  + ]:           2 :                         if (d->nsubst == 0)
     119                 :             :                         {
     120                 :           2 :                                 d->nsubst = 16;
     121                 :           2 :                                 d->subst = palloc_array(TheSubstitute, d->nsubst);
     122                 :           2 :                         }
     123                 :             :                         else
     124                 :             :                         {
     125                 :           0 :                                 d->nsubst *= 2;
     126                 :           0 :                                 d->subst = repalloc_array(d->subst, TheSubstitute, d->nsubst);
     127                 :             :                         }
     128                 :           2 :                 }
     129                 :          16 :         }
     130                 :             : 
     131                 :          24 :         ptr = d->subst + idsubst;
     132                 :             : 
     133                 :          24 :         ptr->lastlexeme = posinsubst - 1;
     134                 :             : 
                                                 /* "+ 1" keeps a slot free for the terminating NULL-lexeme element */
     135         [ +  + ]:          24 :         if (nres + 1 >= ntres)
     136                 :             :         {
     137         [ +  + ]:          20 :                 if (ntres == 0)
     138                 :             :                 {
     139                 :          16 :                         ntres = 2;
     140                 :          16 :                         ptr->res = palloc_array(TSLexeme, ntres);
     141                 :          16 :                 }
     142                 :             :                 else
     143                 :             :                 {
     144                 :           4 :                         ntres *= 2;
     145                 :           4 :                         ptr->res = repalloc_array(ptr->res, TSLexeme, ntres);
     146                 :             :                 }
     147                 :          20 :         }
     148                 :             : 
     149                 :          24 :         ptr->res[nres].lexeme = palloc(e - b + 1);
     150                 :          24 :         memcpy(ptr->res[nres].lexeme, b, e - b);
     151                 :          24 :         ptr->res[nres].lexeme[e - b] = '\0';
     152                 :             : 
     153                 :          24 :         ptr->res[nres].nvariant = nwrd;
     154         [ +  + ]:          24 :         if (useasis)
     155                 :          12 :                 ptr->res[nres].flags = DT_USEASIS;
     156                 :             :         else
     157                 :          12 :                 ptr->res[nres].flags = 0;
     158                 :             : 
                                                 /* advance and re-terminate the array */
     159                 :          24 :         ptr->res[++nres].lexeme = NULL;
     160                 :          24 : }
     161                 :             : 
                                         /*
                                          * States of the per-line parser in thesaurusRead():
                                          *      TR_WAITLEX      before ':', between sample words
                                          *      TR_INLEX        inside a sample word
                                          *      TR_WAITSUBS     after ':', between substitute words
                                          *      TR_INSUBS       inside a substitute word
                                          */
     162                 :             : #define TR_WAITLEX      1
     163                 :             : #define TR_INLEX        2
     164                 :             : #define TR_WAITSUBS 3
     165                 :             : #define TR_INSUBS       4
     166                 :             : 
                                         /*
                                          * thesaurusRead
                                          *              Load the thesaurus file named by filename into d.
                                          *
                                          * The file name is resolved with get_tsearch_config_filename(filename,
                                          * "ths").  Lines that are empty or whose first non-space character is '#'
                                          * are skipped.  Every other line is parsed as
                                          *
                                          *              sample words : substitute words
                                          *
                                          * Each sample word is passed to newLexeme() and each substitute word to
                                          * addWrd(); all words of one line share the same rule number (idsubst),
                                          * which is incremented per accepted line.  On return d->nsubst holds the
                                          * number of rules read.
                                          *
                                          * Raises ERRCODE_CONFIG_FILE_ERROR if the file cannot be opened, if ':'
                                          * appears before any sample word, if a substitute word is empty, if a
                                          * line lacks sample or substitute words, or if a word count does not fit
                                          * in uint16.
                                          */
     167                 :             : static void
     168                 :           2 : thesaurusRead(const char *filename, DictThesaurus *d)
     169                 :             : {
     170                 :           2 :         char       *real_filename = get_tsearch_config_filename(filename, "ths");
     171                 :           2 :         tsearch_readline_state trst;
     172                 :           2 :         uint32          idsubst = 0;
     173                 :           2 :         bool            useasis = false;
     174                 :           2 :         char       *line;
     175                 :             : 
     176         [ +  - ]:           2 :         if (!tsearch_readline_begin(&trst, real_filename))
     177   [ #  #  #  # ]:           0 :                 ereport(ERROR,
     178                 :             :                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
     179                 :             :                                  errmsg("could not open thesaurus file \"%s\": %m",
     180                 :             :                                                 real_filename)));
     181                 :             : 
     182         [ +  + ]:          36 :         while ((line = tsearch_readline(&trst)) != NULL)
     183                 :             :         {
     184                 :          34 :                 char       *ptr;
     185                 :          34 :                 int                     state = TR_WAITLEX;
     186                 :          34 :                 char       *beginwrd = NULL;
     187                 :          34 :                 uint32          posinsubst = 0; /* sample words seen on this line */
     188                 :          34 :                 uint32          nwrd = 0;       /* substitute words seen on this line */
     189                 :             : 
     190                 :          34 :                 ptr = line;
     191                 :             : 
     192                 :             :                 /* is it a comment? */
     193   [ +  +  +  + ]:          38 :                 while (*ptr && isspace((unsigned char) *ptr))
     194                 :           4 :                         ptr += pg_mblen(ptr);
     195                 :             : 
     196   [ +  +  +  + ]:          34 :                 if (t_iseq(ptr, '#') || *ptr == '\0' ||
     197   [ +  -  -  + ]:          16 :                         t_iseq(ptr, '\n') || t_iseq(ptr, '\r'))
     198                 :             :                 {
     199                 :          18 :                         pfree(line);
     200                 :          18 :                         continue;
     201                 :             :                 }
     202                 :             : 
                                                         /* walk the line one (possibly multibyte) character at a time */
     203         [ +  + ]:         370 :                 while (*ptr)
     204                 :             :                 {
     205         [ +  + ]:         354 :                         if (state == TR_WAITLEX)
     206                 :             :                         {
     207         [ +  + ]:          46 :                                 if (t_iseq(ptr, ':'))
     208                 :             :                                 {
                                                                                 /* a rule needs at least one sample word */
     209         [ +  - ]:          16 :                                         if (posinsubst == 0)
     210   [ #  #  #  # ]:           0 :                                                 ereport(ERROR,
     211                 :             :                                                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
     212                 :             :                                                                  errmsg("unexpected delimiter")));
     213                 :          16 :                                         state = TR_WAITSUBS;
     214                 :          16 :                                 }
     215         [ +  - ]:          30 :                                 else if (!isspace((unsigned char) *ptr))
     216                 :             :                                 {
     217                 :          30 :                                         beginwrd = ptr;
     218                 :          30 :                                         state = TR_INLEX;
     219                 :          30 :                                 }
     220                 :          46 :                         }
     221         [ +  + ]:         308 :                         else if (state == TR_INLEX)
     222                 :             :                         {
                                                                         /* a sample word ends at ':' or at whitespace */
     223         [ -  + ]:         154 :                                 if (t_iseq(ptr, ':'))
     224                 :             :                                 {
     225                 :           0 :                                         newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
     226                 :           0 :                                         state = TR_WAITSUBS;
     227                 :           0 :                                 }
     228         [ +  + ]:         154 :                                 else if (isspace((unsigned char) *ptr))
     229                 :             :                                 {
     230                 :          30 :                                         newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
     231                 :          30 :                                         state = TR_WAITLEX;
     232                 :          30 :                                 }
     233                 :         154 :                         }
     234         [ +  + ]:         154 :                         else if (state == TR_WAITSUBS)
     235                 :             :                         {
                                                                         /* leading '*': word starts after it and is flagged use-as-is */
     236         [ +  + ]:          40 :                                 if (t_iseq(ptr, '*'))
     237                 :             :                                 {
     238                 :          12 :                                         useasis = true;
     239                 :          12 :                                         state = TR_INSUBS;
     240                 :          12 :                                         beginwrd = ptr + pg_mblen(ptr);
     241                 :          12 :                                 }
                                                                         /* leading backslash: word starts after it, not flagged */
     242         [ -  + ]:          28 :                                 else if (t_iseq(ptr, '\\'))
     243                 :             :                                 {
     244                 :           0 :                                         useasis = false;
     245                 :           0 :                                         state = TR_INSUBS;
     246                 :           0 :                                         beginwrd = ptr + pg_mblen(ptr);
     247                 :           0 :                                 }
     248         [ +  + ]:          28 :                                 else if (!isspace((unsigned char) *ptr))
     249                 :             :                                 {
     250                 :          12 :                                         useasis = false;
     251                 :          12 :                                         beginwrd = ptr;
     252                 :          12 :                                         state = TR_INSUBS;
     253                 :          12 :                                 }
     254                 :          40 :                         }
     255         [ +  - ]:         114 :                         else if (state == TR_INSUBS)
     256                 :             :                         {
     257         [ +  + ]:         114 :                                 if (isspace((unsigned char) *ptr))
     258                 :             :                                 {
                                                                                 /* empty word, i.e. '*' or '\' directly followed by space */
     259         [ +  - ]:          24 :                                         if (ptr == beginwrd)
     260   [ #  #  #  # ]:           0 :                                                 ereport(ERROR,
     261                 :             :                                                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
     262                 :             :                                                                  errmsg("unexpected end of line or lexeme")));
     263                 :          24 :                                         addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
     264                 :          24 :                                         state = TR_WAITSUBS;
     265                 :          24 :                                 }
     266                 :         114 :                         }
     267                 :             :                         else
     268   [ #  #  #  # ]:           0 :                                 elog(ERROR, "unrecognized thesaurus state: %d", state);
     269                 :             : 
     270                 :         354 :                         ptr += pg_mblen(ptr);
     271                 :             :                 }
     272                 :             : 
                                                         /* line ended inside a substitute word: flush that last word */
     273         [ +  - ]:          16 :                 if (state == TR_INSUBS)
     274                 :             :                 {
     275         [ #  # ]:           0 :                         if (ptr == beginwrd)
     276   [ #  #  #  # ]:           0 :                                 ereport(ERROR,
     277                 :             :                                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
     278                 :             :                                                  errmsg("unexpected end of line or lexeme")));
     279                 :           0 :                         addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
     280                 :           0 :                 }
     281                 :             : 
     282                 :          16 :                 idsubst++;
     283                 :             : 
                                                         /* both sides of the rule must be non-empty */
     284         [ +  - ]:          16 :                 if (!(nwrd && posinsubst))
     285   [ #  #  #  # ]:           0 :                         ereport(ERROR,
     286                 :             :                                         (errcode(ERRCODE_CONFIG_FILE_ERROR),
     287                 :             :                                          errmsg("unexpected end of line")));
     288                 :             : 
                                                         /*
                                                          * newLexeme() and addWrd() receive these counters as uint16, so
                                                          * reject a rule whose counts would not survive that narrowing.
                                                          */
     289         [ +  - ]:          16 :                 if (nwrd != (uint16) nwrd || posinsubst != (uint16) posinsubst)
     290   [ #  #  #  # ]:           0 :                         ereport(ERROR,
     291                 :             :                                         (errcode(ERRCODE_CONFIG_FILE_ERROR),
     292                 :             :                                          errmsg("too many lexemes in thesaurus entry")));
     293                 :             : 
     294                 :          16 :                 pfree(line);
     295      [ -  +  + ]:          34 :         }
     296                 :             : 
                                                 /* from here on nsubst means "number of rules", not allocated length */
     297                 :           2 :         d->nsubst = idsubst;
     298                 :             : 
     299                 :           2 :         tsearch_readline_end(&trst);
     300                 :           2 :         pfree(real_filename);
     301                 :           2 : }
     302                 :             : 
                                         /*
                                          * addCompiledLexeme
                                          *              Append one entry to the newwrds array.
                                          *
                                          * *nnw is the number of entries in use and *tnm the allocated length; the
                                          * array is doubled when full, so callers must continue with the returned
                                          * pointer.  If lexeme and lexeme->lexeme are both non-NULL the string is
                                          * copied with pstrdup() and tnvariant is stored; otherwise the entry gets
                                          * a NULL lexeme and tnvariant 1.  idsubst and posinsubst are copied from
                                          * src.  nextentry is set to NULL; nextvariant is not assigned here.
                                          *
                                          * Returns the (possibly reallocated) array and increments *nnw.
                                          */
     303                 :             : static TheLexeme *
     304                 :          30 : addCompiledLexeme(TheLexeme *newwrds, int *nnw, int *tnm, TSLexeme *lexeme, LexemeInfo *src, uint16 tnvariant)
     305                 :             : {
     306         [ +  - ]:          30 :         if (*nnw >= *tnm)
     307                 :             :         {
     308                 :           0 :                 *tnm *= 2;
     309                 :           0 :                 newwrds = (TheLexeme *) repalloc(newwrds, sizeof(TheLexeme) * *tnm);
     310                 :           0 :         }
     311                 :             : 
     312                 :          30 :         newwrds[*nnw].entries = palloc_object(LexemeInfo);
     313                 :             : 
     314   [ +  +  -  + ]:          30 :         if (lexeme && lexeme->lexeme)
     315                 :             :         {
     316                 :          28 :                 newwrds[*nnw].lexeme = pstrdup(lexeme->lexeme);
     317                 :          28 :                 newwrds[*nnw].entries->tnvariant = tnvariant;
     318                 :          28 :         }
     319                 :             :         else
     320                 :             :         {
                                                         /* no lexeme supplied: the caller's tnvariant is ignored */
     321                 :           2 :                 newwrds[*nnw].lexeme = NULL;
     322                 :           2 :                 newwrds[*nnw].entries->tnvariant = 1;
     323                 :             :         }
     324                 :             : 
     325                 :          30 :         newwrds[*nnw].entries->idsubst = src->idsubst;
     326                 :          30 :         newwrds[*nnw].entries->posinsubst = src->posinsubst;
     327                 :             : 
     328                 :          30 :         newwrds[*nnw].entries->nextentry = NULL;
     329                 :             : 
     330                 :          30 :         (*nnw)++;
     331                 :          30 :         return newwrds;
     332                 :             : }
     333                 :             : 
                                         /*
                                          * cmpLexemeInfo
                                          *              Three-way comparison of two LexemeInfo records.
                                          *
                                          * Orders by idsubst, then posinsubst, then tnvariant, each ascending.
                                          * Returns -1, 0 or 1.  If either argument is NULL the result is 0, i.e.
                                          * a NULL record compares equal to anything.
                                          */
     334                 :             : static int
     335                 :          32 : cmpLexemeInfo(LexemeInfo *a, LexemeInfo *b)
     336                 :             : {
     337   [ +  -  -  + ]:          32 :         if (a == NULL || b == NULL)
     338                 :           0 :                 return 0;
     339                 :             : 
     340         [ -  + ]:          32 :         if (a->idsubst == b->idsubst)
     341                 :             :         {
     342         [ #  # ]:           0 :                 if (a->posinsubst == b->posinsubst)
     343                 :             :                 {
     344         [ #  # ]:           0 :                         if (a->tnvariant == b->tnvariant)
     345                 :           0 :                                 return 0;
     346                 :             : 
     347                 :           0 :                         return (a->tnvariant > b->tnvariant) ? 1 : -1;
     348                 :             :                 }
     349                 :             : 
     350                 :           0 :                 return (a->posinsubst > b->posinsubst) ? 1 : -1;
     351                 :             :         }
     352                 :             : 
     353                 :          32 :         return (a->idsubst > b->idsubst) ? 1 : -1;
     354                 :          32 : }
     355                 :             : 
                                         /*
                                          * cmpLexeme
                                          *              Compare two TheLexeme entries by their lexeme string.
                                          *
                                          * Entries whose lexeme is NULL sort after all entries with a string and
                                          * compare equal to each other; otherwise the result is that of strcmp().
                                          */
     356                 :             : static int
     357                 :         220 : cmpLexeme(const TheLexeme *a, const TheLexeme *b)
     358                 :             : {
     359         [ +  + ]:         220 :         if (a->lexeme == NULL)
     360                 :             :         {
     361         [ +  + ]:          26 :                 if (b->lexeme == NULL)
     362                 :           6 :                         return 0;
     363                 :             :                 else
     364                 :          20 :                         return 1;
     365                 :             :         }
     366         [ +  + ]:         194 :         else if (b->lexeme == NULL)
     367                 :           3 :                 return -1;
     368                 :             : 
     369                 :         191 :         return strcmp(a->lexeme, b->lexeme);
     370                 :         220 : }
     371                 :             : 
                                         /*
                                          * cmpLexemeQ
                                          *              cmpLexeme() behind a (const void *, const void *) signature.
                                          *              NOTE(review): presumably a qsort/bsearch callback -- the call
                                          *              site is outside this view.
                                          */
     372                 :             : static int
     373                 :          98 : cmpLexemeQ(const void *a, const void *b)
     374                 :             : {
     375                 :          98 :         return cmpLexeme((const TheLexeme *) a, (const TheLexeme *) b);
     376                 :             : }
     377                 :             : 
                                         /*
                                          * cmpTheLexeme
                                          *              Compare two TheLexeme entries passed as untyped pointers.
                                          *
                                          * The primary key is the lexeme string as ordered by cmpLexeme().  Ties
                                          * are broken by the negated result of cmpLexemeInfo(), so among equal
                                          * lexemes the entries come out in descending (idsubst, posinsubst,
                                          * tnvariant) order.
                                          */
     378                 :             : static int
     379                 :          94 : cmpTheLexeme(const void *a, const void *b)
     380                 :             : {
     381                 :          94 :         const TheLexeme *la = (const TheLexeme *) a;
     382                 :          94 :         const TheLexeme *lb = (const TheLexeme *) b;
     383                 :          94 :         int                     res;
     384                 :             : 
     385         [ +  + ]:          94 :         if ((res = cmpLexeme(la, lb)) != 0)
     386                 :          76 :                 return res;
     387                 :             : 
     388                 :          18 :         return -cmpLexemeInfo(la->entries, lb->entries);
     389                 :          94 : }
     390                 :             : 
     391                 :             : static void
     392                 :           2 : compileTheLexeme(DictThesaurus *d)
     393                 :             : {
     394                 :           4 :         int                     i,
     395                 :           2 :                                 nnw = 0,
     396                 :           2 :                                 tnm = 16;
     397                 :           2 :         TheLexeme  *newwrds = palloc_array(TheLexeme, tnm),
     398                 :             :                            *ptrwrds;
     399                 :             : 
     400         [ +  + ]:          32 :         for (i = 0; i < d->nwrds; i++)
     401                 :             :         {
     402                 :          30 :                 TSLexeme   *ptr;
     403                 :             : 
     404         [ +  + ]:          30 :                 if (strcmp(d->wrds[i].lexeme, "?") == 0)   /* Is stop word marker? */
     405                 :           2 :                         newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, NULL, d->wrds[i].entries, 0);
     406                 :             :                 else
     407                 :             :                 {
     408                 :          28 :                         ptr = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize),
     409                 :             :                                                                                                                          PointerGetDatum(d->subdict->dictData),
     410                 :             :                                                                                                                          PointerGetDatum(d->wrds[i].lexeme),
     411                 :             :                                                                                                                          Int32GetDatum(strlen(d->wrds[i].lexeme)),
     412                 :             :                                                                                                                          PointerGetDatum(NULL)));
     413                 :             : 
     414         [ +  - ]:          28 :                         if (!ptr)
     415   [ #  #  #  # ]:           0 :                                 ereport(ERROR,
     416                 :             :                                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
     417                 :             :                                                  errmsg("thesaurus sample word \"%s\" isn't recognized by subdictionary (rule %d)",
     418                 :             :                                                                 d->wrds[i].lexeme,
     419                 :             :                                                                 d->wrds[i].entries->idsubst + 1)));
     420         [ +  - ]:          28 :                         else if (!(ptr->lexeme))
     421   [ #  #  #  # ]:           0 :                                 ereport(ERROR,
     422                 :             :                                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
     423                 :             :                                                  errmsg("thesaurus sample word \"%s\" is a stop word (rule %d)",
     424                 :             :                                                                 d->wrds[i].lexeme,
     425                 :             :                                                                 d->wrds[i].entries->idsubst + 1),
     426                 :             :                                                  errhint("Use \"?\" to represent a stop word within a sample phrase.")));
     427                 :             :                         else
     428                 :             :                         {
     429         [ +  + ]:          56 :                                 while (ptr->lexeme)
     430                 :             :                                 {
     431                 :          28 :                                         TSLexeme   *remptr = ptr + 1;
     432                 :          28 :                                         int                     tnvar = 1;
     433                 :          28 :                                         int                     curvar = ptr->nvariant;
     434                 :             : 
     435                 :             :                                         /* compute n words in one variant */
     436         [ +  - ]:          28 :                                         while (remptr->lexeme)
     437                 :             :                                         {
     438         [ #  # ]:           0 :                                                 if (remptr->nvariant != (remptr - 1)->nvariant)
     439                 :           0 :                                                         break;
     440                 :           0 :                                                 tnvar++;
     441                 :           0 :                                                 remptr++;
     442                 :             :                                         }
     443                 :             : 
     444                 :          28 :                                         remptr = ptr;
     445   [ +  +  +  + ]:          56 :                                         while (remptr->lexeme && remptr->nvariant == curvar)
     446                 :             :                                         {
     447                 :          28 :                                                 newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, remptr, d->wrds[i].entries, tnvar);
     448                 :          28 :                                                 remptr++;
     449                 :             :                                         }
     450                 :             : 
     451                 :          28 :                                         ptr = remptr;
     452                 :          28 :                                 }
     453                 :             :                         }
     454                 :             :                 }
     455                 :             : 
     456                 :          30 :                 pfree(d->wrds[i].lexeme);
     457                 :          30 :                 pfree(d->wrds[i].entries);
     458                 :          30 :         }
     459                 :             : 
     460         [ -  + ]:           2 :         if (d->wrds)
     461                 :           2 :                 pfree(d->wrds);
     462                 :           2 :         d->wrds = newwrds;
     463                 :           2 :         d->nwrds = nnw;
     464                 :           2 :         d->ntwrds = tnm;
     465                 :             : 
     466         [ -  + ]:           2 :         if (d->nwrds > 1)
     467                 :             :         {
     468                 :           2 :                 qsort(d->wrds, d->nwrds, sizeof(TheLexeme), cmpTheLexeme);
     469                 :             : 
     470                 :             :                 /* uniq */
     471                 :           2 :                 newwrds = d->wrds;
     472                 :           2 :                 ptrwrds = d->wrds + 1;
     473         [ +  + ]:          30 :                 while (ptrwrds - d->wrds < d->nwrds)
     474                 :             :                 {
     475         [ +  + ]:          28 :                         if (cmpLexeme(ptrwrds, newwrds) == 0)
     476                 :             :                         {
     477         [ +  - ]:          14 :                                 if (cmpLexemeInfo(ptrwrds->entries, newwrds->entries))
     478                 :             :                                 {
     479                 :          14 :                                         ptrwrds->entries->nextentry = newwrds->entries;
     480                 :          14 :                                         newwrds->entries = ptrwrds->entries;
     481                 :          14 :                                 }
     482                 :             :                                 else
     483                 :           0 :                                         pfree(ptrwrds->entries);
     484                 :             : 
     485         [ -  + ]:          14 :                                 if (ptrwrds->lexeme)
     486                 :          14 :                                         pfree(ptrwrds->lexeme);
     487                 :          14 :                         }
     488                 :             :                         else
     489                 :             :                         {
     490                 :          14 :                                 newwrds++;
     491                 :          14 :                                 *newwrds = *ptrwrds;
     492                 :             :                         }
     493                 :             : 
     494                 :          28 :                         ptrwrds++;
     495                 :             :                 }
     496                 :             : 
     497                 :           2 :                 d->nwrds = newwrds - d->wrds + 1;
     498                 :           2 :                 d->wrds = (TheLexeme *) repalloc(d->wrds, sizeof(TheLexeme) * d->nwrds);
     499                 :           2 :         }
     500                 :           2 : }
     501                 :             : 
                                         /*
                                          * compileTheSubstitute
                                          *
                                          * For each of the d->nsubst rules, rebuild d->subst[i].res: every input
                                          * lexeme is either copied as-is (DT_USEASIS set; its flags are cleared) or
                                          * passed through the subdictionary's lexize function, and the resulting
                                          * lexemes are appended (strings pstrdup'ed) to a freshly allocated array
                                          * that is doubled in size when it fills up.
                                          *
                                          * Raises ERRCODE_CONFIG_FILE_ERROR when the subdictionary returns NULL for
                                          * a word, returns an empty result for it (stop word), or when a rule ends
                                          * up with no output lexemes at all.
                                          *
                                          * On success d->subst[i].reslen holds the number of output lexemes; the
                                          * input lexeme strings and the original input array are pfree'd.
                                          */
     502                 :             : static void
     503                 :           2 : compileTheSubstitute(DictThesaurus *d)
     504                 :             : {
     505                 :           2 :         int                     i;
     506                 :             : 
     507         [ +  + ]:          18 :         for (i = 0; i < d->nsubst; i++)
     508                 :             :         {
     509                 :          16 :                 TSLexeme   *rem = d->subst[i].res,
     510                 :             :                                    *outptr,
     511                 :             :                                    *inptr;
     512                 :          16 :                 int                     n = 2;
     513                 :             : 
                                                         /* rem keeps the old (input) array; res now points at the output */
     514                 :          16 :                 outptr = d->subst[i].res = palloc_array(TSLexeme, n);
     515                 :          16 :                 outptr->lexeme = NULL;
     516                 :          16 :                 inptr = rem;
     517                 :             : 
     518   [ -  +  +  + ]:          40 :                 while (inptr && inptr->lexeme)
     519                 :             :                 {
     520                 :          24 :                         TSLexeme   *lexized,
     521                 :             :                                                 tmplex[2];
     522                 :             : 
     523         [ +  + ]:          24 :                         if (inptr->flags & DT_USEASIS)
     524                 :             :                         {                                       /* do not lexize */
     525                 :          12 :                                 tmplex[0] = *inptr;
     526                 :          12 :                                 tmplex[0].flags = 0;
     527                 :          12 :                                 tmplex[1].lexeme = NULL;
     528                 :          12 :                                 lexized = tmplex;
     529                 :          12 :                         }
     530                 :             :                         else
     531                 :             :                         {
     532                 :          12 :                                 lexized = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize),
     533                 :             :                                                                                                                                          PointerGetDatum(d->subdict->dictData),
     534                 :             :                                                                                                                                          PointerGetDatum(inptr->lexeme),
     535                 :             :                                                                                                                                          Int32GetDatum(strlen(inptr->lexeme)),
     536                 :             :                                                                                                                                          PointerGetDatum(NULL)));
     537                 :             :                         }
     538                 :             : 
     539         [ +  - ]:          24 :                         if (lexized && lexized->lexeme)
     540                 :             :                         {
                                                                         /*
                                                                          * toset is the output index at which this input word's
                                                                          * lexemes start, or -1 when they start at index 0; only
                                                                          * a positive index gets TSL_ADDPOS below.
                                                                          */
     541   [ +  -  +  + ]:          24 :                                 int                     toset = (lexized->lexeme && outptr != d->subst[i].res) ? (outptr - d->subst[i].res) : -1;
     542                 :             : 
     543         [ +  + ]:          48 :                                 while (lexized->lexeme)
     544                 :             :                                 {
                                                                                 /* grow the output array, keeping outptr at the same offset */
     545         [ +  + ]:          24 :                                         if (outptr - d->subst[i].res + 1 >= n)
     546                 :             :                                         {
     547                 :           4 :                                                 int                     diff = outptr - d->subst[i].res;
     548                 :             : 
     549                 :           4 :                                                 n *= 2;
     550                 :           4 :                                                 d->subst[i].res = (TSLexeme *) repalloc(d->subst[i].res, sizeof(TSLexeme) * n);
     551                 :           4 :                                                 outptr = d->subst[i].res + diff;
     552                 :           4 :                                         }
     553                 :             : 
     554                 :          24 :                                         *outptr = *lexized;
     555                 :          24 :                                         outptr->lexeme = pstrdup(lexized->lexeme);
     556                 :             : 
     557                 :          24 :                                         outptr++;
     558                 :          24 :                                         lexized++;
     559                 :             :                                 }
     560                 :             : 
     561         [ +  + ]:          24 :                                 if (toset > 0)
     562                 :           8 :                                         d->subst[i].res[toset].flags |= TSL_ADDPOS;
     563                 :          24 :                         }
     564         [ #  # ]:           0 :                         else if (lexized)
     565                 :             :                         {
     566   [ #  #  #  # ]:           0 :                                 ereport(ERROR,
     567                 :             :                                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
     568                 :             :                                                  errmsg("thesaurus substitute word \"%s\" is a stop word (rule %d)",
     569                 :             :                                                                 inptr->lexeme, i + 1)));
     570                 :           0 :                         }
     571                 :             :                         else
     572                 :             :                         {
     573   [ #  #  #  # ]:           0 :                                 ereport(ERROR,
     574                 :             :                                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
     575                 :             :                                                  errmsg("thesaurus substitute word \"%s\" isn't recognized by subdictionary (rule %d)",
     576                 :             :                                                                 inptr->lexeme, i + 1)));
     577                 :             :                         }
     578                 :             : 
     579         [ -  + ]:          24 :                         if (inptr->lexeme)
     580                 :          24 :                                 pfree(inptr->lexeme);
     581                 :          24 :                         inptr++;
     582                 :          24 :                 }
     583                 :             : 
     584         [ +  - ]:          16 :                 if (outptr == d->subst[i].res)
     585   [ #  #  #  # ]:           0 :                         ereport(ERROR,
     586                 :             :                                         (errcode(ERRCODE_CONFIG_FILE_ERROR),
     587                 :             :                                          errmsg("thesaurus substitute phrase is empty (rule %d)",
     588                 :             :                                                         i + 1)));
     589                 :             : 
     590                 :          16 :                 d->subst[i].reslen = outptr - d->subst[i].res;
     591                 :             : 
     592                 :          16 :                 pfree(rem);
     593                 :          16 :         }
     594                 :           2 : }
     595                 :             : 
                                         /*
                                          * thesaurus_init
                                          *
                                          * Build a DictThesaurus from the option list passed as argument 0.
                                          *
                                          * Recognized options (matched with strcmp on defname):
                                          *   "dictfile"   - passed to thesaurusRead() to load the rules into d
                                          *   "dictionary" - name of the subdictionary
                                          * Each may appear only once and both are required; any other option name,
                                          * a duplicate, or a missing option raises
                                          * ERRCODE_INVALID_PARAMETER_VALUE.
                                          *
                                          * After option parsing the subdictionary is looked up and the loaded
                                          * rules are processed by compileTheLexeme() and compileTheSubstitute().
                                          * Returns the DictThesaurus pointer as a Datum.
                                          */
     596                 :             : Datum
     597                 :           2 : thesaurus_init(PG_FUNCTION_ARGS)
     598                 :             : {
     599                 :           2 :         List       *dictoptions = (List *) PG_GETARG_POINTER(0);
     600                 :           2 :         DictThesaurus *d;
     601                 :           2 :         char       *subdictname = NULL;
     602                 :           2 :         bool            fileloaded = false;
     603                 :           2 :         List       *namelist;
     604                 :           2 :         ListCell   *l;
     605                 :             : 
     606                 :           2 :         d = palloc0_object(DictThesaurus);
     607                 :             : 
     608   [ +  -  +  +  :           6 :         foreach(l, dictoptions)
                    +  + ]
     609                 :             :         {
     610                 :           4 :                 DefElem    *defel = (DefElem *) lfirst(l);
     611                 :             : 
     612         [ +  + ]:           4 :                 if (strcmp(defel->defname, "dictfile") == 0)
     613                 :             :                 {
     614         [ +  - ]:           2 :                         if (fileloaded)
     615   [ #  #  #  # ]:           0 :                                 ereport(ERROR,
     616                 :             :                                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     617                 :             :                                                  errmsg("multiple DictFile parameters")));
     618                 :           2 :                         thesaurusRead(defGetString(defel), d);
     619                 :           2 :                         fileloaded = true;
     620                 :           2 :                 }
     621         [ +  - ]:           2 :                 else if (strcmp(defel->defname, "dictionary") == 0)
     622                 :             :                 {
     623         [ +  - ]:           2 :                         if (subdictname)
     624   [ #  #  #  # ]:           0 :                                 ereport(ERROR,
     625                 :             :                                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     626                 :             :                                                  errmsg("multiple Dictionary parameters")));
     627                 :           2 :                         subdictname = pstrdup(defGetString(defel));
     628                 :           2 :                 }
     629                 :             :                 else
     630                 :             :                 {
     631   [ #  #  #  # ]:           0 :                         ereport(ERROR,
     632                 :             :                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     633                 :             :                                          errmsg("unrecognized Thesaurus parameter: \"%s\"",
     634                 :             :                                                         defel->defname)));
     635                 :             :                 }
     636                 :           4 :         }
     637                 :             : 
     638         [ +  - ]:           2 :         if (!fileloaded)
     639   [ #  #  #  # ]:           0 :                 ereport(ERROR,
     640                 :             :                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     641                 :             :                                  errmsg("missing DictFile parameter")));
     642         [ +  - ]:           2 :         if (!subdictname)
     643   [ #  #  #  # ]:           0 :                 ereport(ERROR,
     644                 :             :                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     645                 :             :                                  errmsg("missing Dictionary parameter")));
     646                 :             : 
                                                 /* turn the subdictionary name into an OID and a cache entry */
     647                 :           2 :         namelist = stringToQualifiedNameList(subdictname, NULL);
     648                 :           2 :         d->subdictOid = get_ts_dict_oid(namelist, false);
     649                 :           2 :         d->subdict = lookup_ts_dictionary_cache(d->subdictOid);
     650                 :             : 
                                                 /* both compile steps call d->subdict->lexize, so the lookup must come first */
     651                 :           2 :         compileTheLexeme(d);
     652                 :           2 :         compileTheSubstitute(d);
     653                 :             : 
     654                 :           4 :         PG_RETURN_POINTER(d);
     655                 :           2 : }
     656                 :             : 
                                         /*
                                          * findTheLexeme
                                          *
                                          * Look up lexeme in d->wrds with bsearch(), using cmpLexemeQ as the
                                          * comparator, and return the matching element's entries list.
                                          *
                                          * Returns NULL when d->nwrds is zero or when no element matches.
                                          */
     657                 :             : static LexemeInfo *
     658                 :          32 : findTheLexeme(DictThesaurus *d, char *lexeme)
     659                 :             : {
     660                 :          32 :         TheLexeme       key,
     661                 :             :                            *res;
     662                 :             : 
     663         [ +  - ]:          32 :         if (d->nwrds == 0)
     664                 :           0 :                 return NULL;
     665                 :             : 
                                                 /* search key: only the lexeme is filled in, entries is left NULL */
     666                 :          32 :         key.lexeme = lexeme;
     667                 :          32 :         key.entries = NULL;
     668                 :             : 
     669                 :          32 :         res = bsearch(&key, d->wrds, d->nwrds, sizeof(TheLexeme), cmpLexemeQ);
     670                 :             : 
     671         [ +  + ]:          32 :         if (res == NULL)
     672                 :           9 :                 return NULL;
     673                 :          23 :         return res->entries;
     674                 :          32 : }
     675                 :             : 
                                         /*
                                          * matchIdSubst
                                          *
                                          * Report whether idsubst occurs in the list starting at stored and linked
                                          * through nextvariant.
                                          *
                                          * A NULL list is treated as matching anything: the function returns true
                                          * without looking at idsubst.  Otherwise it returns true only if some
                                          * list element has the same idsubst.
                                          */
     676                 :             : static bool
     677                 :          48 : matchIdSubst(LexemeInfo *stored, uint32 idsubst)
     678                 :             : {
     679                 :          48 :         bool            res = true;
     680                 :             : 
     681         [ +  + ]:          48 :         if (stored)
     682                 :             :         {
     683                 :          25 :                 res = false;
     684                 :             : 
     685         [ +  + ]:          55 :                 for (; stored; stored = stored->nextvariant)
     686         [ +  + ]:          39 :                         if (stored->idsubst == idsubst)
     687                 :             :                         {
     688                 :           9 :                                 res = true;
     689                 :           9 :                                 break;
     690                 :             :                         }
     691                 :          25 :         }
     692                 :             : 
     693                 :          96 :         return res;
     694                 :          48 : }
     695                 :             : 
                                         /*
                                          * findVariant
                                          *
                                          * Walk the newn lists newin[0..newn-1] (each linked through nextentry) in
                                          * parallel, looking for rule ids (idsubst) for which every list has an
                                          * entry with posinsubst == curpos and tnvariant == newn.
                                          *
                                          * For each such id that also passes matchIdSubst(stored, id) and is not
                                          * already present in the list "in", the matching entry from the last list
                                          * examined is pushed onto the front of "in" via its nextvariant link.
                                          *
                                          * Returns the (possibly extended) "in" list as soon as any of the lists is
                                          * exhausted.  The newin[] slots are advanced in place, and nextvariant of
                                          * pushed entries is overwritten.
                                          *
                                          * NOTE(review): the catch-up loop only advances entries with a smaller
                                          * idsubst, which presumably relies on each list being ordered by
                                          * ascending idsubst - confirm against how the lists are built.
                                          */
     696                 :             : static LexemeInfo *
     697                 :          23 : findVariant(LexemeInfo *in, LexemeInfo *stored, uint16 curpos, LexemeInfo **newin, int newn)
     698                 :             : {
     699                 :          55 :         for (;;)
     700                 :             :         {
     701                 :          55 :                 int                     i;
     702                 :          55 :                 LexemeInfo *ptr = newin[0];
     703                 :             : 
     704         [ +  + ]:          90 :                 for (i = 0; i < newn; i++)
     705                 :             :                 {
                                                                 /* skip entries of this list whose rule id is below the candidate's */
     706   [ +  +  -  + ]:          58 :                         while (newin[i] && newin[i]->idsubst < ptr->idsubst)
     707                 :           0 :                                 newin[i] = newin[i]->nextentry;
     708                 :             : 
     709         [ +  + ]:          58 :                         if (newin[i] == NULL)
     710                 :          13 :                                 return in;
     711                 :             : 
                                                                 /* this list is already past the candidate: adopt its id and rescan all lists */
     712         [ -  + ]:          45 :                         if (newin[i]->idsubst > ptr->idsubst)
     713                 :             :                         {
     714                 :           0 :                                 ptr = newin[i];
     715                 :           0 :                                 i = -1;
     716                 :           0 :                                 continue;
     717                 :             :                         }
     718                 :             : 
                                                                 /* within the same rule id, look for the entry at curpos with newn lexemes */
     719         [ +  + ]:          48 :                         while (newin[i]->idsubst == ptr->idsubst)
     720                 :             :                         {
     721   [ +  +  +  - ]:          45 :                                 if (newin[i]->posinsubst == curpos && newin[i]->tnvariant == newn)
     722                 :             :                                 {
     723                 :          32 :                                         ptr = newin[i];
     724                 :          32 :                                         break;
     725                 :             :                                 }
     726                 :             : 
     727                 :          13 :                                 newin[i] = newin[i]->nextentry;
     728         [ +  + ]:          13 :                                 if (newin[i] == NULL)
     729                 :          10 :                                         return in;
     730                 :             :                         }
     731                 :             : 
                                                                 /* no suitable entry for that id in this list: restart with the new id */
     732         [ +  + ]:          35 :                         if (newin[i]->idsubst != ptr->idsubst)
     733                 :             :                         {
     734                 :           3 :                                 ptr = newin[i];
     735                 :           3 :                                 i = -1;
     736                 :           3 :                                 continue;
     737                 :             :                         }
     738                 :          32 :                 }
     739                 :             : 
                                                         /*
                                                          * The loop above leaves normally only with i == newn (all other
                                                          * exits are returns), i.e. every list holds a matching entry.
                                                          */
     740   [ +  -  +  -  :          32 :                 if (i == newn && matchIdSubst(stored, ptr->idsubst) && (in == NULL || !matchIdSubst(in, ptr->idsubst)))
             +  +  -  + ]
     741                 :             :                 {                                               /* found */
     742                 :             : 
     743                 :          32 :                         ptr->nextvariant = in;
     744                 :          32 :                         in = ptr;
     745                 :          32 :                 }
     746                 :             : 
     747                 :             :                 /* step forward */
     748         [ +  + ]:          64 :                 for (i = 0; i < newn; i++)
     749                 :          32 :                         newin[i] = newin[i]->nextentry;
     750      [ -  +  + ]:          55 :         }
     751                 :          23 : }
     752                 :             : 
                                         /*
                                          * copyTSLexeme
                                          *
                                          * Return a newly allocated copy of the ts->reslen lexemes in ts->res.
                                          * Each element is copied by value and its lexeme string is duplicated
                                          * with pstrdup(); one extra trailing element with a NULL lexeme
                                          * terminates the array.
                                          */
     753                 :             : static TSLexeme *
     754                 :          13 : copyTSLexeme(TheSubstitute *ts)
     755                 :             : {
     756                 :          13 :         TSLexeme   *res;
     757                 :          13 :         uint16          i;
     758                 :             : 
     759                 :          13 :         res = palloc_array(TSLexeme, ts->reslen + 1);
     760         [ +  + ]:          30 :         for (i = 0; i < ts->reslen; i++)
     761                 :             :         {
     762                 :          17 :                 res[i] = ts->res[i];
     763                 :          17 :                 res[i].lexeme = pstrdup(ts->res[i].lexeme);
     764                 :          17 :         }
     765                 :             : 
     766                 :          13 :         res[ts->reslen].lexeme = NULL;
     767                 :             : 
     768                 :          26 :         return res;
     769                 :          13 : }
     770                 :             : 
                                         /*
                                          * checkMatch
                                          *
                                          * Walk the list starting at info (linked through nextvariant) and return a
                                          * copy, made by copyTSLexeme(), of the first rule's substitute whose
                                          * lastlexeme equals curpos.  Returns NULL if no visited rule qualifies.
                                          *
                                          * *moreres is reset to false on entry and set to true as soon as a
                                          * visited entry has a successor in the list; it can therefore be true
                                          * both when a match is returned and when NULL is returned.
                                          */
     771                 :             : static TSLexeme *
     772                 :          16 : checkMatch(DictThesaurus *d, LexemeInfo *info, uint16 curpos, bool *moreres)
     773                 :             : {
     774                 :          16 :         *moreres = false;
     775         [ +  + ]:          21 :         while (info)
     776                 :             :         {
     777         [ +  - ]:          18 :                 Assert(info->idsubst < d->nsubst);
     778         [ +  + ]:          18 :                 if (info->nextvariant)
     779                 :          11 :                         *moreres = true;
     780         [ +  + ]:          18 :                 if (d->subst[info->idsubst].lastlexeme == curpos)
     781                 :          13 :                         return copyTSLexeme(d->subst + info->idsubst);
     782                 :           5 :                 info = info->nextvariant;
     783                 :             :         }
     784                 :             : 
     785                 :           3 :         return NULL;
     786                 :          16 : }
     787                 :             : 
     788                 :             : Datum
     789                 :          34 : thesaurus_lexize(PG_FUNCTION_ARGS)
     790                 :             : {
     791                 :          34 :         DictThesaurus *d = (DictThesaurus *) PG_GETARG_POINTER(0);
     792                 :          34 :         DictSubState *dstate = (DictSubState *) PG_GETARG_POINTER(3);
     793                 :          34 :         TSLexeme   *res = NULL;
     794                 :          34 :         LexemeInfo *stored,
     795                 :          34 :                            *info = NULL;
     796                 :          34 :         uint16          curpos = 0;
     797                 :          34 :         bool            moreres = false;
     798                 :             : 
     799         [ +  - ]:          34 :         if (PG_NARGS() != 4 || dstate == NULL)
     800   [ #  #  #  # ]:           0 :                 elog(ERROR, "forbidden call of thesaurus or nested call");
     801                 :             : 
     802         [ +  + ]:          34 :         if (dstate->isend)
     803                 :           2 :                 PG_RETURN_POINTER(NULL);
     804                 :          32 :         stored = (LexemeInfo *) dstate->private_state;
     805                 :             : 
     806         [ +  + ]:          32 :         if (stored)
     807                 :          10 :                 curpos = stored->posinsubst + 1;
     808                 :             : 
     809         [ +  - ]:          32 :         if (!d->subdict->isvalid)
     810                 :           0 :                 d->subdict = lookup_ts_dictionary_cache(d->subdictOid);
     811                 :             : 
     812                 :          32 :         res = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize),
     813                 :             :                                                                                                          PointerGetDatum(d->subdict->dictData),
     814                 :             :                                                                                                          PG_GETARG_DATUM(1),
     815                 :             :                                                                                                          PG_GETARG_DATUM(2),
     816                 :             :                                                                                                          PointerGetDatum(NULL)));
     817                 :             : 
     818   [ +  -  +  + ]:          32 :         if (res && res->lexeme)
     819                 :             :         {
     820                 :          26 :                 TSLexeme   *ptr = res,
     821                 :             :                                    *basevar;
     822                 :             : 
     823         [ +  + ]:          52 :                 while (ptr->lexeme)
     824                 :             :                 {
     825                 :          26 :                         uint16          nv = ptr->nvariant;
     826                 :          26 :                         uint16          i,
     827                 :          26 :                                                 nlex = 0;
     828                 :          26 :                         LexemeInfo **infos;
     829                 :             : 
     830                 :          26 :                         basevar = ptr;
     831   [ +  +  +  + ]:          52 :                         while (ptr->lexeme && nv == ptr->nvariant)
     832                 :             :                         {
     833                 :          26 :                                 nlex++;
     834                 :          26 :                                 ptr++;
     835                 :             :                         }
     836                 :             : 
     837                 :          26 :                         infos = palloc_array(LexemeInfo *, nlex);
     838         [ +  + ]:          43 :                         for (i = 0; i < nlex; i++)
     839         [ +  + ]:          26 :                                 if ((infos[i] = findTheLexeme(d, basevar[i].lexeme)) == NULL)
     840                 :           9 :                                         break;
     841                 :             : 
     842         [ +  + ]:          26 :                         if (i < nlex)
     843                 :             :                         {
     844                 :             :                                 /* no chance to find */
     845                 :           9 :                                 pfree(infos);
     846                 :           9 :                                 continue;
     847                 :             :                         }
     848                 :             : 
     849                 :          17 :                         info = findVariant(info, stored, curpos, infos, nlex);
     850      [ -  +  + ]:          26 :                 }
     851                 :          26 :         }
     852         [ +  - ]:           6 :         else if (res)
     853                 :             :         {                                                       /* stop-word */
     854                 :           6 :                 LexemeInfo *infos = findTheLexeme(d, NULL);
     855                 :             : 
     856                 :           6 :                 info = findVariant(NULL, stored, curpos, &infos, 1);
     857                 :           6 :         }
     858                 :             :         else
     859                 :             :         {
     860                 :           0 :                 info = NULL;                    /* word isn't recognized */
     861                 :             :         }
     862                 :             : 
     863                 :          32 :         dstate->private_state = info;
     864                 :             : 
     865         [ +  + ]:          32 :         if (!info)
     866                 :             :         {
     867                 :          16 :                 dstate->getnext = false;
     868                 :          16 :                 PG_RETURN_POINTER(NULL);
     869                 :             :         }
     870                 :             : 
     871         [ +  + ]:          16 :         if ((res = checkMatch(d, info, curpos, &moreres)) != NULL)
     872                 :             :         {
     873                 :          13 :                 dstate->getnext = moreres;
     874                 :          13 :                 PG_RETURN_POINTER(res);
     875                 :             :         }
     876                 :             : 
     877                 :           3 :         dstate->getnext = true;
     878                 :             : 
     879                 :           3 :         PG_RETURN_POINTER(NULL);
     880                 :          34 : }

Generated by: LCOV version 2.3.2-1