LCOV - Code coverage - src/backend/utils/adt/tsvector.c

LCOV - code coverage report

Current view:	top level - src/backend/utils/adt - tsvector.c (source / functions)		Coverage	Total	Hit
Test:	Code coverage	Lines:	68.2 %	308	210
Test Date:	2026-01-26 10:56:24	Functions:	75.0 %	8	6
Legend:	Lines: hit not hit Branches: + taken - not taken # not executed	Branches:	49.7 %	167	83

             Branch data     Line data    Source code

       1                 :             : /*-------------------------------------------------------------------------
       2                 :             :  *
       3                 :             :  * tsvector.c
       4                 :             :  *        I/O functions for tsvector
       5                 :             :  *
       6                 :             :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7                 :             :  *
       8                 :             :  *
       9                 :             :  * IDENTIFICATION
      10                 :             :  *        src/backend/utils/adt/tsvector.c
      11                 :             :  *
      12                 :             :  *-------------------------------------------------------------------------
      13                 :             :  */
      14                 :             : 
      15                 :             : #include "postgres.h"
      16                 :             : 
      17                 :             : #include "common/int.h"
      18                 :             : #include "libpq/pqformat.h"
      19                 :             : #include "nodes/miscnodes.h"
      20                 :             : #include "tsearch/ts_locale.h"
      21                 :             : #include "tsearch/ts_utils.h"
      22                 :             : #include "utils/fmgrprotos.h"
      23                 :             : #include "utils/memutils.h"
      24                 :             : #include "varatt.h"
      25                 :             : 
      26                 :             : typedef struct
      27                 :             : {
      28                 :             :         WordEntry       entry;                  /* must be first, see compareentry */
      29                 :             :         WordEntryPos *pos;
      30                 :             :         int                     poslen;                 /* number of elements in pos */
      31                 :             : } WordEntryIN;
      32                 :             : 
      33                 :             : 
      34                 :             : /* Compare two WordEntryPos values for qsort */
      35                 :             : int
      36                 :         168 : compareWordEntryPos(const void *a, const void *b)
      37                 :             : {
      38                 :         168 :         int                     apos = WEP_GETPOS(*(const WordEntryPos *) a);
      39                 :         168 :         int                     bpos = WEP_GETPOS(*(const WordEntryPos *) b);
      40                 :             : 
      41                 :         336 :         return pg_cmp_s32(apos, bpos);
      42                 :         168 : }
      43                 :             : 
      44                 :             : /*
      45                 :             :  * Removes duplicate pos entries. If there's two entries with same pos but
      46                 :             :  * different weight, the higher weight is retained, so we can't use
      47                 :             :  * qunique here.
      48                 :             :  *
      49                 :             :  * Returns new length.
      50                 :             :  */
      51                 :             : static int
      52                 :        1601 : uniquePos(WordEntryPos *a, int l)
      53                 :             : {
      54                 :        1601 :         WordEntryPos *ptr,
      55                 :             :                            *res;
      56                 :             : 
      57         [ +  + ]:        1601 :         if (l <= 1)
      58                 :        1512 :                 return l;
      59                 :             : 
      60                 :          89 :         qsort(a, l, sizeof(WordEntryPos), compareWordEntryPos);
      61                 :             : 
      62                 :          89 :         res = a;
      63                 :          89 :         ptr = a + 1;
      64         [ +  + ]:         242 :         while (ptr - a < l)
      65                 :             :         {
      66         [ +  + ]:         153 :                 if (WEP_GETPOS(*ptr) != WEP_GETPOS(*res))
      67                 :             :                 {
      68                 :         149 :                         res++;
      69                 :         149 :                         *res = *ptr;
      70   [ +  -  -  + ]:         149 :                         if (res - a >= MAXNUMPOS - 1 ||
      71                 :         149 :                                 WEP_GETPOS(*res) == MAXENTRYPOS - 1)
      72                 :           0 :                                 break;
      73                 :         149 :                 }
      74         [ +  + ]:           4 :                 else if (WEP_GETWEIGHT(*ptr) > WEP_GETWEIGHT(*res))
      75                 :           1 :                         WEP_SETWEIGHT(*res, WEP_GETWEIGHT(*ptr));
      76                 :         153 :                 ptr++;
      77                 :             :         }
      78                 :             : 
      79                 :          89 :         return res + 1 - a;
      80                 :        1601 : }
      81                 :             : 
      82                 :             : /*
      83                 :             :  * Compare two WordEntry structs for qsort_arg.  This can also be used on
      84                 :             :  * WordEntryIN structs, since those have WordEntry as their first field.
      85                 :             :  */
      86                 :             : static int
      87                 :      178746 : compareentry(const void *va, const void *vb, void *arg)
      88                 :             : {
      89                 :      178746 :         const WordEntry *a = (const WordEntry *) va;
      90                 :      178746 :         const WordEntry *b = (const WordEntry *) vb;
      91                 :      178746 :         char       *BufferStr = (char *) arg;
      92                 :             : 
      93                 :      536238 :         return tsCompareString(&BufferStr[a->pos], a->len,
      94                 :      178746 :                                                    &BufferStr[b->pos], b->len,
      95                 :             :                                                    false);
      96                 :      178746 : }
      97                 :             : 
      98                 :             : /*
      99                 :             :  * Sort an array of WordEntryIN, remove duplicates.
     100                 :             :  * *outbuflen receives the amount of space needed for strings and positions.
     101                 :             :  */
     102                 :             : static int
     103                 :         616 : uniqueentry(WordEntryIN *a, int l, char *buf, int *outbuflen)
     104                 :             : {
     105                 :         616 :         int                     buflen;
     106                 :         616 :         WordEntryIN *ptr,
     107                 :             :                            *res;
     108                 :             : 
     109         [ +  - ]:         616 :         Assert(l >= 1);
     110                 :             : 
     111         [ +  + ]:         616 :         if (l > 1)
     112                 :         599 :                 qsort_arg(a, l, sizeof(WordEntryIN), compareentry, buf);
     113                 :             : 
     114                 :         616 :         buflen = 0;
     115                 :         616 :         res = a;
     116                 :         616 :         ptr = a + 1;
     117         [ +  + ]:       30133 :         while (ptr - a < l)
     118                 :             :         {
     119   [ +  +  +  + ]:       29517 :                 if (!(ptr->entry.len == res->entry.len &&
     120                 :       58680 :                           strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos],
     121                 :       58680 :                                           res->entry.len) == 0))
     122                 :             :                 {
     123                 :             :                         /* done accumulating data into *res, count space needed */
     124                 :       28596 :                         buflen += res->entry.len;
     125         [ +  + ]:       28596 :                         if (res->entry.haspos)
     126                 :             :                         {
     127                 :        1499 :                                 res->poslen = uniquePos(res->pos, res->poslen);
     128                 :        1499 :                                 buflen = SHORTALIGN(buflen);
     129                 :        1499 :                                 buflen += res->poslen * sizeof(WordEntryPos) + sizeof(uint16);
     130                 :        1499 :                         }
     131                 :       28596 :                         res++;
     132         [ +  + ]:       28596 :                         if (res != ptr)
     133                 :       14762 :                                 memcpy(res, ptr, sizeof(WordEntryIN));
     134                 :       28596 :                 }
     135         [ +  + ]:         921 :                 else if (ptr->entry.haspos)
     136                 :             :                 {
     137         [ +  + ]:          53 :                         if (res->entry.haspos)
     138                 :             :                         {
     139                 :             :                                 /* append ptr's positions to res's positions */
     140                 :          52 :                                 int                     newlen = ptr->poslen + res->poslen;
     141                 :             : 
     142                 :          52 :                                 res->pos = (WordEntryPos *)
     143                 :          52 :                                         repalloc(res->pos, newlen * sizeof(WordEntryPos));
     144                 :          52 :                                 memcpy(&res->pos[res->poslen], ptr->pos,
     145                 :             :                                            ptr->poslen * sizeof(WordEntryPos));
     146                 :          52 :                                 res->poslen = newlen;
     147                 :          52 :                                 pfree(ptr->pos);
     148                 :          52 :                         }
     149                 :             :                         else
     150                 :             :                         {
     151                 :             :                                 /* just give ptr's positions to pos */
     152                 :           1 :                                 res->entry.haspos = 1;
     153                 :           1 :                                 res->pos = ptr->pos;
     154                 :           1 :                                 res->poslen = ptr->poslen;
     155                 :             :                         }
     156                 :          53 :                 }
     157                 :       29517 :                 ptr++;
     158                 :             :         }
     159                 :             : 
     160                 :             :         /* count space needed for last item */
     161                 :         616 :         buflen += res->entry.len;
     162         [ +  + ]:         616 :         if (res->entry.haspos)
     163                 :             :         {
     164                 :         102 :                 res->poslen = uniquePos(res->pos, res->poslen);
     165                 :         102 :                 buflen = SHORTALIGN(buflen);
     166                 :         102 :                 buflen += res->poslen * sizeof(WordEntryPos) + sizeof(uint16);
     167                 :         102 :         }
     168                 :             : 
     169                 :         616 :         *outbuflen = buflen;
     170                 :        1232 :         return res + 1 - a;
     171                 :         616 : }
     172                 :             : 
     173                 :             : 
     174                 :             : Datum
     175                 :         626 : tsvectorin(PG_FUNCTION_ARGS)
     176                 :             : {
     177                 :         626 :         char       *buf = PG_GETARG_CSTRING(0);
     178                 :         626 :         Node       *escontext = fcinfo->context;
     179                 :         626 :         TSVectorParseState state;
     180                 :         626 :         WordEntryIN *arr;
     181                 :         626 :         int                     totallen;
     182                 :         626 :         int                     arrlen;                 /* allocated size of arr */
     183                 :         626 :         WordEntry  *inarr;
     184                 :         626 :         int                     len = 0;
     185                 :         626 :         TSVector        in;
     186                 :         626 :         int                     i;
     187                 :         626 :         char       *token;
     188                 :         626 :         int                     toklen;
     189                 :         626 :         WordEntryPos *pos;
     190                 :         626 :         int                     poslen;
     191                 :         626 :         char       *strbuf;
     192                 :         626 :         int                     stroff;
     193                 :             : 
     194                 :             :         /*
     195                 :             :          * Tokens are appended to tmpbuf, cur is a pointer to the end of used
     196                 :             :          * space in tmpbuf.
     197                 :             :          */
     198                 :         626 :         char       *tmpbuf;
     199                 :         626 :         char       *cur;
     200                 :         626 :         int                     buflen = 256;   /* allocated size of tmpbuf */
     201                 :             : 
     202                 :         626 :         state = init_tsvector_parser(buf, 0, escontext);
     203                 :             : 
     204                 :         626 :         arrlen = 64;
     205                 :         626 :         arr = palloc_array(WordEntryIN, arrlen);
     206                 :         626 :         cur = tmpbuf = palloc_array(char, buflen);
     207                 :             : 
     208         [ +  + ]:       30759 :         while (gettoken_tsvector(state, &token, &toklen, &pos, &poslen, NULL))
     209                 :             :         {
     210         [ -  + ]:       30133 :                 if (toklen >= MAXSTRLEN)
     211         [ #  # ]:           0 :                         ereturn(escontext, (Datum) 0,
     212                 :             :                                         (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     213                 :             :                                          errmsg("word is too long (%d bytes, max %d bytes)",
     214                 :             :                                                         toklen,
     215                 :             :                                                         MAXSTRLEN - 1)));
     216                 :             : 
     217         [ -  + ]:       30133 :                 if (cur - tmpbuf > MAXSTRPOS)
     218         [ #  # ]:           0 :                         ereturn(escontext, (Datum) 0,
     219                 :             :                                         (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     220                 :             :                                          errmsg("string is too long for tsvector (%ld bytes, max %ld bytes)",
     221                 :             :                                                         (long) (cur - tmpbuf), (long) MAXSTRPOS)));
     222                 :             : 
     223                 :             :                 /*
     224                 :             :                  * Enlarge buffers if needed
     225                 :             :                  */
     226         [ +  + ]:       30133 :                 if (len >= arrlen)
     227                 :             :                 {
     228                 :         219 :                         arrlen *= 2;
     229                 :         219 :                         arr = (WordEntryIN *)
     230                 :         219 :                                 repalloc(arr, sizeof(WordEntryIN) * arrlen);
     231                 :         219 :                 }
     232         [ -  + ]:       30133 :                 while ((cur - tmpbuf) + toklen >= buflen)
     233                 :             :                 {
     234                 :           0 :                         int                     dist = cur - tmpbuf;
     235                 :             : 
     236                 :           0 :                         buflen *= 2;
     237                 :           0 :                         tmpbuf = (char *) repalloc(tmpbuf, buflen);
     238                 :           0 :                         cur = tmpbuf + dist;
     239                 :           0 :                 }
     240                 :       30133 :                 arr[len].entry.len = toklen;
     241                 :       30133 :                 arr[len].entry.pos = cur - tmpbuf;
     242                 :       30133 :                 memcpy(cur, token, toklen);
     243                 :       30133 :                 cur += toklen;
     244                 :             : 
     245         [ +  + ]:       30133 :                 if (poslen != 0)
     246                 :             :                 {
     247                 :        1653 :                         arr[len].entry.haspos = 1;
     248                 :        1653 :                         arr[len].pos = pos;
     249                 :        1653 :                         arr[len].poslen = poslen;
     250                 :        1653 :                 }
     251                 :             :                 else
     252                 :             :                 {
     253                 :       28480 :                         arr[len].entry.haspos = 0;
     254                 :       28480 :                         arr[len].pos = NULL;
     255                 :       28480 :                         arr[len].poslen = 0;
     256                 :             :                 }
     257                 :       30133 :                 len++;
     258                 :             :         }
     259                 :             : 
     260                 :         626 :         close_tsvector_parser(state);
     261                 :             : 
     262                 :             :         /* Did gettoken_tsvector fail? */
     263   [ +  +  +  -  :         626 :         if (SOFT_ERROR_OCCURRED(escontext))
                   +  + ]
     264                 :           2 :                 PG_RETURN_NULL();
     265                 :             : 
     266         [ +  + ]:         624 :         if (len > 0)
     267                 :         616 :                 len = uniqueentry(arr, len, tmpbuf, &buflen);
     268                 :             :         else
     269                 :           8 :                 buflen = 0;
     270                 :             : 
     271         [ +  - ]:         624 :         if (buflen > MAXSTRPOS)
     272         [ #  # ]:           0 :                 ereturn(escontext, (Datum) 0,
     273                 :             :                                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     274                 :             :                                  errmsg("string is too long for tsvector (%d bytes, max %d bytes)", buflen, MAXSTRPOS)));
     275                 :             : 
     276                 :         624 :         totallen = CALCDATASIZE(len, buflen);
     277                 :         624 :         in = (TSVector) palloc0(totallen);
     278                 :         624 :         SET_VARSIZE(in, totallen);
     279                 :         624 :         in->size = len;
     280                 :         624 :         inarr = ARRPTR(in);
     281                 :         624 :         strbuf = STRPTR(in);
     282                 :         624 :         stroff = 0;
     283         [ +  + ]:       29836 :         for (i = 0; i < len; i++)
     284                 :             :         {
     285                 :       29212 :                 memcpy(strbuf + stroff, &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
     286                 :       29212 :                 arr[i].entry.pos = stroff;
     287                 :       29212 :                 stroff += arr[i].entry.len;
     288         [ +  + ]:       29212 :                 if (arr[i].entry.haspos)
     289                 :             :                 {
     290                 :             :                         /* This should be unreachable because of MAXNUMPOS restrictions */
     291         [ +  - ]:        1601 :                         if (arr[i].poslen > 0xFFFF)
     292   [ #  #  #  # ]:           0 :                                 elog(ERROR, "positions array too long");
     293                 :             : 
     294                 :             :                         /* Copy number of positions */
     295                 :        1601 :                         stroff = SHORTALIGN(stroff);
     296                 :        1601 :                         *(uint16 *) (strbuf + stroff) = (uint16) arr[i].poslen;
     297                 :        1601 :                         stroff += sizeof(uint16);
     298                 :             : 
     299                 :             :                         /* Copy positions */
     300                 :        1601 :                         memcpy(strbuf + stroff, arr[i].pos, arr[i].poslen * sizeof(WordEntryPos));
     301                 :        1601 :                         stroff += arr[i].poslen * sizeof(WordEntryPos);
     302                 :             : 
     303                 :        1601 :                         pfree(arr[i].pos);
     304                 :        1601 :                 }
     305                 :       29212 :                 inarr[i] = arr[i].entry;
     306                 :       29212 :         }
     307                 :             : 
     308         [ +  - ]:         624 :         Assert((strbuf + stroff - (char *) in) == totallen);
     309                 :             : 
     310                 :         624 :         PG_RETURN_TSVECTOR(in);
     311                 :         626 : }
     312                 :             : 
     313                 :             : Datum
     314                 :         108 : tsvectorout(PG_FUNCTION_ARGS)
     315                 :             : {
     316                 :         108 :         TSVector        out = PG_GETARG_TSVECTOR(0);
     317                 :         108 :         char       *outbuf;
     318                 :         216 :         int32           i,
     319                 :         108 :                                 lenbuf = 0,
     320                 :             :                                 pp;
     321                 :         108 :         WordEntry  *ptr = ARRPTR(out);
     322                 :         108 :         char       *curbegin,
     323                 :             :                            *curin,
     324                 :             :                            *curout;
     325                 :             : 
     326                 :         108 :         lenbuf = out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /* \0 */ ;
     327         [ +  + ]:         539 :         for (i = 0; i < out->size; i++)
     328                 :             :         {
     329                 :         431 :                 lenbuf += ptr[i].len * 2 * pg_database_encoding_max_length() /* for escape */ ;
     330         [ +  + ]:         431 :                 if (ptr[i].haspos)
     331         [ +  - ]:         364 :                         lenbuf += 1 /* : */ + 7 /* int2 + , + weight */ * POSDATALEN(out, &(ptr[i]));
     332                 :         431 :         }
     333                 :             : 
     334                 :         108 :         curout = outbuf = (char *) palloc(lenbuf);
     335         [ +  + ]:         539 :         for (i = 0; i < out->size; i++)
     336                 :             :         {
     337                 :         431 :                 curbegin = curin = STRPTR(out) + ptr->pos;
     338         [ +  + ]:         431 :                 if (i != 0)
     339                 :         342 :                         *curout++ = ' ';
     340                 :         431 :                 *curout++ = '\'';
     341         [ +  + ]:        2395 :                 while (curin - curbegin < ptr->len)
     342                 :             :                 {
     343                 :        1964 :                         int                     len = pg_mblen(curin);
     344                 :             : 
     345         [ +  + ]:        1964 :                         if (t_iseq(curin, '\''))
     346                 :           4 :                                 *curout++ = '\'';
     347         [ +  + ]:        1960 :                         else if (t_iseq(curin, '\\'))
     348                 :          15 :                                 *curout++ = '\\';
     349                 :             : 
     350         [ +  + ]:        3928 :                         while (len--)
     351                 :        1964 :                                 *curout++ = *curin++;
     352                 :        1964 :                 }
     353                 :             : 
     354                 :         431 :                 *curout++ = '\'';
     355   [ +  +  +  + ]:         431 :                 if ((pp = POSDATALEN(out, ptr)) != 0)
     356                 :             :                 {
     357                 :         364 :                         WordEntryPos *wptr;
     358                 :             : 
     359                 :         364 :                         *curout++ = ':';
     360                 :         364 :                         wptr = POSDATAPTR(out, ptr);
     361         [ +  + ]:         818 :                         while (pp)
     362                 :             :                         {
     363                 :         454 :                                 curout += sprintf(curout, "%d", WEP_GETPOS(*wptr));
     364   [ -  +  +  +  :         454 :                                 switch (WEP_GETWEIGHT(*wptr))
                      + ]
     365                 :             :                                 {
     366                 :             :                                         case 3:
     367                 :          18 :                                                 *curout++ = 'A';
     368                 :          18 :                                                 break;
     369                 :             :                                         case 2:
     370                 :          10 :                                                 *curout++ = 'B';
     371                 :          10 :                                                 break;
     372                 :             :                                         case 1:
     373                 :          37 :                                                 *curout++ = 'C';
     374                 :          37 :                                                 break;
     375                 :         389 :                                         case 0:
     376                 :             :                                         default:
     377                 :         389 :                                                 break;
     378                 :             :                                 }
     379                 :             : 
     380         [ +  + ]:         454 :                                 if (pp > 1)
     381                 :          90 :                                         *curout++ = ',';
     382                 :         454 :                                 pp--;
     383                 :         454 :                                 wptr++;
     384                 :             :                         }
     385                 :         364 :                 }
     386                 :         431 :                 ptr++;
     387                 :         431 :         }
     388                 :             : 
     389                 :         108 :         *curout = '\0';
     390         [ +  + ]:         108 :         PG_FREE_IF_COPY(out, 0);
     391                 :         216 :         PG_RETURN_CSTRING(outbuf);
     392                 :         108 : }
     393                 :             : 
     394                 :             : /*
     395                 :             :  * Binary Input / Output functions. The binary format is as follows:
     396                 :             :  *
     397                 :             :  * uint32       number of lexemes
     398                 :             :  *
     399                 :             :  * for each lexeme:
     400                 :             :  *              lexeme text in client encoding, null-terminated
     401                 :             :  *              uint16  number of positions
     402                 :             :  *              for each position:
     403                 :             :  *                      uint16 WordEntryPos
     404                 :             :  */
     405                 :             : 
     406                 :             : Datum
     407                 :           0 : tsvectorsend(PG_FUNCTION_ARGS)
     408                 :             : {
     409                 :           0 :         TSVector        vec = PG_GETARG_TSVECTOR(0);
     410                 :           0 :         StringInfoData buf;
     411                 :           0 :         int                     i,
     412                 :             :                                 j;
     413                 :           0 :         WordEntry  *weptr = ARRPTR(vec);
     414                 :             : 
     415                 :           0 :         pq_begintypsend(&buf);
     416                 :             : 
     417                 :           0 :         pq_sendint32(&buf, vec->size);
     418         [ #  # ]:           0 :         for (i = 0; i < vec->size; i++)
     419                 :             :         {
     420                 :           0 :                 uint16          npos;
     421                 :             : 
     422                 :             :                 /*
     423                 :             :                  * the strings in the TSVector array are not null-terminated, so we
     424                 :             :                  * have to send the null-terminator separately
     425                 :             :                  */
     426                 :           0 :                 pq_sendtext(&buf, STRPTR(vec) + weptr->pos, weptr->len);
     427                 :           0 :                 pq_sendbyte(&buf, '\0');
     428                 :             : 
     429         [ #  # ]:           0 :                 npos = POSDATALEN(vec, weptr);
     430                 :           0 :                 pq_sendint16(&buf, npos);
     431                 :             : 
     432         [ #  # ]:           0 :                 if (npos > 0)
     433                 :             :                 {
     434                 :           0 :                         WordEntryPos *wepptr = POSDATAPTR(vec, weptr);
     435                 :             : 
     436         [ #  # ]:           0 :                         for (j = 0; j < npos; j++)
     437                 :           0 :                                 pq_sendint16(&buf, wepptr[j]);
     438                 :           0 :                 }
     439                 :           0 :                 weptr++;
     440                 :           0 :         }
     441                 :             : 
     442                 :           0 :         PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
     443                 :           0 : }
     444                 :             : 
     445                 :             : Datum
     446                 :           0 : tsvectorrecv(PG_FUNCTION_ARGS)
     447                 :             : {
     448                 :           0 :         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
     449                 :           0 :         TSVector        vec;
     450                 :           0 :         int                     i;
     451                 :           0 :         int32           nentries;
     452                 :           0 :         int                     datalen;                /* number of bytes used in the variable size
     453                 :             :                                                                  * area after fixed size TSVector header and
     454                 :             :                                                                  * WordEntries */
     455                 :           0 :         Size            hdrlen;
     456                 :           0 :         Size            len;                    /* allocated size of vec */
     457                 :           0 :         bool            needSort = false;
     458                 :             : 
     459                 :           0 :         nentries = pq_getmsgint(buf, sizeof(int32));
     460         [ #  # ]:           0 :         if (nentries < 0 || nentries > (MaxAllocSize / sizeof(WordEntry)))
     461   [ #  #  #  # ]:           0 :                 elog(ERROR, "invalid size of tsvector");
     462                 :             : 
     463                 :           0 :         hdrlen = DATAHDRSIZE + sizeof(WordEntry) * nentries;
     464                 :             : 
     465                 :           0 :         len = hdrlen * 2;                       /* times two to make room for lexemes */
     466                 :           0 :         vec = (TSVector) palloc0(len);
     467                 :           0 :         vec->size = nentries;
     468                 :             : 
     469                 :           0 :         datalen = 0;
     470         [ #  # ]:           0 :         for (i = 0; i < nentries; i++)
     471                 :             :         {
     472                 :           0 :                 const char *lexeme;
     473                 :           0 :                 uint16          npos;
     474                 :           0 :                 size_t          lex_len;
     475                 :             : 
     476                 :           0 :                 lexeme = pq_getmsgstring(buf);
     477                 :           0 :                 npos = (uint16) pq_getmsgint(buf, sizeof(uint16));
     478                 :             : 
     479                 :             :                 /* sanity checks */
     480                 :             : 
     481                 :           0 :                 lex_len = strlen(lexeme);
     482         [ #  # ]:           0 :                 if (lex_len > MAXSTRLEN)
     483   [ #  #  #  # ]:           0 :                         elog(ERROR, "invalid tsvector: lexeme too long");
     484                 :             : 
     485         [ #  # ]:           0 :                 if (datalen > MAXSTRPOS)
     486   [ #  #  #  # ]:           0 :                         elog(ERROR, "invalid tsvector: maximum total lexeme length exceeded");
     487                 :             : 
     488         [ #  # ]:           0 :                 if (npos > MAXNUMPOS)
     489   [ #  #  #  # ]:           0 :                         elog(ERROR, "unexpected number of tsvector positions");
     490                 :             : 
     491                 :             :                 /*
     492                 :             :                  * Looks valid. Fill the WordEntry struct, and copy lexeme.
     493                 :             :                  *
     494                 :             :                  * But make sure the buffer is large enough first.
     495                 :             :                  */
     496   [ #  #  #  # ]:           0 :                 while (hdrlen + SHORTALIGN(datalen + lex_len) +
     497                 :           0 :                            sizeof(uint16) + npos * sizeof(WordEntryPos) >= len)
     498                 :             :                 {
     499                 :           0 :                         len *= 2;
     500                 :           0 :                         vec = (TSVector) repalloc(vec, len);
     501                 :             :                 }
     502                 :             : 
     503                 :           0 :                 vec->entries[i].haspos = (npos > 0) ? 1 : 0;
     504                 :           0 :                 vec->entries[i].len = lex_len;
     505                 :           0 :                 vec->entries[i].pos = datalen;
     506                 :             : 
     507                 :           0 :                 memcpy(STRPTR(vec) + datalen, lexeme, lex_len);
     508                 :             : 
     509                 :           0 :                 datalen += lex_len;
     510                 :             : 
     511   [ #  #  #  #  :           0 :                 if (i > 0 && compareentry(&vec->entries[i],
             #  #  #  # ]
     512                 :           0 :                                                                   &vec->entries[i - 1],
     513                 :           0 :                                                                   STRPTR(vec)) <= 0)
     514                 :           0 :                         needSort = true;
     515                 :             : 
     516                 :             :                 /* Receive positions */
     517         [ #  # ]:           0 :                 if (npos > 0)
     518                 :             :                 {
     519                 :           0 :                         uint16          j;
     520                 :           0 :                         WordEntryPos *wepptr;
     521                 :             : 
     522                 :             :                         /*
     523                 :             :                          * Pad to 2-byte alignment if necessary. Though we used palloc0
     524                 :             :                          * for the initial allocation, subsequent repalloc'd memory areas
     525                 :             :                          * are not initialized to zero.
     526                 :             :                          */
     527         [ #  # ]:           0 :                         if (datalen != SHORTALIGN(datalen))
     528                 :             :                         {
     529                 :           0 :                                 *(STRPTR(vec) + datalen) = '\0';
     530                 :           0 :                                 datalen = SHORTALIGN(datalen);
     531                 :           0 :                         }
     532                 :             : 
     533                 :           0 :                         memcpy(STRPTR(vec) + datalen, &npos, sizeof(uint16));
     534                 :             : 
     535                 :           0 :                         wepptr = POSDATAPTR(vec, &vec->entries[i]);
     536         [ #  # ]:           0 :                         for (j = 0; j < npos; j++)
     537                 :             :                         {
     538                 :           0 :                                 wepptr[j] = (WordEntryPos) pq_getmsgint(buf, sizeof(WordEntryPos));
     539   [ #  #  #  # ]:           0 :                                 if (j > 0 && WEP_GETPOS(wepptr[j]) <= WEP_GETPOS(wepptr[j - 1]))
     540   [ #  #  #  # ]:           0 :                                         elog(ERROR, "position information is misordered");
     541                 :           0 :                         }
     542                 :             : 
     543                 :           0 :                         datalen += sizeof(uint16) + npos * sizeof(WordEntryPos);
     544                 :           0 :                 }
     545                 :           0 :         }
     546                 :             : 
     547                 :           0 :         SET_VARSIZE(vec, hdrlen + datalen);
     548                 :             : 
     549         [ #  # ]:           0 :         if (needSort)
     550                 :           0 :                 qsort_arg(ARRPTR(vec), vec->size, sizeof(WordEntry),
     551                 :           0 :                                   compareentry, STRPTR(vec));
     552                 :             : 
     553                 :           0 :         PG_RETURN_TSVECTOR(vec);
     554                 :           0 : }

Generated by: LCOV version 2.3.2-1