LCOV - code coverage report
Current view: top level - src/include/tsearch - ts_utils.h (source / functions) Coverage Total Hit
Test: Code coverage Lines: 50.0 % 4 2
Test Date: 2026-01-26 10:56:24 Functions: 50.0 % 2 1
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * ts_utils.h
       4              :  *        helper utilities for tsearch
       5              :  *
       6              :  * Copyright (c) 1998-2026, PostgreSQL Global Development Group
       7              :  *
       8              :  * src/include/tsearch/ts_utils.h
       9              :  *
      10              :  *-------------------------------------------------------------------------
      11              :  */
      12              : #ifndef _PG_TS_UTILS_H_
      13              : #define _PG_TS_UTILS_H_
      14              : 
      15              : #include "nodes/pg_list.h"
      16              : #include "tsearch/ts_public.h"
      17              : #include "tsearch/ts_type.h"
      18              : 
      19              : /*
      20              :  * Common parse definitions for tsvector and tsquery
      21              :  */
      22              : 
      23              : /* tsvector parser support. */
      24              : 
      25              : struct TSVectorParseStateData;  /* opaque struct in tsvector_parser.c */
      26              : typedef struct TSVectorParseStateData *TSVectorParseState;
      27              : 
      28              : /* flag bits that can be passed to init_tsvector_parser: */
      29              : #define P_TSV_OPR_IS_DELIM      (1 << 0)
      30              : #define P_TSV_IS_TSQUERY        (1 << 1)
      31              : #define P_TSV_IS_WEB            (1 << 2)
      32              : 
      33              : extern TSVectorParseState init_tsvector_parser(char *input, int flags,
      34              :                                                                                            Node *escontext);
      35              : extern void reset_tsvector_parser(TSVectorParseState state, char *input);
      36              : extern bool gettoken_tsvector(TSVectorParseState state,
      37              :                                                           char **strval, int *lenval,
      38              :                                                           WordEntryPos **pos_ptr, int *poslen,
      39              :                                                           char **endptr);
      40              : extern void close_tsvector_parser(TSVectorParseState state);
      41              : 
      42              : /* phrase operator begins with '<' */
      43              : #define ISOPERATOR(x) \
      44              :         ( pg_mblen(x) == 1 && ( *(x) == '!' ||  \
      45              :                                                         *(x) == '&' ||      \
      46              :                                                         *(x) == '|' ||  \
      47              :                                                         *(x) == '(' ||  \
      48              :                                                         *(x) == ')' ||  \
      49              :                                                         *(x) == '<'          \
      50              :                                                   ) )
      51              : 
      52              : /* parse_tsquery */
      53              : 
      54              : struct TSQueryParserStateData;  /* private in backend/utils/adt/tsquery.c */
      55              : typedef struct TSQueryParserStateData *TSQueryParserState;
      56              : 
      57              : typedef void (*PushFunction) (void *opaque, TSQueryParserState state,
      58              :                                                           char *token, int tokenlen,
      59              :                                                           int16 tokenweights,   /* bitmap as described in
      60              :                                                                                                          * QueryOperand struct */
      61              :                                                           bool prefix);
      62              : 
      63              : /* flag bits that can be passed to parse_tsquery: */
      64              : #define P_TSQ_PLAIN             (1 << 0)
      65              : #define P_TSQ_WEB               (1 << 1)
      66              : 
      67              : extern TSQuery parse_tsquery(char *buf,
      68              :                                                          PushFunction pushval,
      69              :                                                          void *opaque,
      70              :                                                          int flags,
      71              :                                                          Node *escontext);
      72              : 
      73              : /* Functions for use by PushFunction implementations */
      74              : extern void pushValue(TSQueryParserState state,
      75              :                                           char *strval, int lenval, int16 weight, bool prefix);
      76              : extern void pushStop(TSQueryParserState state);
      77              : extern void pushOperator(TSQueryParserState state, int8 oper, int16 distance);
      78              : 
      79              : /*
      80              :  * parse plain text and lexize words
      81              :  */
      82              : typedef struct
      83              : {
      84              :         uint16          flags;                  /* currently, only TSL_PREFIX */
      85              :         uint16          len;
      86              :         uint16          nvariant;
      87              :         uint16          alen;
      88              :         union
      89              :         {
      90              :                 uint16          pos;
      91              : 
      92              :                 /*
      93              :                  * When apos array is used, apos[0] is the number of elements in the
      94              :                  * array (excluding apos[0]), and alen is the allocated size of the
      95              :                  * array.  We do not allow more than MAXNUMPOS array elements.
      96              :                  */
      97              :                 uint16     *apos;
      98              :         }                       pos;
      99              :         char       *word;
     100              : } ParsedWord;
     101              : 
     102              : typedef struct
     103              : {
     104              :         ParsedWord *words;
     105              :         int32           lenwords;
     106              :         int32           curwords;
     107              :         int32           pos;
     108              : } ParsedText;
     109              : 
     110              : extern void parsetext(Oid cfgId, ParsedText *prs, char *buf, int32 buflen);
     111              : 
     112              : /*
     113              :  * headline framework; the common flow to generate a headline is:
     114              :  *      1. parse the text with hlparsetext
     115              :  *      2. call the parser-specific function to find the part
     116              :  *      3. call generateHeadline to generate the result text
     117              :  */
     118              : 
     119              : extern void hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query,
     120              :                                                 char *buf, int32 buflen);
     121              : extern text *generateHeadline(HeadlineParsedText *prs);
     122              : 
     123              : /*
     124              :  * TSQuery execution support
     125              :  *
     126              :  * TS_execute() executes a tsquery against data that can be represented in
     127              :  * various forms.  The TSExecuteCallback callback function is called to check
     128              :  * whether a given primitive tsquery value is matched in the data.
     129              :  */
     130              : 
     131              : /* TS_execute requires ternary logic to handle NOT with phrase matches */
     132              : typedef enum
     133              : {
     134              :         TS_NO,                                          /* definitely no match */
     135              :         TS_YES,                                         /* definitely does match */
     136              :         TS_MAYBE,                                       /* can't verify match for lack of pos data */
     137              : } TSTernaryValue;
     138              : 
     139              : /*
     140              :  * struct ExecPhraseData is passed to a TSExecuteCallback function if we need
     141              :  * lexeme position data (because of a phrase-match operator in the tsquery).
     142              :  * The callback should fill in position data when it returns TS_YES (success).
     143              :  * If it cannot return position data, it should leave "data" unchanged and
     144              :  * return TS_MAYBE.  The caller of TS_execute() must then arrange for a later
     145              :  * recheck with position data available.
     146              :  *
     147              :  * The reported lexeme positions must be sorted and unique.  Callers must only
     148              :  * consult the position bits of the pos array, ie, WEP_GETPOS(data->pos[i]).
     149              :  * This allows the returned "pos" to point directly to the WordEntryPos
     150              :  * portion of a tsvector value.  If "allocated" is true then the pos array
     151              :  * is palloc'd workspace and caller may free it when done.
     152              :  *
     153              :  * "negate" means that the pos array contains positions where the query does
     154              :  * not match, rather than positions where it does.  "width" is positive when
     155              :  * the match is wider than one lexeme.  Neither of these fields normally needs
     156              :  * to be touched by TSExecuteCallback functions; they are used for
     157              :  * phrase-search processing within TS_execute.
     158              :  *
     159              :  * All fields of the ExecPhraseData struct are initially zeroed by caller.
     160              :  */
     161              : typedef struct ExecPhraseData
     162              : {
     163              :         int                     npos;                   /* number of positions reported */
     164              :         bool            allocated;              /* pos points to palloc'd data? */
     165              :         bool            negate;                 /* positions are where query is NOT matched */
     166              :         WordEntryPos *pos;                      /* ordered, non-duplicate lexeme positions */
     167              :         int                     width;                  /* width of match in lexemes, less 1 */
     168              : } ExecPhraseData;
     169              : 
     170              : /*
     171              :  * Signature for TSQuery lexeme check functions
     172              :  *
     173              :  * arg: opaque value passed through from caller of TS_execute
     174              :  * val: lexeme to test for presence of
     175              :  * data: to be filled with lexeme positions; NULL if position data not needed
     176              :  *
     177              :  * Return TS_YES if lexeme is present in data, TS_MAYBE if it might be
     178              :  * present, TS_NO if it definitely is not present.  If data is not NULL,
     179              :  * it must be filled with lexeme positions if available.  If position data
     180              :  * is not available, leave *data as zeroes and return TS_MAYBE, never TS_YES.
     181              :  */
     182              : typedef TSTernaryValue (*TSExecuteCallback) (void *arg, QueryOperand *val,
     183              :                                                                                          ExecPhraseData *data);
     184              : 
     185              : /*
     186              :  * Flag bits for TS_execute
     187              :  */
     188              : #define TS_EXEC_EMPTY                   (0x00)
     189              : /*
     190              :  * If TS_EXEC_SKIP_NOT is set, then NOT sub-expressions are automatically
     191              :  * evaluated to be true.  This was formerly the default behavior.  It's now
     192              :  * deprecated because it tends to give silly answers, but some applications
     193              :  * might still have a use for it.
     194              :  */
     195              : #define TS_EXEC_SKIP_NOT                (0x01)
     196              : /*
     197              :  * If TS_EXEC_PHRASE_NO_POS is set, allow OP_PHRASE to be executed lossily
     198              :  * in the absence of position information: a true result indicates that the
     199              :  * phrase might be present.  Without this flag, OP_PHRASE always returns
     200              :  * false if lexeme position information is not available.
     201              :  */
     202              : #define TS_EXEC_PHRASE_NO_POS   (0x02)
     203              : 
     204              : extern bool TS_execute(QueryItem *curitem, void *arg, uint32 flags,
     205              :                                            TSExecuteCallback chkcond);
     206              : extern TSTernaryValue TS_execute_ternary(QueryItem *curitem, void *arg,
     207              :                                                                                  uint32 flags,
     208              :                                                                                  TSExecuteCallback chkcond);
     209              : extern List *TS_execute_locations(QueryItem *curitem, void *arg,
     210              :                                                                   uint32 flags,
     211              :                                                                   TSExecuteCallback chkcond);
     212              : extern bool tsquery_requires_match(QueryItem *curitem);
     213              : 
     214              : /*
     215              :  * to_ts* - text transformation to tsvector, tsquery
     216              :  */
     217              : extern TSVector make_tsvector(ParsedText *prs);
     218              : extern int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix);
     219              : 
     220              : /*
     221              :  * Possible strategy numbers for indexes
     222              :  *        TSearchStrategyNumber  - (tsvector|text) @@ tsquery
     223              :  *        TSearchWithClassStrategyNumber  - tsvector @@@ tsquery
     224              :  */
     225              : #define TSearchStrategyNumber                   1
     226              : #define TSearchWithClassStrategyNumber  2
     227              : 
     228              : /*
     229              :  * TSQuery Utilities
     230              :  */
     231              : extern QueryItem *clean_NOT(QueryItem *ptr, int32 *len);
     232              : extern TSQuery cleanup_tsquery_stopwords(TSQuery in, bool noisy);
     233              : 
     234              : typedef struct QTNode
     235              : {
     236              :         QueryItem  *valnode;
     237              :         uint32          flags;
     238              :         int32           nchild;
     239              :         char       *word;
     240              :         uint32          sign;
     241              :         struct QTNode **child;
     242              : } QTNode;
     243              : 
     244              : /* bits in QTNode.flags */
     245              : #define QTN_NEEDFREE    0x01
     246              : #define QTN_NOCHANGE    0x02
     247              : #define QTN_WORDFREE    0x04
     248              : 
     249              : typedef uint64 TSQuerySign;
     250              : 
     251              : #define TSQS_SIGLEN  (sizeof(TSQuerySign)*BITS_PER_BYTE)
     252              : 
     253              : static inline Datum
     254            6 : TSQuerySignGetDatum(TSQuerySign X)
     255              : {
     256            6 :         return Int64GetDatum((int64) X);
     257              : }
     258              : 
     259              : static inline TSQuerySign
     260            0 : DatumGetTSQuerySign(Datum X)
     261              : {
     262            0 :         return (TSQuerySign) DatumGetInt64(X);
     263              : }
     264              : 
     265              : #define PG_RETURN_TSQUERYSIGN(X)        return TSQuerySignGetDatum(X)
     266              : #define PG_GETARG_TSQUERYSIGN(n)        DatumGetTSQuerySign(PG_GETARG_DATUM(n))
     267              : 
     268              : 
     269              : extern QTNode *QT2QTN(QueryItem *in, char *operand);
     270              : extern TSQuery QTN2QT(QTNode *in);
     271              : extern void QTNFree(QTNode *in);
     272              : extern void QTNSort(QTNode *in);
     273              : extern void QTNTernary(QTNode *in);
     274              : extern void QTNBinary(QTNode *in);
     275              : extern int      QTNodeCompare(QTNode *an, QTNode *bn);
     276              : extern QTNode *QTNCopy(QTNode *in);
     277              : extern void QTNClearFlags(QTNode *in, uint32 flags);
     278              : extern bool QTNEq(QTNode *a, QTNode *b);
     279              : extern TSQuerySign makeTSQuerySign(TSQuery a);
     280              : extern QTNode *findsubquery(QTNode *root, QTNode *ex, QTNode *subs,
     281              :                                                         bool *isfind);
     282              : 
     283              : #endif                                                  /* _PG_TS_UTILS_H_ */
        

Generated by: LCOV version 2.3.2-1