LCOV - code coverage report
Current view: top level - src/test/modules/test_regex - test_regex.c (source / functions) Coverage Total Hit
Test: Code coverage Lines: 0.0 % 349 0
Test Date: 2026-01-26 10:56:24 Functions: 0.0 % 9 0
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*--------------------------------------------------------------------------
       2              :  *
       3              :  * test_regex.c
       4              :  *              Test harness for the regular expression package.
       5              :  *
       6              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7              :  * Portions Copyright (c) 1994, Regents of the University of California
       8              :  *
       9              :  * IDENTIFICATION
      10              :  *              src/test/modules/test_regex/test_regex.c
      11              :  *
      12              :  * -------------------------------------------------------------------------
      13              :  */
      14              : 
      15              : #include "postgres.h"
      16              : 
      17              : #include "funcapi.h"
      18              : #include "regex/regex.h"
      19              : #include "utils/array.h"
      20              : #include "utils/builtins.h"
      21              : 
      22            0 : PG_MODULE_MAGIC;
      23              : 
      24              : 
      25              : /* all the options of interest for regex functions; filled in by parse_test_flags() */
      26              : typedef struct test_re_flags
      27              : {
      28              :         int                     cflags;                 /* compile flags for Spencer's regex code */
      29              :         int                     eflags;                 /* execute flags for Spencer's regex code */
      30              :         long            info;                   /* expected re_info bits (option letters 'A'..'U') */
      31              :         bool            glob;                   /* do it globally (for each occurrence); option 'g' */
      32              :         bool            indices;                /* report indices not actual strings; option '0' or 'c' */
      33              :         bool            partial;                /* expect partial match; option '!' or 'c' */
      34              : } test_re_flags;
      35              : 
      36              : /* cross-call state for test_regex(); allocated by setup_test_matches() and kept in funcctx->user_fctx */
      37              : typedef struct test_regex_ctx
      38              : {
      39              :         test_re_flags re_flags;         /* flags (copied from the caller's parsed flags) */
      40              :         rm_detail_t details;            /* "details" from execution */
      41              :         text       *orig_str;           /* data string in original TEXT form */
      42              :         int                     nmatches;               /* number of places where pattern matched */
      43              :         int                     npatterns;              /* number of capturing subpatterns */
      44              :         /* We store start char index and end+1 char index for each match */
      45              :         /* so the number of entries in match_locs is nmatches * npatterns * 2 */
      46              :         int                *match_locs;         /* 0-based character indexes */
      47              :         int                     next_match;             /* 0-based index of next match to process */
      48              :         /* workspace for build_test_match_result(), allocated by test_regex() */
      49              :         Datum      *elems;                      /* has npatterns+1 elements */
      50              :         bool       *nulls;                      /* has npatterns+1 elements */
      51              :         pg_wchar   *wide_str;           /* wide-char version of original string */
      52              :         char       *conv_buf;           /* conversion buffer, if needed */
      53              :         int                     conv_bufsiz;    /* size thereof */
      54              : } test_regex_ctx;
      55              : 
      56              : /* Local functions (test_re_execute is defined ahead of its first use, so it has no prototype here) */
      57              : static void test_re_compile(text *text_re, int cflags, Oid collation,
      58              :                                                         regex_t *result_re);
      59              : static void parse_test_flags(test_re_flags *flags, text *opts);
      60              : static test_regex_ctx *setup_test_matches(text *orig_str,
      61              :                                                                                   regex_t *cpattern,
      62              :                                                                                   test_re_flags *re_flags,
      63              :                                                                                   Oid collation,
      64              :                                                                                   bool use_subpatterns);
      65              : static ArrayType *build_test_info_result(regex_t *cpattern,
      66              :                                                                                  test_re_flags *flags);
      67              : static ArrayType *build_test_match_result(test_regex_ctx *matchctx);
      68              : 
      69              : 
      70              : /*
      71              :  * test_regex(pattern text, string text, flags text) returns setof text[]
      72              :  *
      73              :  * This is largely based on regexp.c's regexp_matches, with additions
      74              :  * for debugging purposes.
                      :  *
                      :  * On the first call the flags (argument 2) are parsed, the pattern
                      :  * (argument 0) is compiled, all matching against the string (argument 1)
                      :  * is done by setup_test_matches(), and the row built by
                      :  * build_test_info_result() is returned.  Every later call returns one row
                      :  * from build_test_match_result() until next_match reaches nmatches.
      75              :  */
      76            0 : PG_FUNCTION_INFO_V1(test_regex);
      77              : 
      78              : Datum
      79            0 : test_regex(PG_FUNCTION_ARGS)
      80              : {
      81            0 :         FuncCallContext *funcctx;
      82            0 :         test_regex_ctx *matchctx;
      83            0 :         ArrayType  *result_ary;
      84              : 
      85            0 :         if (SRF_IS_FIRSTCALL())
      86              :         {
      87            0 :                 text       *pattern = PG_GETARG_TEXT_PP(0);
      88            0 :                 text       *flags = PG_GETARG_TEXT_PP(2);
      89            0 :                 Oid                     collation = PG_GET_COLLATION();
      90            0 :                 test_re_flags re_flags;
      91            0 :                 regex_t         cpattern;
      92            0 :                 MemoryContext oldcontext;
      93              : 
      94            0 :                 funcctx = SRF_FIRSTCALL_INIT();
      95            0 :                 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
      96              : 
      97              :                 /* Determine options: fills re_flags.cflags/eflags/info and the booleans */
      98            0 :                 parse_test_flags(&re_flags, flags);
      99              : 
     100              :                 /* set up the compiled pattern (test_re_compile raises ERROR if it is invalid) */
     101            0 :                 test_re_compile(pattern, re_flags.cflags, collation, &cpattern);
     102              : 
     103              :                 /* be sure to copy the input string into the multi-call ctx; matchctx keeps a pointer to it */
     104            0 :                 matchctx = setup_test_matches(PG_GETARG_TEXT_P_COPY(1), &cpattern,
     105              :                                                                           &re_flags,
     106            0 :                                                                           collation,
     107              :                                                                           true);
     108              : 
     109              :                 /* Pre-create workspace that build_test_match_result needs */
     110            0 :                 matchctx->elems = palloc_array(Datum, matchctx->npatterns + 1);
     111            0 :                 matchctx->nulls = palloc_array(bool, matchctx->npatterns + 1);
     112              : 
     113            0 :                 MemoryContextSwitchTo(oldcontext);
     114            0 :                 funcctx->user_fctx = matchctx;
     115              : 
     116              :                 /*
     117              :                  * Return the first result row, which is info equivalent to Tcl's
     118              :                  * "regexp -about" output
     119              :                  */
     120            0 :                 result_ary = build_test_info_result(&cpattern, &re_flags);
     121              : 
     122            0 :                 pg_regfree(&cpattern);  /* cpattern is local to the first call; later calls use only matchctx */
     123              : 
     124            0 :                 SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary));
     125            0 :         }
     126              :         else
     127              :         {
     128              :                 /* Each subsequent row describes one match */
     129            0 :                 funcctx = SRF_PERCALL_SETUP();
     130            0 :                 matchctx = (test_regex_ctx *) funcctx->user_fctx;
     131              : 
     132            0 :                 if (matchctx->next_match < matchctx->nmatches)
     133              :                 {
     134            0 :                         result_ary = build_test_match_result(matchctx);
     135            0 :                         matchctx->next_match++;
     136            0 :                         SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary));
     137            0 :                 }
     138              :         }
     139              : 
     140            0 :         SRF_RETURN_DONE(funcctx);       /* no match rows remain to be reported */
     141            0 : }
     142              : 
     143              : 
     144              : /*
     145              :  * test_re_compile - compile a RE
     146              :  *
     147              :  *      text_re --- the pattern, expressed as a TEXT object
     148              :  *      cflags --- compile options for the pattern
     149              :  *      collation --- collation to use for LC_CTYPE-dependent behavior
     150              :  *  result_re --- output, compiled RE is stored here
     151              :  *
     152              :  * Pattern is given in the database encoding.  We internally convert to
     153              :  * an array of pg_wchar, which is what Spencer's regex package wants.
     154              :  *
     155              :  * Caller must eventually pg_regfree the resulting RE to avoid memory leaks.
                      :  *
                      :  * If pg_regcomp returns anything other than REG_OKAY, an ERROR with
                      :  * ERRCODE_INVALID_REGULAR_EXPRESSION is raised, using the message text
                      :  * obtained from pg_regerror.
     156              :  */
     157              : static void
     158            0 : test_re_compile(text *text_re, int cflags, Oid collation,
     159              :                                 regex_t *result_re)
     160              : {
     161            0 :         int                     text_re_len = VARSIZE_ANY_EXHDR(text_re);
     162            0 :         char       *text_re_val = VARDATA_ANY(text_re);
     163            0 :         pg_wchar   *pattern;
     164            0 :         int                     pattern_len;
     165            0 :         int                     regcomp_result;
     166            0 :         char            errMsg[100];
     167              : 
     168              :         /* Convert pattern string to wide characters (buffer holds text_re_len + 1 pg_wchar entries) */
     169            0 :         pattern = (pg_wchar *) palloc((text_re_len + 1) * sizeof(pg_wchar));
     170            0 :         pattern_len = pg_mb2wchar_with_len(text_re_val,
     171            0 :                                                                            pattern,
     172            0 :                                                                            text_re_len);
     173              : 
     174            0 :         regcomp_result = pg_regcomp(result_re,
     175            0 :                                                                 pattern,
     176            0 :                                                                 pattern_len,
     177            0 :                                                                 cflags,
     178            0 :                                                                 collation);
     179              : 
     180            0 :         pfree(pattern);         /* the wide-char copy is released as soon as pg_regcomp returns */
     181              : 
     182            0 :         if (regcomp_result != REG_OKAY)
     183              :         {
     184              :                 /* re didn't compile (no need for pg_regfree, if so) */
     185            0 :                 pg_regerror(regcomp_result, result_re, errMsg, sizeof(errMsg));
     186            0 :                 ereport(ERROR,
     187              :                                 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
     188              :                                  errmsg("invalid regular expression: %s", errMsg)));
     189            0 :         }
     190            0 : }
     191              : 
     192              : /*
     193              :  * test_re_execute - execute a RE on pg_wchar data
     194              :  *
     195              :  * Returns true on match, false on no match
     196              :  * Arguments are as for pg_regexec
                      :  *
                      :  * details->rm_extend and all nmatch entries of pmatch[] are preset to -1
                      :  * before pg_regexec is called.  Any result other than REG_OKAY or
                      :  * REG_NOMATCH raises an ERROR with ERRCODE_INVALID_REGULAR_EXPRESSION.
     197              :  */
     198              : static bool
     199            0 : test_re_execute(regex_t *re, pg_wchar *data, int data_len,
     200              :                                 int start_search,
     201              :                                 rm_detail_t *details,
     202              :                                 int nmatch, regmatch_t *pmatch,
     203              :                                 int eflags)
     204              : {
     205            0 :         int                     regexec_result;
     206            0 :         char            errMsg[100];
     207              : 
     208              :         /* Initialize match locations in case engine doesn't */
     209            0 :         details->rm_extend.rm_so = -1;
     210            0 :         details->rm_extend.rm_eo = -1;
     211            0 :         for (int i = 0; i < nmatch; i++)
     212              :         {
     213            0 :                 pmatch[i].rm_so = -1;
     214            0 :                 pmatch[i].rm_eo = -1;
     215            0 :         }
     216              : 
     217              :         /* Perform RE match and return result */
     218            0 :         regexec_result = pg_regexec(re,
     219            0 :                                                                 data,
     220            0 :                                                                 data_len,
     221            0 :                                                                 start_search,
     222            0 :                                                                 details,
     223            0 :                                                                 nmatch,
     224            0 :                                                                 pmatch,
     225            0 :                                                                 eflags);
     226              : 
     227            0 :         if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
     228              :         {
     229              :                 /* re failed??? -- neither a match nor a plain no-match; report pg_regerror's text */
     230            0 :                 pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
     231            0 :                 ereport(ERROR,
     232              :                                 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
     233              :                                  errmsg("regular expression failed: %s", errMsg)));
     234            0 :         }
     235              : 
     236            0 :         return (regexec_result == REG_OKAY);
     237            0 : }
     238              : 
     239              : 
     240              : /*
     241              :  * parse_test_flags - parse the flags argument
     242              :  *
     243              :  *      flags --- output argument, filled with desired options
     244              :  *      opts --- TEXT object, or NULL for defaults
                      :  *
                      :  * Each character of opts is treated as one option.  The characters '-'
                      :  * and '*' are accepted and have no effect.  A character that matches no
                      :  * case below raises an ERROR with ERRCODE_INVALID_PARAMETER_VALUE.
                      :  *
                      :  * Defaults when no option overrides them: cflags = REG_ADVANCED,
                      :  * eflags = 0, info = 0, and glob/indices/partial all false.
     245              :  */
     246              : static void
     247            0 : parse_test_flags(test_re_flags *flags, text *opts)
     248              : {
     249              :         /* these defaults must match Tcl's */
     250            0 :         int                     cflags = REG_ADVANCED;
     251            0 :         int                     eflags = 0;
     252            0 :         long            info = 0;
     253              : 
     254            0 :         flags->glob = false;
     255            0 :         flags->indices = false;
     256            0 :         flags->partial = false;
     257              : 
     258            0 :         if (opts)
     259              :         {
     260            0 :                 char       *opt_p = VARDATA_ANY(opts);
     261            0 :                 int                     opt_len = VARSIZE_ANY_EXHDR(opts);
     262            0 :                 int                     i;
     263              : 
     264            0 :                 for (i = 0; i < opt_len; i++)
     265              :                 {
     266            0 :                         switch (opt_p[i])
     267              :                         {
     268              :                                 case '-':
     269              :                                         /* allowed, no-op */
     270              :                                         break;
     271              :                                 case '!':
     272            0 :                                         flags->partial = true;
     273            0 :                                         break;
     274              :                                 case '*':
     275              :                                         /* test requires Unicode --- ignored here */
     276              :                                         break;
     277              :                                 case '0':
     278            0 :                                         flags->indices = true;
     279            0 :                                         break;
     280              : 
     281              :                                         /* These flags correspond to user-exposed RE options: */
     282              :                                 case 'g':               /* global match */
     283            0 :                                         flags->glob = true;
     284            0 :                                         break;
     285              :                                 case 'i':               /* case insensitive */
     286            0 :                                         cflags |= REG_ICASE;
     287            0 :                                         break;
     288              :                                 case 'n':               /* \n affects ^ $ . [^ */
     289            0 :                                         cflags |= REG_NEWLINE;
     290            0 :                                         break;
     291              :                                 case 'p':               /* ~Perl, \n affects . [^ */
     292            0 :                                         cflags |= REG_NLSTOP;
     293            0 :                                         cflags &= ~REG_NLANCH;
     294            0 :                                         break;
     295              :                                 case 'w':               /* weird, \n affects ^ $ only */
     296            0 :                                         cflags &= ~REG_NLSTOP;
     297            0 :                                         cflags |= REG_NLANCH;
     298            0 :                                         break;
     299              :                                 case 'x':               /* expanded syntax */
     300            0 :                                         cflags |= REG_EXPANDED;
     301            0 :                                         break;
     302              : 
     303              :                                         /* These flags correspond to Tcl's -xflags options: */
     304              :                                 case 'a':
     305            0 :                                         cflags |= REG_ADVF;
     306            0 :                                         break;
     307              :                                 case 'b':
     308            0 :                                         cflags &= ~REG_ADVANCED;
     309            0 :                                         break;
     310              :                                 case 'c':
     311              : 
     312              :                                         /*
     313              :                                          * Tcl calls this TCL_REG_CANMATCH, but it's really
     314              :                                          * REG_EXPECT.  In this implementation we must also set
     315              :                                          * the partial and indices flags, so that
     316              :                                          * setup_test_matches and build_test_match_result will
     317              :                                          * emit the desired data.  (They'll emit more fields than
     318              :                                          * Tcl would, but that's fine.)
     319              :                                          */
     320            0 :                                         cflags |= REG_EXPECT;
     321            0 :                                         flags->partial = true;
     322            0 :                                         flags->indices = true;
     323            0 :                                         break;
     324              :                                 case 'e':
     325            0 :                                         cflags &= ~REG_ADVANCED;
     326            0 :                                         cflags |= REG_EXTENDED;
     327            0 :                                         break;
     328              :                                 case 'q':
     329            0 :                                         cflags &= ~REG_ADVANCED;
     330            0 :                                         cflags |= REG_QUOTE;
     331            0 :                                         break;
     332              :                                 case 'o':               /* o for opaque */
     333            0 :                                         cflags |= REG_NOSUB;
     334            0 :                                         break;
     335              :                                 case 's':               /* s for start */
     336            0 :                                         cflags |= REG_BOSONLY;
     337            0 :                                         break;
     338              :                                 case '+':
     339            0 :                                         cflags |= REG_FAKE;
     340            0 :                                         break;
     341              :                                 case ',':
     342            0 :                                         cflags |= REG_PROGRESS;
     343            0 :                                         break;
     344              :                                 case '.':
     345            0 :                                         cflags |= REG_DUMP;
     346            0 :                                         break;
     347              :                                 case ':':
     348            0 :                                         eflags |= REG_MTRACE;
     349            0 :                                         break;
     350              :                                 case ';':
     351            0 :                                         eflags |= REG_FTRACE;
     352            0 :                                         break;
     353              :                                 case '^':
     354            0 :                                         eflags |= REG_NOTBOL;
     355            0 :                                         break;
     356              :                                 case '$':
     357            0 :                                         eflags |= REG_NOTEOL;
     358            0 :                                         break;
     359              :                                 case 't':               /* same cflags bit as 'c', but partial/indices are left alone */
     360            0 :                                         cflags |= REG_EXPECT;
     361            0 :                                         break;
     362              :                                 case '%':
     363            0 :                                         eflags |= REG_SMALL;
     364            0 :                                         break;
     365              : 
     366              :                                         /* These flags define expected info bits: */
     367              :                                 case 'A':
     368            0 :                                         info |= REG_UBSALNUM;
     369            0 :                                         break;
     370              :                                 case 'B':
     371            0 :                                         info |= REG_UBRACES;
     372            0 :                                         break;
     373              :                                 case 'E':
     374            0 :                                         info |= REG_UBBS;
     375            0 :                                         break;
     376              :                                 case 'H':
     377            0 :                                         info |= REG_ULOOKAROUND;
     378            0 :                                         break;
     379              :                                 case 'I':
     380            0 :                                         info |= REG_UIMPOSSIBLE;
     381            0 :                                         break;
     382              :                                 case 'L':
     383            0 :                                         info |= REG_ULOCALE;
     384            0 :                                         break;
     385              :                                 case 'M':
     386            0 :                                         info |= REG_UUNPORT;
     387            0 :                                         break;
     388              :                                 case 'N':
     389            0 :                                         info |= REG_UEMPTYMATCH;
     390            0 :                                         break;
     391              :                                 case 'P':
     392            0 :                                         info |= REG_UNONPOSIX;
     393            0 :                                         break;
     394              :                                 case 'Q':
     395            0 :                                         info |= REG_UBOUNDS;
     396            0 :                                         break;
     397              :                                 case 'R':
     398            0 :                                         info |= REG_UBACKREF;
     399            0 :                                         break;
     400              :                                 case 'S':
     401            0 :                                         info |= REG_UUNSPEC;
     402            0 :                                         break;
     403              :                                 case 'T':
     404            0 :                                         info |= REG_USHORTEST;
     405            0 :                                         break;
     406              :                                 case 'U':
     407            0 :                                         info |= REG_UPBOTCH;
     408            0 :                                         break;
     409              : 
     410              :                                 default:        /* unrecognized option: quote the offending character in the error */
     411            0 :                                         ereport(ERROR,
     412              :                                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     413              :                                                          errmsg("invalid regular expression test option: \"%.*s\"",
     414              :                                                                         pg_mblen(opt_p + i), opt_p + i)));
     415            0 :                                         break;
     416              :                         }
     417            0 :                 }
     418            0 :         }
     419            0 :         flags->cflags = cflags; /* store the accumulated bit sets into the output struct */
     420            0 :         flags->eflags = eflags;
     421            0 :         flags->info = info;
     422            0 : }
     423              : 
     424              : /*
     425              :  * setup_test_matches --- do the initial matching
     426              :  *
     427              :  * To simplify memory management, we do all the matching in one swoop.
     428              :  * The returned test_regex_ctx contains the locations of all the substrings
     429              :  * matching the pattern.
     430              :  */
     431              : static test_regex_ctx * /* result: context allocated with palloc0_object, holding the recorded match locations */
     432            0 : setup_test_matches(text *orig_str,
     433              :                                    regex_t *cpattern, test_re_flags *re_flags,
     434              :                                    Oid collation, /* not referenced anywhere in this function's body */
     435              :                                    bool use_subpatterns)
     436              : {
     437            0 :         test_regex_ctx *matchctx = palloc0_object(test_regex_ctx); /* nmatches is only incremented below; presumably palloc0_object zero-fills it */
     438            0 :         int                     eml = pg_database_encoding_max_length(); /* > 1 is handled as a multibyte encoding below */
     439            0 :         int                     orig_len;
     440            0 :         pg_wchar   *wide_str;
     441            0 :         int                     wide_len;
     442            0 :         regmatch_t *pmatch;
     443            0 :         int                     pmatch_len;
     444            0 :         int                     array_len;
     445            0 :         int                     array_idx;
     446            0 :         int                     prev_match_end;
     447            0 :         int                     start_search;
     448            0 :         int                     maxlen = 0;             /* largest fetch length in characters */
     449              : 
     450              :         /* save flags (a struct copy, so the context does not point at the caller's flags) */
     451            0 :         matchctx->re_flags = *re_flags;
     452              : 
     453              :         /* save original string --- we'll extract result substrings from it (pointer only, no copy is made) */
     454            0 :         matchctx->orig_str = orig_str;
     455              : 
     456              :         /* convert string to pg_wchar form for matching; orig_len is the byte length, wide_len the converted length used as search bound */
     457            0 :         orig_len = VARSIZE_ANY_EXHDR(orig_str);
     458            0 :         wide_str = palloc_array(pg_wchar, orig_len + 1);
     459            0 :         wide_len = pg_mb2wchar_with_len(VARDATA_ANY(orig_str), wide_str, orig_len);
     460              : 
     461              :         /* do we want to remember subpatterns?  (only if requested and the pattern has at least one) */
     462            0 :         if (use_subpatterns && cpattern->re_nsub > 0)
     463              :         {
     464            0 :                 matchctx->npatterns = cpattern->re_nsub + 1;
     465            0 :                 pmatch_len = cpattern->re_nsub + 1;
     466            0 :         }
     467              :         else
     468              :         {
     469            0 :                 use_subpatterns = false;
     470            0 :                 matchctx->npatterns = 1;
     471            0 :                 pmatch_len = 1;
     472              :         }
     473              : 
     474              :         /* temporary output space for RE package (pfree'd before returning) */
     475            0 :         pmatch = palloc_array(regmatch_t, pmatch_len);
     476              : 
     477              :         /*
     478              :          * the real output space (grown dynamically if needed)
     479              :          *
     480              :          * use values 2^n-1, not 2^n, so that we hit the limit at 2^28-1 rather
     481              :          * than at 2^27
     482              :          */
     483            0 :         array_len = re_flags->glob ? 255 : 31;
     484            0 :         matchctx->match_locs = palloc_array(int, array_len);
     485            0 :         array_idx = 0;
     486              : 
     487              :         /* search for the pattern, perhaps repeatedly; each successful iteration records one match */
     488            0 :         prev_match_end = 0;
     489            0 :         start_search = 0;
     490            0 :         while (test_re_execute(cpattern, wide_str, wide_len,
     491            0 :                                                    start_search,
     492            0 :                                                    &matchctx->details,
     493            0 :                                                    pmatch_len, pmatch,
     494            0 :                                                    re_flags->eflags))
     495              :         {
     496              :                 /* enlarge output space if needed (npatterns * 2 ints are stored per match; the test asks for one more) */
     497            0 :                 while (array_idx + matchctx->npatterns * 2 + 1 > array_len)
     498              :                 {
     499            0 :                         array_len += array_len + 1; /* 2^n-1 => 2^(n+1)-1 */
     500            0 :                         if (array_len > MaxAllocSize / sizeof(int))
     501            0 :                                 ereport(ERROR,
     502              :                                                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     503              :                                                  errmsg("too many regular expression matches")));
     504            0 :                         matchctx->match_locs = (int *) repalloc(matchctx->match_locs,
     505            0 :                                                                                                         sizeof(int) * array_len);
     506              :                 }
     507              : 
     508              :                 /* save this match's locations as consecutive (rm_so, rm_eo) pairs, one pair per pattern */
     509            0 :                 for (int i = 0; i < matchctx->npatterns; i++)
     510              :                 {
     511            0 :                         int                     so = pmatch[i].rm_so;
     512            0 :                         int                     eo = pmatch[i].rm_eo;
     513              : 
     514            0 :                         matchctx->match_locs[array_idx++] = so;
     515            0 :                         matchctx->match_locs[array_idx++] = eo;
     516            0 :                         if (so >= 0 && eo >= 0 && (eo - so) > maxlen) /* track the longest span; negative offsets are ignored */
     517            0 :                                 maxlen = (eo - so);
     518            0 :                 }
     519            0 :                 matchctx->nmatches++;
     520            0 :                 prev_match_end = pmatch[0].rm_eo;
     521              : 
     522              :                 /* if not glob, stop after one match */
     523            0 :                 if (!re_flags->glob)
     524            0 :                         break;
     525              : 
     526              :                 /*
     527              :                  * Advance search position.  Normally we start the next search at the
     528              :                  * end of the previous match; but if the match was of zero length, we
     529              :                  * have to advance by one character, or we'd just find the same match
     530              :                  * again.
     531              :                  */
     532            0 :                 start_search = prev_match_end;
     533            0 :                 if (pmatch[0].rm_so == pmatch[0].rm_eo)
     534            0 :                         start_search++;
     535            0 :                 if (start_search > wide_len) /* moved past the end of the string; equality is still searched */
     536            0 :                         break;
     537              :         }
     538              : 
     539              :         /*
     540              :          * If we had no match, but "partial" and "indices" are set, emit the
     541              :          * details.
     542              :          */
     543            0 :         if (matchctx->nmatches == 0 && re_flags->partial && re_flags->indices)
     544              :         {
     545              :                 /* enlarge output space if needed (same growth rule and limit check as in the search loop) */
     546            0 :                 while (array_idx + matchctx->npatterns * 2 + 1 > array_len)
     547              :                 {
     548            0 :                         array_len += array_len + 1; /* 2^n-1 => 2^(n+1)-1 */
     549            0 :                         if (array_len > MaxAllocSize / sizeof(int))
     550            0 :                                 ereport(ERROR,
     551              :                                                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     552              :                                                  errmsg("too many regular expression matches")));
     553            0 :                         matchctx->match_locs = (int *) repalloc(matchctx->match_locs,
     554            0 :                                                                                                         sizeof(int) * array_len);
     555              :                 }
     556              : 
     557            0 :                 matchctx->match_locs[array_idx++] = matchctx->details.rm_extend.rm_so; /* the first pair holds the rm_extend range */
     558            0 :                 matchctx->match_locs[array_idx++] = matchctx->details.rm_extend.rm_eo;
     559              :                 /* we don't have pmatch data, so emit -1 for every remaining pattern */
     560            0 :                 for (int i = 1; i < matchctx->npatterns; i++)
     561              :                 {
     562            0 :                         matchctx->match_locs[array_idx++] = -1;
     563            0 :                         matchctx->match_locs[array_idx++] = -1;
     564            0 :                 }
     565            0 :                 matchctx->nmatches++;
     566            0 :         }
     567              : 
     568            0 :         Assert(array_idx <= array_len);
     569              : 
     570            0 :         if (eml > 1) /* multibyte encoding: set up a conversion buffer */
     571              :         {
     572            0 :                 int64           maxsiz = eml * (int64) maxlen;
     573            0 :                 int                     conv_bufsiz;
     574              : 
     575              :                 /*
     576              :                  * Make the conversion buffer large enough for any substring of
     577              :                  * interest.
     578              :                  *
     579              :                  * Worst case: assume we need the maximum size (maxlen*eml), but take
     580              :                  * advantage of the fact that the original string length in bytes is
     581              :                  * an upper bound on the byte length of any fetched substring (and we
     582              :                  * know that len+1 is safe to allocate because the varlena header is
     583              :                  * longer than 1 byte).
     584              :                  */
     585            0 :                 if (maxsiz > orig_len)
     586            0 :                         conv_bufsiz = orig_len + 1;
     587              :                 else
     588            0 :                         conv_bufsiz = maxsiz + 1;       /* safe since maxsiz < 2^30 */
     589              : 
     590            0 :                 matchctx->conv_buf = palloc(conv_bufsiz);
     591            0 :                 matchctx->conv_bufsiz = conv_bufsiz;
     592            0 :                 matchctx->wide_str = wide_str; /* kept: build_test_match_result() converts substrings from it */
     593            0 :         }
     594              :         else
     595              :         {
     596              :                 /* No need to keep the wide string if we're in a single-byte charset. */
     597            0 :                 pfree(wide_str);
     598            0 :                 matchctx->wide_str = NULL;
     599            0 :                 matchctx->conv_buf = NULL;
     600            0 :                 matchctx->conv_bufsiz = 0;
     601              :         }
     602              : 
     603              :         /* Clean up temp storage (only pmatch; everything else is owned by the returned context) */
     604            0 :         pfree(pmatch);
     605              : 
     606            0 :         return matchctx;
     607            0 : }
     608              : 
     609              : /*
     610              :  * build_test_info_result - build output array describing compiled regexp
     611              :  * Element 1 is re_nsub as text; then, for each known info bit set in re_info or expected in flags->info, one element: the bit name, "unexpected NAME!" or "missing NAME!".
     612              :  * This borrows some code from Tcl's TclRegAbout().
     613              :  */
     614              : static ArrayType *
     615            0 : build_test_info_result(regex_t *cpattern, test_re_flags *flags)
     616              : {
     617              :         /* Translation data for flag bits in regex_t.re_info */
     618              :         struct infoname
     619              :         {
     620              :                 int                     bit;
     621              :                 const char *text;
     622              :         };
     623              :         static const struct infoname infonames[] = {
     624              :                 {REG_UBACKREF, "REG_UBACKREF"},
     625              :                 {REG_ULOOKAROUND, "REG_ULOOKAROUND"},
     626              :                 {REG_UBOUNDS, "REG_UBOUNDS"},
     627              :                 {REG_UBRACES, "REG_UBRACES"},
     628              :                 {REG_UBSALNUM, "REG_UBSALNUM"},
     629              :                 {REG_UPBOTCH, "REG_UPBOTCH"},
     630              :                 {REG_UBBS, "REG_UBBS"},
     631              :                 {REG_UNONPOSIX, "REG_UNONPOSIX"},
     632              :                 {REG_UUNSPEC, "REG_UUNSPEC"},
     633              :                 {REG_UUNPORT, "REG_UUNPORT"},
     634              :                 {REG_ULOCALE, "REG_ULOCALE"},
     635              :                 {REG_UEMPTYMATCH, "REG_UEMPTYMATCH"},
     636              :                 {REG_UIMPOSSIBLE, "REG_UIMPOSSIBLE"},
     637              :                 {REG_USHORTEST, "REG_USHORTEST"},
     638              :                 {0, NULL} /* sentinel: bit == 0 ends the scan below */
     639              :         };
     640            0 :         const struct infoname *inf;
     641            0 :         Datum           elems[lengthof(infonames) + 1]; /* the count plus at most one entry per table row, so this cannot overflow */
     642            0 :         int                     nresults = 0;
     643            0 :         char            buf[80];
     644            0 :         int                     dims[1];
     645            0 :         int                     lbs[1];
     646              : 
     647              :         /* Set up results: first, the number of subexpressions */
     648            0 :         snprintf(buf, sizeof(buf), "%d", (int) cpattern->re_nsub);
     649            0 :         elems[nresults++] = PointerGetDatum(cstring_to_text(buf));
     650              : 
     651              :         /* Report individual info bit states, comparing actual (re_info) against expected (flags->info); bits in neither produce no element */
     652            0 :         for (inf = infonames; inf->bit != 0; inf++)
     653              :         {
     654            0 :                 if (cpattern->re_info & inf->bit)
     655              :                 {
     656            0 :                         if (flags->info & inf->bit)
     657            0 :                                 elems[nresults++] = PointerGetDatum(cstring_to_text(inf->text));
     658              :                         else
     659              :                         {
     660            0 :                                 snprintf(buf, sizeof(buf), "unexpected %s!", inf->text);
     661            0 :                                 elems[nresults++] = PointerGetDatum(cstring_to_text(buf));
     662              :                         }
     663            0 :                 }
     664              :                 else
     665              :                 {
     666            0 :                         if (flags->info & inf->bit)
     667              :                         {
     668            0 :                                 snprintf(buf, sizeof(buf), "missing %s!", inf->text);
     669            0 :                                 elems[nresults++] = PointerGetDatum(cstring_to_text(buf));
     670            0 :                         }
     671              :                 }
     672            0 :         }
     673              : 
     674              :         /* And form an array (one dimension of nresults elements, lower bound 1, no null bitmap) */
     675            0 :         dims[0] = nresults;
     676            0 :         lbs[0] = 1;
     677              :         /* XXX: this hardcodes assumptions about the text type */
     678            0 :         return construct_md_array(elems, NULL, 1, dims, lbs,
     679              :                                                           TEXTOID, -1, false, TYPALIGN_INT);
     680            0 : }
     681              : 
     682              : /*
     683              :  * build_test_match_result - build output array for current match
     684              :  * Reads the (so, eo) pairs of match number next_match from match_locs; next_match itself is not modified here.
     685              :  * Note that if the indices flag is set, we don't need any strings,
     686              :  * just the location data.
     687              :  */
     688              : static ArrayType *
     689            0 : build_test_match_result(test_regex_ctx *matchctx)
     690              : {
     691            0 :         char       *buf = matchctx->conv_buf;
     692            0 :         Datum      *elems = matchctx->elems;
     693            0 :         bool       *nulls = matchctx->nulls;
     694            0 :         bool            indices = matchctx->re_flags.indices;
     695            0 :         char            bufstr[80];
     696            0 :         int                     dims[1];
     697            0 :         int                     lbs[1];
     698            0 :         int                     loc;
     699            0 :         int                     i;
     700              : 
     701              :         /* Extract matching substrings from the original string; loc starts at this match's first (so, eo) pair */
     702            0 :         loc = matchctx->next_match * matchctx->npatterns * 2;
     703            0 :         for (i = 0; i < matchctx->npatterns; i++)
     704              :         {
     705            0 :                 int                     so = matchctx->match_locs[loc++];
     706            0 :                 int                     eo = matchctx->match_locs[loc++];
     707              : 
     708            0 :                 if (indices)
     709              :                 {
     710              :                         /* Report eo this way for consistency with Tcl (eo - 1 when so is non-negative; eo unchanged when so < 0) */
     711            0 :                         snprintf(bufstr, sizeof(bufstr), "%d %d",
     712            0 :                                          so, so < 0 ? eo : eo - 1);
     713            0 :                         elems[i] = PointerGetDatum(cstring_to_text(bufstr));
     714            0 :                         nulls[i] = false;
     715            0 :                 }
     716            0 :                 else if (so < 0 || eo < 0) /* negative offset: emit a NULL element */
     717              :                 {
     718            0 :                         elems[i] = (Datum) 0;
     719            0 :                         nulls[i] = true;
     720            0 :                 }
     721            0 :                 else if (buf) /* conversion buffer present (setup_test_matches allocates it only when eml > 1) */
     722              :                 {
     723            0 :                         int                     len = pg_wchar2mb_with_len(matchctx->wide_str + so,
     724            0 :                                                                                                    buf,
     725            0 :                                                                                                    eo - so);
     726              : 
     727            0 :                         Assert(len < matchctx->conv_bufsiz);
     728            0 :                         elems[i] = PointerGetDatum(cstring_to_text_with_len(buf, len));
     729            0 :                         nulls[i] = false;
     730            0 :                 }
     731              :                 else /* no conversion buffer: take the substring directly from orig_str, start so + 1 and length eo - so */
     732              :                 {
     733            0 :                         elems[i] = DirectFunctionCall3(text_substr,
     734              :                                                                                    PointerGetDatum(matchctx->orig_str),
     735              :                                                                                    Int32GetDatum(so + 1),
     736              :                                                                                    Int32GetDatum(eo - so));
     737            0 :                         nulls[i] = false;
     738              :                 }
     739            0 :         }
     740              : 
     741              :         /* In EXPECT indices mode, also report the "details" as one extra trailing element (so elems/nulls need npatterns + 1 slots; their allocation is not visible here) */
     742            0 :         if (indices && (matchctx->re_flags.cflags & REG_EXPECT))
     743              :         {
     744            0 :                 int                     so = matchctx->details.rm_extend.rm_so;
     745            0 :                 int                     eo = matchctx->details.rm_extend.rm_eo;
     746              : 
     747            0 :                 snprintf(bufstr, sizeof(bufstr), "%d %d",
     748            0 :                                  so, so < 0 ? eo : eo - 1);
     749            0 :                 elems[i] = PointerGetDatum(cstring_to_text(bufstr));
     750            0 :                 nulls[i] = false;
     751            0 :                 i++;
     752            0 :         }
     753              : 
     754              :         /* And form an array (one dimension of i elements, lower bound 1) */
     755            0 :         dims[0] = i;
     756            0 :         lbs[0] = 1;
     757              :         /* XXX: this hardcodes assumptions about the text type */
     758            0 :         return construct_md_array(elems, nulls, 1, dims, lbs,
     759              :                                                           TEXTOID, -1, false, TYPALIGN_INT);
     760            0 : }
        

Generated by: LCOV version 2.3.2-1