LCOV - code coverage report
Current view: top level - src/interfaces/ecpg/preproc - parser.c (source / functions) Coverage Total Hit
Test: Code coverage Lines: 0.0 % 107 0
Test Date: 2026-01-26 10:56:24 Functions: 0.0 % 4 0
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * parser.c
       4              :  *              Main entry point/driver for PostgreSQL grammar
       5              :  *
       6              :  * This should match src/backend/parser/parser.c, except that we do not
       7              :  * need to bother with re-entrant interfaces.
       8              :  *
       9              :  * Note: ECPG doesn't report error location like the backend does.
      10              :  * This file will need work if we ever want it to.
      11              :  *
      12              :  *
      13              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      14              :  * Portions Copyright (c) 1994, Regents of the University of California
      15              :  *
      16              :  * IDENTIFICATION
      17              :  *        src/interfaces/ecpg/preproc/parser.c
      18              :  *
      19              :  *-------------------------------------------------------------------------
      20              :  */
      21              : 
      22              : #include "postgres_fe.h"
      23              : 
      24              : #include "preproc_extern.h"
      25              : #include "preproc.h"
      26              : 
      27              : 
      28              : static bool have_lookahead;             /* is lookahead info valid? */
      29              : static int      lookahead_token;        /* one-token lookahead */
      30              : static YYSTYPE lookahead_yylval;        /* yylval for lookahead token */
      31              : static YYLTYPE lookahead_yylloc;        /* yylloc for lookahead token */
      32              : static char *lookahead_yytext;  /* yytext for lookahead token */
      33              : 
      34              : static int      base_yylex_location(void);      /* base_yylex() plus yylloc setup */
      35              : static bool check_uescapechar(unsigned char escape);    /* UESCAPE character check */
      36              : static bool ecpg_isspace(char ch);      /* whitespace test */
      37              : 
      38              : 
      39              : /*
      40              :  * Intermediate filter between parser and base lexer (base_yylex in scan.l).
      41              :  *
      42              :  * This filter is needed because in some cases the standard SQL grammar
      43              :  * requires more than one token lookahead.  We reduce these cases to one-token
      44              :  * lookahead by replacing tokens here, in order to keep the grammar LALR(1).
      45              :  *
      46              :  * Using a filter is simpler than trying to recognize multiword tokens
      47              :  * directly in scan.l, because we'd have to allow for comments between the
      48              :  * words.  Furthermore it's not clear how to do that without re-introducing
      49              :  * scanner backtrack, which would cost more performance than this filter
      50              :  * layer does.
      51              :  *
      52              :  * We also use this filter to convert UIDENT and USCONST sequences into
      53              :  * plain IDENT and SCONST tokens; grammar productions could do that too, but
      54              :  * less efficiently.  Returns the (possibly replaced) code of the next token.
      55              :  */
      56              : int
      57            0 : filtered_base_yylex(void)
      58              : {
      59            0 :         int                     cur_token;
      60            0 :         int                     next_token;
      61            0 :         YYSTYPE         cur_yylval;
      62            0 :         YYLTYPE         cur_yylloc;
      63            0 :         char       *cur_yytext;
      64              : 
      65              :         /* Get next token --- a previous call may have buffered it already */
      66            0 :         if (have_lookahead)
      67              :         {
      68            0 :                 cur_token = lookahead_token;
      69            0 :                 base_yylval = lookahead_yylval;
      70            0 :                 base_yylloc = lookahead_yylloc;
      71            0 :                 base_yytext = lookahead_yytext;
      72            0 :                 have_lookahead = false; /* the buffered token is now consumed */
      73            0 :         }
      74              :         else
      75            0 :                 cur_token = base_yylex_location();
      76              : 
      77              :         /*
      78              :          * If this token isn't one that requires lookahead, just return it.
      79              :          */
      80            0 :         switch (cur_token)
      81              :         {
      82              :                 case FORMAT:
      83              :                 case NOT:
      84              :                 case NULLS_P:
      85              :                 case WITH:
      86              :                 case WITHOUT:
      87              :                 case UIDENT:
      88              :                 case USCONST:
      89            0 :                         break;
      90              :                 default:
      91            0 :                         return cur_token;
      92              :         }
      93              : 
      94              :         /* Save current token's lexer outputs; the next scanner call overwrites them */
      95            0 :         cur_yylval = base_yylval;
      96            0 :         cur_yylloc = base_yylloc;
      97            0 :         cur_yytext = base_yytext;
      98              : 
      99              :         /* Get next token, saving outputs into lookahead variables */
     100            0 :         next_token = base_yylex_location();
     101              : 
     102            0 :         lookahead_token = next_token;
     103            0 :         lookahead_yylval = base_yylval;
     104            0 :         lookahead_yylloc = base_yylloc;
     105            0 :         lookahead_yytext = base_yytext;
     106              : 
     107            0 :         base_yylval = cur_yylval;       /* put the current token's outputs back */
     108            0 :         base_yylloc = cur_yylloc;
     109            0 :         base_yytext = cur_yytext;
     110              : 
     111            0 :         have_lookahead = true;  /* the next call will hand out the buffered token */
     112              : 
     113              :         /* Replace cur_token if needed, based on lookahead */
     114            0 :         switch (cur_token)
     115              :         {
     116              :                 case FORMAT:
     117              :                         /* Replace FORMAT by FORMAT_LA if it's followed by JSON */
     118            0 :                         switch (next_token)
     119              :                         {
     120              :                                 case JSON:
     121            0 :                                         cur_token = FORMAT_LA;
     122            0 :                                         break;
     123              :                         }
     124            0 :                         break;
     125              : 
     126              :                 case NOT:
     127              :                         /* Replace NOT by NOT_LA if it's followed by BETWEEN, IN, etc */
     128            0 :                         switch (next_token)
     129              :                         {
     130              :                                 case BETWEEN:
     131              :                                 case IN_P:
     132              :                                 case LIKE:
     133              :                                 case ILIKE:
     134              :                                 case SIMILAR:
     135            0 :                                         cur_token = NOT_LA;
     136            0 :                                         break;
     137              :                         }
     138            0 :                         break;
     139              : 
     140              :                 case NULLS_P:
     141              :                         /* Replace NULLS_P by NULLS_LA if it's followed by FIRST or LAST */
     142            0 :                         switch (next_token)
     143              :                         {
     144              :                                 case FIRST_P:
     145              :                                 case LAST_P:
     146            0 :                                         cur_token = NULLS_LA;
     147            0 :                                         break;
     148              :                         }
     149            0 :                         break;
     150              : 
     151              :                 case WITH:
     152              :                         /* Replace WITH by WITH_LA if it's followed by TIME or ORDINALITY */
     153            0 :                         switch (next_token)
     154              :                         {
     155              :                                 case TIME:
     156              :                                 case ORDINALITY:
     157            0 :                                         cur_token = WITH_LA;
     158            0 :                                         break;
     159              :                         }
     160            0 :                         break;
     161              : 
     162              :                 case WITHOUT:
     163              :                         /* Replace WITHOUT by WITHOUT_LA if it's followed by TIME */
     164            0 :                         switch (next_token)
     165              :                         {
     166              :                                 case TIME:
     167            0 :                                         cur_token = WITHOUT_LA;
     168            0 :                                         break;
     169              :                         }
     170            0 :                         break;
     171              :                 case UIDENT:
     172              :                 case USCONST:
     173              :                         /* Look ahead for an optional UESCAPE clause */
     174            0 :                         if (next_token == UESCAPE)
     175              :                         {
     176              :                                 /* Yup, so get third token, which had better be SCONST */
     177            0 :                                 const char *escstr;
     178              : 
     179              :                                 /*
     180              :                                  * Again save and restore lexer output variables around the
     181              :                                  * call
     182              :                                  */
     183            0 :                                 cur_yylval = base_yylval;
     184            0 :                                 cur_yylloc = base_yylloc;
     185            0 :                                 cur_yytext = base_yytext;
     186              : 
     187              :                                 /* Get third token */
     188            0 :                                 next_token = base_yylex_location();
     189              : 
     190            0 :                                 if (next_token != SCONST)       /* NOTE(review): the code below still runs unless mmerror() exits -- confirm */
     191            0 :                                         mmerror(PARSE_ERROR, ET_ERROR, "UESCAPE must be followed by a simple string literal");
     192              : 
     193              :                                 /*
     194              :                                  * Save and check escape string; the scanner returns it with quotes,
     195              :                                  * so a valid one has length 3 with the escape character at [1]
     196              :                                  */
     197            0 :                                 escstr = base_yylval.str;
     198            0 :                                 if (strlen(escstr) != 3 || !check_uescapechar(escstr[1]))
     199            0 :                                         mmerror(PARSE_ERROR, ET_ERROR, "invalid Unicode escape character");
     200              : 
     201            0 :                                 base_yylval = cur_yylval;
     202            0 :                                 base_yylloc = cur_yylloc;
     203            0 :                                 base_yytext = cur_yytext;
     204              : 
     205              :                                 /* Combine 3 tokens into 1 */
     206            0 :                                 base_yylval.str = make3_str(base_yylval.str,
     207              :                                                                                         " UESCAPE ",
     208            0 :                                                                                         escstr);
     209            0 :                                 base_yylloc = loc_strdup(base_yylval.str);      /* yylloc mirrors the combined string */
     210              : 
     211              :                                 /* Clear have_lookahead, thereby consuming all three tokens */
     212            0 :                                 have_lookahead = false;
     213            0 :                         }
     214              : 
     215            0 :                         if (cur_token == UIDENT)        /* the grammar only ever sees the plain token types */
     216            0 :                                 cur_token = IDENT;
     217            0 :                         else if (cur_token == USCONST)
     218            0 :                                 cur_token = SCONST;
     219            0 :                         break;
     220              :         }
     221              : 
     222            0 :         return cur_token;
     223            0 : }
     224              : 
     225              : /*
     226              :  * Call base_yylex(), fill in base_yylloc, and return the token code.
     227              :  *
     228              :  * pgc.l does not worry about setting yylloc, and given what we want for
     229              :  * that, trying to set it there would be pretty inconvenient.  What we
     230              :  * want is: if the returned token has type <str>, then duplicate its
     231              :  * string value as yylloc; otherwise, make a downcased copy of yytext.
     232              :  * The downcasing is ASCII-only because all that we care about there
     233              :  * is producing uniformly-cased output of keywords.  (That's mostly
     234              :  * cosmetic, but there are places in ecpglib that expect to receive
     235              :  * downcased keywords, plus it keeps us regression-test-compatible
     236              :  * with the pre-v18 implementation of ecpg.)
     237              :  */
     238              : static int
     239            0 : base_yylex_location(void)
     240              : {
     241            0 :         int                     token = base_yylex();
     242              : 
     243            0 :         switch (token)
     244              :         {
     245              :                         /* List a token here if pgc.l assigns to base_yylval.str for it */
     246              :                 case Op:
     247              :                 case CSTRING:
     248              :                 case CPP_LINE:
     249              :                 case CVARIABLE:
     250              :                 case BCONST:
     251              :                 case SCONST:
     252              :                 case USCONST:
     253              :                 case XCONST:
     254              :                 case FCONST:
     255              :                 case IDENT:
     256              :                 case UIDENT:
     257              :                 case IP:
     258              :                         /* Duplicate the <str> value */
     259            0 :                         base_yylloc = loc_strdup(base_yylval.str);
     260            0 :                         break;
     261              :                 default:
     262              :                         /* Else just use the input, i.e., yytext */
     263            0 :                         base_yylloc = loc_strdup(base_yytext);
     264              :                         /* Apply an ASCII-only downcasing to the copy just made */
     265            0 :                         for (unsigned char *ptr = (unsigned char *) base_yylloc; *ptr; ptr++)
     266              :                         {
     267            0 :                                 if (*ptr >= 'A' && *ptr <= 'Z')
     268            0 :                                         *ptr += 'a' - 'A';      /* shift A-Z onto a-z */
     269            0 :                         }
     270            0 :                         break;
     271              :         }
     272            0 :         return token;   /* the token code passes through unchanged */
     273            0 : }
     274              : 
     275              : /*
     276              :  * check_uescapechar() and ecpg_isspace() should match their equivalents
     277              :  * in pgc.l.
     278              :  */
     279              : 
     280              : /* Is 'escape' acceptable as the Unicode escape character (UESCAPE syntax)? */
     281              : static bool
     282            0 : check_uescapechar(unsigned char escape)
     283              : {
     284            0 :         if (isxdigit(escape)
     285            0 :                 || escape == '+'
     286            0 :                 || escape == '\''
     287            0 :                 || escape == '"'
     288            0 :                 || ecpg_isspace(escape))
     289            0 :                 return false;   /* hex digits, plus sign, quotes and whitespace are rejected */
     290              :         else
     291            0 :                 return true;
     292            0 : }
     293              : 
     294              : /*
     295              :  * ecpg_isspace() --- return true if flex scanner considers char whitespace
     296              :  */
     297              : static bool
     298            0 : ecpg_isspace(char ch)
     299              : {
     300            0 :         if (ch == ' ' ||
     301            0 :                 ch == '\t' ||
     302            0 :                 ch == '\n' ||
     303            0 :                 ch == '\r' ||
     304            0 :                 ch == '\f')
     305            0 :                 return true;    /* space, tab, newline, carriage return or form feed */
     306            0 :         return false;   /* NOTE(review): vertical tab is not accepted here; confirm pgc.l agrees */
     307            0 : }
        

Generated by: LCOV version 2.3.2-1