LCOV - Code coverage - src/backend/utils/adt/pg_locale

LCOV - code coverage report

Current view:	top level - src/backend/utils/adt - pg_locale_builtin.c (source / functions)		Coverage	Total	Hit
Test:	Code coverage	Lines:	76.4 %	110	84
Test Date:	2026-01-26 10:56:24	Functions:	68.2 %	22	15
Legend:	Lines: hit not hit Branches: + taken - not taken # not executed	Branches:	52.5 %	40	21

             Branch data     Line data    Source code

       1                 :             : /*-----------------------------------------------------------------------
       2                 :             :  *
       3                 :             :  * PostgreSQL locale utilities for builtin provider
       4                 :             :  *
       5                 :             :  * Portions Copyright (c) 2002-2026, PostgreSQL Global Development Group
       6                 :             :  *
       7                 :             :  * src/backend/utils/adt/pg_locale_builtin.c
       8                 :             :  *
       9                 :             :  *-----------------------------------------------------------------------
      10                 :             :  */
      11                 :             : 
      12                 :             : #include "postgres.h"
      13                 :             : 
      14                 :             : #include "catalog/pg_database.h"
      15                 :             : #include "catalog/pg_collation.h"
      16                 :             : #include "common/unicode_case.h"
      17                 :             : #include "common/unicode_category.h"
      18                 :             : #include "miscadmin.h"
      19                 :             : #include "utils/builtins.h"
      20                 :             : #include "utils/pg_locale.h"
      21                 :             : #include "utils/syscache.h"
      22                 :             : 
      23                 :             : extern pg_locale_t create_pg_locale_builtin(Oid collid,
      24                 :             :                                                                                         MemoryContext context);
      25                 :             : extern char *get_collation_actual_version_builtin(const char *collcollate);
      26                 :             : 
      27                 :             : struct WordBoundaryState
      28                 :             : {
      29                 :             :         const char *str;
      30                 :             :         size_t          len;
      31                 :             :         size_t          offset;
      32                 :             :         bool            posix;
      33                 :             :         bool            init;
      34                 :             :         bool            prev_alnum;
      35                 :             : };
      36                 :             : 
      37                 :             : /*
      38                 :             :  * In UTF-8, pg_wchar is guaranteed to be the code point value.
      39                 :             :  */
      40                 :             : static inline char32_t
      41                 :       16490 : to_char32(pg_wchar wc)
      42                 :             : {
      43         [ +  - ]:       16490 :         Assert(GetDatabaseEncoding() == PG_UTF8);
      44                 :       16490 :         return (char32_t) wc;
      45                 :             : }
      46                 :             : 
      47                 :             : static inline pg_wchar
      48                 :         104 : to_pg_wchar(char32_t c32)
      49                 :             : {
      50         [ +  - ]:         104 :         Assert(GetDatabaseEncoding() == PG_UTF8);
      51                 :         104 :         return (pg_wchar) c32;
      52                 :             : }
      53                 :             : 
      54                 :             : /*
      55                 :             :  * Simple word boundary iterator that draws boundaries each time the result of
      56                 :             :  * pg_u_isalnum() changes.
      57                 :             :  */
      58                 :             : static size_t
      59                 :         136 : initcap_wbnext(void *state)
      60                 :             : {
      61                 :         136 :         struct WordBoundaryState *wbstate = (struct WordBoundaryState *) state;
      62                 :             : 
      63   [ +  +  +  + ]:         528 :         while (wbstate->offset < wbstate->len &&
      64                 :         248 :                    wbstate->str[wbstate->offset] != '\0')
      65                 :             :         {
      66                 :         496 :                 char32_t        u = utf8_to_unicode((unsigned char *) wbstate->str +
      67                 :         248 :                                                                                 wbstate->offset);
      68                 :         248 :                 bool            curr_alnum = pg_u_isalnum(u, wbstate->posix);
      69                 :             : 
      70   [ +  +  +  + ]:         248 :                 if (!wbstate->init || curr_alnum != wbstate->prev_alnum)
      71                 :             :                 {
      72                 :         104 :                         size_t          prev_offset = wbstate->offset;
      73                 :             : 
      74                 :         104 :                         wbstate->init = true;
      75                 :         104 :                         wbstate->offset += unicode_utf8len(u);
      76                 :         104 :                         wbstate->prev_alnum = curr_alnum;
      77                 :         104 :                         return prev_offset;
      78                 :         104 :                 }
      79                 :             : 
      80                 :         144 :                 wbstate->offset += unicode_utf8len(u);
      81         [ +  + ]:         248 :         }
      82                 :             : 
      83                 :          32 :         return wbstate->len;
      84                 :         136 : }
      85                 :             : 
      86                 :             : static size_t
      87                 :          48 : strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
      88                 :             :                                  pg_locale_t locale)
      89                 :             : {
      90                 :          96 :         return unicode_strlower(dest, destsize, src, srclen,
      91                 :          48 :                                                         locale->builtin.casemap_full);
      92                 :             : }
      93                 :             : 
      94                 :             : static size_t
      95                 :          32 : strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
      96                 :             :                                  pg_locale_t locale)
      97                 :             : {
      98                 :         128 :         struct WordBoundaryState wbstate = {
      99                 :          32 :                 .str = src,
     100                 :          32 :                 .len = srclen,
     101                 :             :                 .offset = 0,
     102                 :          32 :                 .posix = !locale->builtin.casemap_full,
     103                 :             :                 .init = false,
     104                 :             :                 .prev_alnum = false,
     105                 :             :         };
     106                 :             : 
     107                 :          96 :         return unicode_strtitle(dest, destsize, src, srclen,
     108                 :          32 :                                                         locale->builtin.casemap_full,
     109                 :             :                                                         initcap_wbnext, &wbstate);
     110                 :          32 : }
     111                 :             : 
     112                 :             : static size_t
     113                 :          28 : strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
     114                 :             :                                  pg_locale_t locale)
     115                 :             : {
     116                 :          56 :         return unicode_strupper(dest, destsize, src, srclen,
     117                 :          28 :                                                         locale->builtin.casemap_full);
     118                 :             : }
     119                 :             : 
     120                 :             : static size_t
     121                 :           2 : strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
     122                 :             :                                 pg_locale_t locale)
     123                 :             : {
     124                 :           4 :         return unicode_strfold(dest, destsize, src, srclen,
     125                 :           2 :                                                    locale->builtin.casemap_full);
     126                 :             : }
     127                 :             : 
     128                 :             : static bool
     129                 :        4098 : wc_isdigit_builtin(pg_wchar wc, pg_locale_t locale)
     130                 :             : {
     131                 :        4098 :         return pg_u_isdigit(to_char32(wc), !locale->builtin.casemap_full);
     132                 :             : }
     133                 :             : 
     134                 :             : static bool
     135                 :           0 : wc_isalpha_builtin(pg_wchar wc, pg_locale_t locale)
     136                 :             : {
     137                 :           0 :         return pg_u_isalpha(to_char32(wc));
     138                 :             : }
     139                 :             : 
     140                 :             : static bool
     141                 :        4096 : wc_isalnum_builtin(pg_wchar wc, pg_locale_t locale)
     142                 :             : {
     143                 :        4096 :         return pg_u_isalnum(to_char32(wc), !locale->builtin.casemap_full);
     144                 :             : }
     145                 :             : 
     146                 :             : static bool
     147                 :        4096 : wc_isupper_builtin(pg_wchar wc, pg_locale_t locale)
     148                 :             : {
     149                 :        4096 :         return pg_u_isupper(to_char32(wc));
     150                 :             : }
     151                 :             : 
     152                 :             : static bool
     153                 :           0 : wc_islower_builtin(pg_wchar wc, pg_locale_t locale)
     154                 :             : {
     155                 :           0 :         return pg_u_islower(to_char32(wc));
     156                 :             : }
     157                 :             : 
     158                 :             : static bool
     159                 :           0 : wc_isgraph_builtin(pg_wchar wc, pg_locale_t locale)
     160                 :             : {
     161                 :           0 :         return pg_u_isgraph(to_char32(wc));
     162                 :             : }
     163                 :             : 
     164                 :             : static bool
     165                 :           0 : wc_isprint_builtin(pg_wchar wc, pg_locale_t locale)
     166                 :             : {
     167                 :           0 :         return pg_u_isprint(to_char32(wc));
     168                 :             : }
     169                 :             : 
     170                 :             : static bool
     171                 :        4096 : wc_ispunct_builtin(pg_wchar wc, pg_locale_t locale)
     172                 :             : {
     173                 :        4096 :         return pg_u_ispunct(to_char32(wc), !locale->builtin.casemap_full);
     174                 :             : }
     175                 :             : 
     176                 :             : static bool
     177                 :           0 : wc_isspace_builtin(pg_wchar wc, pg_locale_t locale)
     178                 :             : {
     179                 :           0 :         return pg_u_isspace(to_char32(wc));
     180                 :             : }
     181                 :             : 
     182                 :             : static bool
     183                 :           0 : wc_isxdigit_builtin(pg_wchar wc, pg_locale_t locale)
     184                 :             : {
     185                 :           0 :         return pg_u_isxdigit(to_char32(wc), !locale->builtin.casemap_full);
     186                 :             : }
     187                 :             : 
     188                 :             : static bool
     189                 :           0 : wc_iscased_builtin(pg_wchar wc, pg_locale_t locale)
     190                 :             : {
     191                 :           0 :         return pg_u_prop_cased(to_char32(wc));
     192                 :             : }
     193                 :             : 
     194                 :             : static pg_wchar
     195                 :          52 : wc_toupper_builtin(pg_wchar wc, pg_locale_t locale)
     196                 :             : {
     197                 :          52 :         return to_pg_wchar(unicode_uppercase_simple(to_char32(wc)));
     198                 :             : }
     199                 :             : 
     200                 :             : static pg_wchar
     201                 :          52 : wc_tolower_builtin(pg_wchar wc, pg_locale_t locale)
     202                 :             : {
     203                 :          52 :         return to_pg_wchar(unicode_lowercase_simple(to_char32(wc)));
     204                 :             : }
     205                 :             : 
     206                 :             : static const struct ctype_methods ctype_methods_builtin = {
     207                 :             :         .strlower = strlower_builtin,
     208                 :             :         .strtitle = strtitle_builtin,
     209                 :             :         .strupper = strupper_builtin,
     210                 :             :         .strfold = strfold_builtin,
     211                 :             :         /* uses plain ASCII semantics for historical reasons */
     212                 :             :         .downcase_ident = NULL,
     213                 :             :         .wc_isdigit = wc_isdigit_builtin,
     214                 :             :         .wc_isalpha = wc_isalpha_builtin,
     215                 :             :         .wc_isalnum = wc_isalnum_builtin,
     216                 :             :         .wc_isupper = wc_isupper_builtin,
     217                 :             :         .wc_islower = wc_islower_builtin,
     218                 :             :         .wc_isgraph = wc_isgraph_builtin,
     219                 :             :         .wc_isprint = wc_isprint_builtin,
     220                 :             :         .wc_ispunct = wc_ispunct_builtin,
     221                 :             :         .wc_isspace = wc_isspace_builtin,
     222                 :             :         .wc_isxdigit = wc_isxdigit_builtin,
     223                 :             :         .wc_iscased = wc_iscased_builtin,
     224                 :             :         .wc_tolower = wc_tolower_builtin,
     225                 :             :         .wc_toupper = wc_toupper_builtin,
     226                 :             : };
     227                 :             : 
     228                 :             : pg_locale_t
     229                 :           8 : create_pg_locale_builtin(Oid collid, MemoryContext context)
     230                 :             : {
     231                 :           8 :         const char *locstr;
     232                 :           8 :         pg_locale_t result;
     233                 :             : 
     234         [ -  + ]:           8 :         if (collid == DEFAULT_COLLATION_OID)
     235                 :             :         {
     236                 :           0 :                 HeapTuple       tp;
     237                 :           0 :                 Datum           datum;
     238                 :             : 
     239                 :           0 :                 tp = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
     240         [ #  # ]:           0 :                 if (!HeapTupleIsValid(tp))
     241   [ #  #  #  # ]:           0 :                         elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
     242                 :           0 :                 datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
     243                 :             :                                                                            Anum_pg_database_datlocale);
     244                 :           0 :                 locstr = TextDatumGetCString(datum);
     245                 :           0 :                 ReleaseSysCache(tp);
     246                 :           0 :         }
     247                 :             :         else
     248                 :             :         {
     249                 :           8 :                 HeapTuple       tp;
     250                 :           8 :                 Datum           datum;
     251                 :             : 
     252                 :           8 :                 tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
     253         [ +  - ]:           8 :                 if (!HeapTupleIsValid(tp))
     254   [ #  #  #  # ]:           0 :                         elog(ERROR, "cache lookup failed for collation %u", collid);
     255                 :           8 :                 datum = SysCacheGetAttrNotNull(COLLOID, tp,
     256                 :             :                                                                            Anum_pg_collation_colllocale);
     257                 :           8 :                 locstr = TextDatumGetCString(datum);
     258                 :           8 :                 ReleaseSysCache(tp);
     259                 :           8 :         }
     260                 :             : 
     261                 :           8 :         builtin_validate_locale(GetDatabaseEncoding(), locstr);
     262                 :             : 
     263                 :           8 :         result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
     264                 :             : 
     265                 :           8 :         result->builtin.locale = MemoryContextStrdup(context, locstr);
     266                 :           8 :         result->builtin.casemap_full = (strcmp(locstr, "PG_UNICODE_FAST") == 0);
     267                 :           8 :         result->deterministic = true;
     268                 :           8 :         result->collate_is_c = true;
     269                 :           8 :         result->ctype_is_c = (strcmp(locstr, "C") == 0);
     270         [ +  + ]:           8 :         if (!result->ctype_is_c)
     271                 :           5 :                 result->ctype = &ctype_methods_builtin;
     272                 :             : 
     273                 :          16 :         return result;
     274                 :           8 : }
     275                 :             : 
     276                 :             : char *
     277                 :          13 : get_collation_actual_version_builtin(const char *collcollate)
     278                 :             : {
     279                 :             :         /*
     280                 :             :          * The supported locales (C, C.UTF-8, and PG_UNICODE_FAST) are all based on
     281                 :             :          * memcmp and are not expected to change, but track the version anyway.
     282                 :             :          *
     283                 :             :          * Note that the character semantics may change for some locales, but the
     284                 :             :          * collation version only tracks changes to sort order.
     285                 :             :          */
     286         [ +  + ]:          13 :         if (strcmp(collcollate, "C") == 0)
     287                 :           5 :                 return "1";
     288         [ +  + ]:           8 :         else if (strcmp(collcollate, "C.UTF-8") == 0)
     289                 :           5 :                 return "1";
     290         [ +  - ]:           3 :         else if (strcmp(collcollate, "PG_UNICODE_FAST") == 0)
     291                 :           3 :                 return "1";
     292                 :             :         else
     293   [ #  #  #  # ]:           0 :                 ereport(ERROR,
     294                 :             :                                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
     295                 :             :                                  errmsg("invalid locale name \"%s\" for builtin provider",
     296                 :             :                                                 collcollate)));
     297                 :             : 
     298                 :           0 :         return NULL;                            /* keep compiler quiet */
     299                 :          13 : }

Generated by: LCOV version 2.3.2-1