LCOV - Code coverage - src/backend/utils/adt/encode.c

LCOV - code coverage report

Current view:	top level - src/backend/utils/adt - encode.c (source / functions)		Coverage	Total	Hit
Test:	Code coverage	Lines:	21.8 %	380	83
Test Date:	2026-01-26 10:56:24	Functions:	15.4 %	26	4
Legend:	Lines: hit not hit Branches: + taken - not taken # not executed	Branches:	8.5 %	212	18

             Branch data     Line data    Source code

       1                 :             : /*-------------------------------------------------------------------------
       2                 :             :  *
       3                 :             :  * encode.c
       4                 :             :  *        Various data encoding/decoding things.
       5                 :             :  *
       6                 :             :  * Copyright (c) 2001-2026, PostgreSQL Global Development Group
       7                 :             :  *
       8                 :             :  *
       9                 :             :  * IDENTIFICATION
      10                 :             :  *        src/backend/utils/adt/encode.c
      11                 :             :  *
      12                 :             :  *-------------------------------------------------------------------------
      13                 :             :  */
      14                 :             : #include "postgres.h"
      15                 :             : 
      16                 :             : #include <ctype.h>
      17                 :             : 
      18                 :             : #include "mb/pg_wchar.h"
      19                 :             : #include "port/simd.h"
      20                 :             : #include "utils/builtins.h"
      21                 :             : #include "utils/memutils.h"
      22                 :             : #include "varatt.h"
      23                 :             : 
      24                 :             : 
      25                 :             : /*
      26                 :             :  * Encoding conversion API.
      27                 :             :  * encode_len() and decode_len() compute the amount of space needed, while
      28                 :             :  * encode() and decode() perform the actual conversions.  It is okay for
      29                 :             :  * the _len functions to return an overestimate, but not an underestimate.
      30                 :             :  * (Having said that, large overestimates could cause unnecessary errors,
      31                 :             :  * so it's better to get it right.)  The conversion routines write to the
      32                 :             :  * buffer at *res and return the true length of their output.
      33                 :             :  */
      34                 :             : struct pg_encoding
      35                 :             : {
      36                 :             :         uint64          (*encode_len) (const char *data, size_t dlen);
      37                 :             :         uint64          (*decode_len) (const char *data, size_t dlen);
      38                 :             :         uint64          (*encode) (const char *data, size_t dlen, char *res);
      39                 :             :         uint64          (*decode) (const char *data, size_t dlen, char *res);
      40                 :             : };
      41                 :             : 
      42                 :             : static const struct pg_encoding *pg_find_encoding(const char *name);
      43                 :             : 
      44                 :             : /*
      45                 :             :  * SQL functions.
      46                 :             :  */
      47                 :             : 
      48                 :             : Datum
      49                 :       35083 : binary_encode(PG_FUNCTION_ARGS)
      50                 :             : {
      51                 :       35083 :         bytea      *data = PG_GETARG_BYTEA_PP(0);
      52                 :       35083 :         Datum           name = PG_GETARG_DATUM(1);
      53                 :       35083 :         text       *result;
      54                 :       35083 :         char       *namebuf;
      55                 :       35083 :         char       *dataptr;
      56                 :       35083 :         size_t          datalen;
      57                 :       35083 :         uint64          resultlen;
      58                 :       35083 :         uint64          res;
      59                 :       35083 :         const struct pg_encoding *enc;
      60                 :             : 
      61                 :       35083 :         namebuf = TextDatumGetCString(name);
      62                 :             : 
      63                 :       35083 :         enc = pg_find_encoding(namebuf);
      64         [ +  + ]:       35083 :         if (enc == NULL)
      65   [ +  -  +  - ]:           1 :                 ereport(ERROR,
      66                 :             :                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
      67                 :             :                                  errmsg("unrecognized encoding: \"%s\"", namebuf),
      68                 :             :                                  errhint("Valid encodings are \"%s\", \"%s\", \"%s\", and \"%s\".",
      69                 :             :                                                  "base64", "base64url", "escape", "hex")));
      70                 :             : 
      71                 :       35082 :         dataptr = VARDATA_ANY(data);
      72                 :       35082 :         datalen = VARSIZE_ANY_EXHDR(data);
      73                 :             : 
      74                 :       35082 :         resultlen = enc->encode_len(dataptr, datalen);
      75                 :             : 
      76                 :             :         /*
      77                 :             :          * resultlen possibly overflows uint32, therefore on 32-bit machines it's
      78                 :             :          * unsafe to rely on palloc's internal check.
      79                 :             :          */
      80         [ +  - ]:       35082 :         if (resultlen > MaxAllocSize - VARHDRSZ)
      81   [ #  #  #  # ]:           0 :                 ereport(ERROR,
      82                 :             :                                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
      83                 :             :                                  errmsg("result of encoding conversion is too large")));
      84                 :             : 
      85                 :       35082 :         result = palloc(VARHDRSZ + resultlen);
      86                 :             : 
      87                 :       35082 :         res = enc->encode(dataptr, datalen, VARDATA(result));
      88                 :             : 
      89                 :             :         /* Make this FATAL 'cause we've trodden on memory ... */
      90         [ +  - ]:       35082 :         if (res > resultlen)
      91   [ #  #  #  # ]:           0 :                 elog(FATAL, "overflow - encode estimate too small");
      92                 :             : 
      93                 :       35082 :         SET_VARSIZE(result, VARHDRSZ + res);
      94                 :             : 
      95                 :       70164 :         PG_RETURN_TEXT_P(result);
      96                 :       35082 : }
      97                 :             : 
      98                 :             : Datum
      99                 :          32 : binary_decode(PG_FUNCTION_ARGS)
     100                 :             : {
     101                 :          32 :         text       *data = PG_GETARG_TEXT_PP(0);
     102                 :          32 :         Datum           name = PG_GETARG_DATUM(1);
     103                 :          32 :         bytea      *result;
     104                 :          32 :         char       *namebuf;
     105                 :          32 :         char       *dataptr;
     106                 :          32 :         size_t          datalen;
     107                 :          32 :         uint64          resultlen;
     108                 :          32 :         uint64          res;
     109                 :          32 :         const struct pg_encoding *enc;
     110                 :             : 
     111                 :          32 :         namebuf = TextDatumGetCString(name);
     112                 :             : 
     113                 :          32 :         enc = pg_find_encoding(namebuf);
     114         [ +  + ]:          32 :         if (enc == NULL)
     115   [ +  -  +  - ]:           1 :                 ereport(ERROR,
     116                 :             :                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     117                 :             :                                  errmsg("unrecognized encoding: \"%s\"", namebuf),
     118                 :             :                                  errhint("Valid encodings are \"%s\", \"%s\", \"%s\", and \"%s\".",
     119                 :             :                                                  "base64", "base64url", "escape", "hex")));
     120                 :             : 
     121                 :          31 :         dataptr = VARDATA_ANY(data);
     122                 :          31 :         datalen = VARSIZE_ANY_EXHDR(data);
     123                 :             : 
     124                 :          31 :         resultlen = enc->decode_len(dataptr, datalen);
     125                 :             : 
     126                 :             :         /*
     127                 :             :          * resultlen possibly overflows uint32, therefore on 32-bit machines it's
     128                 :             :          * unsafe to rely on palloc's internal check.
     129                 :             :          */
     130         [ +  - ]:          31 :         if (resultlen > MaxAllocSize - VARHDRSZ)
     131   [ #  #  #  # ]:           0 :                 ereport(ERROR,
     132                 :             :                                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     133                 :             :                                  errmsg("result of decoding conversion is too large")));
     134                 :             : 
     135                 :          31 :         result = palloc(VARHDRSZ + resultlen);
     136                 :             : 
     137                 :          31 :         res = enc->decode(dataptr, datalen, VARDATA(result));
     138                 :             : 
     139                 :             :         /* Make this FATAL 'cause we've trodden on memory ... */
     140         [ +  - ]:          31 :         if (res > resultlen)
     141   [ #  #  #  # ]:           0 :                 elog(FATAL, "overflow - decode estimate too small");
     142                 :             : 
     143                 :          31 :         SET_VARSIZE(result, VARHDRSZ + res);
     144                 :             : 
     145                 :          62 :         PG_RETURN_BYTEA_P(result);
     146                 :          31 : }
     147                 :             : 
     148                 :             : 
     149                 :             : /*
     150                 :             :  * HEX
     151                 :             :  */
     152                 :             : 
     153                 :             : /*
     154                 :             :  * The hex expansion of each possible byte value (two chars per value).
     155                 :             :  */
     156                 :             : static const char hextbl[512] =
     157                 :             : "000102030405060708090a0b0c0d0e0f"
     158                 :             : "101112131415161718191a1b1c1d1e1f"
     159                 :             : "202122232425262728292a2b2c2d2e2f"
     160                 :             : "303132333435363738393a3b3c3d3e3f"
     161                 :             : "404142434445464748494a4b4c4d4e4f"
     162                 :             : "505152535455565758595a5b5c5d5e5f"
     163                 :             : "606162636465666768696a6b6c6d6e6f"
     164                 :             : "707172737475767778797a7b7c7d7e7f"
     165                 :             : "808182838485868788898a8b8c8d8e8f"
     166                 :             : "909192939495969798999a9b9c9d9e9f"
     167                 :             : "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf"
     168                 :             : "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf"
     169                 :             : "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf"
     170                 :             : "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf"
     171                 :             : "e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
     172                 :             : "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";
     173                 :             : 
     174                 :             : static const int8 hexlookup[128] = {
     175                 :             :         -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     176                 :             :         -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     177                 :             :         -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     178                 :             :         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
     179                 :             :         -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     180                 :             :         -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     181                 :             :         -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     182                 :             :         -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     183                 :             : };
     184                 :             : 
     185                 :             : static inline uint64
     186                 :           0 : hex_encode_scalar(const char *src, size_t len, char *dst)
     187                 :             : {
     188                 :           0 :         const char *end = src + len;
     189                 :             : 
     190         [ #  # ]:           0 :         while (src < end)
     191                 :             :         {
     192                 :           0 :                 unsigned char usrc = *((const unsigned char *) src);
     193                 :             : 
     194                 :           0 :                 memcpy(dst, &hextbl[2 * usrc], 2);
     195                 :           0 :                 src++;
     196                 :           0 :                 dst += 2;
     197                 :           0 :         }
     198                 :           0 :         return (uint64) len * 2;
     199                 :           0 : }
     200                 :             : 
     201                 :             : uint64
     202                 :       36873 : hex_encode(const char *src, size_t len, char *dst)
     203                 :             : {
     204                 :             : #ifdef USE_NO_SIMD
     205                 :             :         return hex_encode_scalar(src, len, dst);
     206                 :             : #else
     207                 :       36873 :         const uint64 tail_idx = len & ~(sizeof(Vector8) - 1);
     208                 :       36873 :         uint64          i;
     209                 :             : 
     210                 :             :         /*
     211                 :             :          * This splits the high and low nibbles of each byte into separate
     212                 :             :          * vectors, adds the vectors to a mask that converts the nibbles to their
     213                 :             :          * equivalent ASCII bytes, and interleaves those bytes back together to
     214                 :             :          * form the final hex-encoded string.
     215                 :             :          */
     216         [ +  + ]:      107035 :         for (i = 0; i < tail_idx; i += sizeof(Vector8))
     217                 :             :         {
     218                 :       70162 :                 Vector8         srcv;
     219                 :       70162 :                 Vector8         lo;
     220                 :       70162 :                 Vector8         hi;
     221                 :       70162 :                 Vector8         mask;
     222                 :             : 
     223                 :       70162 :                 vector8_load(&srcv, (const uint8 *) &src[i]);
     224                 :             : 
     225                 :       70162 :                 lo = vector8_and(srcv, vector8_broadcast(0x0f));
     226                 :       70162 :                 mask = vector8_gt(lo, vector8_broadcast(0x9));
     227                 :       70162 :                 mask = vector8_and(mask, vector8_broadcast('a' - '0' - 10));
     228                 :       70162 :                 mask = vector8_add(mask, vector8_broadcast('0'));
     229                 :       70162 :                 lo = vector8_add(lo, mask);
     230                 :             : 
     231                 :       70162 :                 hi = vector8_and(srcv, vector8_broadcast(0xf0));
     232                 :       70162 :                 hi = vector8_shift_right(hi, 4);
     233                 :       70162 :                 mask = vector8_gt(hi, vector8_broadcast(0x9));
     234                 :       70162 :                 mask = vector8_and(mask, vector8_broadcast('a' - '0' - 10));
     235                 :       70162 :                 mask = vector8_add(mask, vector8_broadcast('0'));
     236                 :       70162 :                 hi = vector8_add(hi, mask);
     237                 :             : 
     238                 :      140324 :                 vector8_store((uint8 *) &dst[i * 2],
     239                 :       70162 :                                           vector8_interleave_low(hi, lo));
     240                 :      140324 :                 vector8_store((uint8 *) &dst[i * 2 + sizeof(Vector8)],
     241                 :       70162 :                                           vector8_interleave_high(hi, lo));
     242                 :       70162 :         }
     243                 :             : 
     244                 :       36873 :         (void) hex_encode_scalar(src + i, len - i, dst + i * 2);
     245                 :             : 
     246                 :       73746 :         return (uint64) len * 2;
     247                 :             : #endif
     248                 :       36873 : }
     249                 :             : 
     250                 :             : static inline bool
     251                 :           0 : get_hex(const char *cp, char *out)
     252                 :             : {
     253                 :           0 :         unsigned char c = (unsigned char) *cp;
     254                 :           0 :         int                     res = -1;
     255                 :             : 
     256         [ #  # ]:           0 :         if (c < 127)
     257                 :           0 :                 res = hexlookup[c];
     258                 :             : 
     259                 :           0 :         *out = (char) res;
     260                 :             : 
     261                 :           0 :         return (res >= 0);
     262                 :           0 : }
     263                 :             : 
     264                 :             : uint64
     265                 :           0 : hex_decode(const char *src, size_t len, char *dst)
     266                 :             : {
     267                 :           0 :         return hex_decode_safe(src, len, dst, NULL);
     268                 :             : }
     269                 :             : 
     270                 :             : static inline uint64
     271                 :           0 : hex_decode_safe_scalar(const char *src, size_t len, char *dst, Node *escontext)
     272                 :             : {
     273                 :           0 :         const char *s,
     274                 :             :                            *srcend;
     275                 :           0 :         char            v1,
     276                 :             :                                 v2,
     277                 :             :                            *p;
     278                 :             : 
     279                 :           0 :         srcend = src + len;
     280                 :           0 :         s = src;
     281                 :           0 :         p = dst;
     282         [ #  # ]:           0 :         while (s < srcend)
     283                 :             :         {
     284   [ #  #  #  #  :           0 :                 if (*s == ' ' || *s == '\n' || *s == '\t' || *s == '\r')
             #  #  #  # ]
     285                 :             :                 {
     286                 :           0 :                         s++;
     287                 :           0 :                         continue;
     288                 :             :                 }
     289         [ #  # ]:           0 :                 if (!get_hex(s, &v1))
     290         [ #  # ]:           0 :                         ereturn(escontext, 0,
     291                 :             :                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     292                 :             :                                          errmsg("invalid hexadecimal digit: \"%.*s\"",
     293                 :             :                                                         pg_mblen(s), s)));
     294                 :           0 :                 s++;
     295         [ #  # ]:           0 :                 if (s >= srcend)
     296         [ #  # ]:           0 :                         ereturn(escontext, 0,
     297                 :             :                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     298                 :             :                                          errmsg("invalid hexadecimal data: odd number of digits")));
     299         [ #  # ]:           0 :                 if (!get_hex(s, &v2))
     300         [ #  # ]:           0 :                         ereturn(escontext, 0,
     301                 :             :                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     302                 :             :                                          errmsg("invalid hexadecimal digit: \"%.*s\"",
     303                 :             :                                                         pg_mblen(s), s)));
     304                 :           0 :                 s++;
     305                 :           0 :                 *p++ = (v1 << 4) | v2;
     306                 :             :         }
     307                 :             : 
     308                 :           0 :         return p - dst;
     309                 :           0 : }
     310                 :             : 
     311                 :             : /*
     312                 :             :  * This helper converts each byte to its binary-equivalent nibble by
     313                 :             :  * subtraction and combines them to form the return bytes (separated by zero
     314                 :             :  * bytes).  Returns false if any input bytes are outside the expected ranges of
     315                 :             :  * ASCII values.  Otherwise, returns true.
     316                 :             :  */
     317                 :             : #ifndef USE_NO_SIMD
     318                 :             : static inline bool
     319                 :           0 : hex_decode_simd_helper(const Vector8 src, Vector8 *dst)
     320                 :             : {
     321                 :           0 :         Vector8         sub;
     322                 :           0 :         Vector8         mask_hi = vector8_interleave_low(vector8_broadcast(0), vector8_broadcast(0x0f));
     323                 :           0 :         Vector8         mask_lo = vector8_interleave_low(vector8_broadcast(0x0f), vector8_broadcast(0));
     324                 :           0 :         Vector8         tmp;
     325                 :           0 :         bool            ret;
     326                 :             : 
     327                 :           0 :         tmp = vector8_gt(vector8_broadcast('9' + 1), src);
     328                 :           0 :         sub = vector8_and(tmp, vector8_broadcast('0'));
     329                 :             : 
     330                 :           0 :         tmp = vector8_gt(src, vector8_broadcast('A' - 1));
     331                 :           0 :         tmp = vector8_and(tmp, vector8_broadcast('A' - 10));
     332                 :           0 :         sub = vector8_add(sub, tmp);
     333                 :             : 
     334                 :           0 :         tmp = vector8_gt(src, vector8_broadcast('a' - 1));
     335                 :           0 :         tmp = vector8_and(tmp, vector8_broadcast('a' - 'A'));
     336                 :           0 :         sub = vector8_add(sub, tmp);
     337                 :             : 
     338                 :           0 :         *dst = vector8_issub(src, sub);
     339                 :           0 :         ret = !vector8_has_ge(*dst, 0x10);
     340                 :             : 
     341                 :           0 :         tmp = vector8_and(*dst, mask_hi);
     342                 :           0 :         tmp = vector8_shift_right(tmp, 8);
     343                 :           0 :         *dst = vector8_and(*dst, mask_lo);
     344                 :           0 :         *dst = vector8_shift_left(*dst, 4);
     345                 :           0 :         *dst = vector8_or(*dst, tmp);
     346                 :           0 :         return ret;
     347                 :           0 : }
     348                 :             : #endif                                                  /* ! USE_NO_SIMD */
     349                 :             : 
     350                 :             : uint64
     351                 :           0 : hex_decode_safe(const char *src, size_t len, char *dst, Node *escontext)
     352                 :             : {
     353                 :             : #ifdef USE_NO_SIMD
     354                 :             :         return hex_decode_safe_scalar(src, len, dst, escontext);
     355                 :             : #else
     356                 :           0 :         const uint64 tail_idx = len & ~(sizeof(Vector8) * 2 - 1);
     357                 :           0 :         uint64          i;
     358                 :           0 :         bool            success = true;
     359                 :             : 
     360                 :             :         /*
     361                 :             :          * We must process 2 vectors at a time since the output will be half the
     362                 :             :          * length of the input.
     363                 :             :          */
     364         [ #  # ]:           0 :         for (i = 0; i < tail_idx; i += sizeof(Vector8) * 2)
     365                 :             :         {
     366                 :           0 :                 Vector8         srcv;
     367                 :           0 :                 Vector8         dstv1;
     368                 :           0 :                 Vector8         dstv2;
     369                 :             : 
     370                 :           0 :                 vector8_load(&srcv, (const uint8 *) &src[i]);
     371                 :           0 :                 success &= hex_decode_simd_helper(srcv, &dstv1);
     372                 :             : 
     373                 :           0 :                 vector8_load(&srcv, (const uint8 *) &src[i + sizeof(Vector8)]);
     374                 :           0 :                 success &= hex_decode_simd_helper(srcv, &dstv2);
     375                 :             : 
     376                 :           0 :                 vector8_store((uint8 *) &dst[i / 2], vector8_pack_16(dstv1, dstv2));
     377                 :           0 :         }
     378                 :             : 
     379                 :             :         /*
     380                 :             :          * If something didn't look right in the vector path, try again in the
     381                 :             :          * scalar path so that we can handle it correctly.
     382                 :             :          */
     383         [ #  # ]:           0 :         if (!success)
     384                 :           0 :                 i = 0;
     385                 :             : 
     386                 :           0 :         return i / 2 + hex_decode_safe_scalar(src + i, len - i, dst + i / 2, escontext);
     387                 :             : #endif
     388                 :           0 : }
     389                 :             : 
     390                 :             : static uint64
     391                 :           0 : hex_enc_len(const char *src, size_t srclen)
     392                 :             : {
     393                 :           0 :         return (uint64) srclen << 1;
     394                 :             : }
     395                 :             : 
     396                 :             : static uint64
     397                 :           0 : hex_dec_len(const char *src, size_t srclen)
     398                 :             : {
     399                 :           0 :         return (uint64) srclen >> 1;
     400                 :             : }
     401                 :             : 
     402                 :             : /*
     403                 :             :  * BASE64 and BASE64URL
     404                 :             :  */
     405                 :             : 
     406                 :             : static const char _base64[] =
     407                 :             : "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
     408                 :             : 
     409                 :             : static const char _base64url[] =
     410                 :             : "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
     411                 :             : 
     412                 :             : static const int8 b64lookup[128] = {
     413                 :             :         -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     414                 :             :         -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     415                 :             :         -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
     416                 :             :         52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
     417                 :             :         -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
     418                 :             :         15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
     419                 :             :         -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
     420                 :             :         41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
     421                 :             : };
     422                 :             : 
     423                 :             : /*
     424                 :             :  * pg_base64_encode_internal
     425                 :             :  *
     426                 :             :  * Helper for encoding base64 or base64url.  When url is passed as true the
     427                 :             :  * input will be encoded using base64url.  len bytes in src are encoded into
     428                 :             :  * dst.
     429                 :             :  */
     430                 :             : static uint64
     431                 :           0 : pg_base64_encode_internal(const char *src, size_t len, char *dst, bool url)
     432                 :             : {
     433                 :           0 :         char       *p,
     434                 :           0 :                            *lend = dst + 76;
     435                 :           0 :         const char *s,
     436                 :           0 :                            *end = src + len;
     437                 :           0 :         int                     pos = 2;
     438                 :           0 :         uint32          buf = 0;
     439                 :           0 :         const char *alphabet = url ? _base64url : _base64;
     440                 :             : 
     441                 :           0 :         s = src;
     442                 :           0 :         p = dst;
     443                 :             : 
     444         [ #  # ]:           0 :         while (s < end)
     445                 :             :         {
     446                 :           0 :                 buf |= (unsigned char) *s << (pos << 3);
     447                 :           0 :                 pos--;
     448                 :           0 :                 s++;
     449                 :             : 
     450                 :             :                 /* write it out */
     451         [ #  # ]:           0 :                 if (pos < 0)
     452                 :             :                 {
     453                 :           0 :                         *p++ = alphabet[(buf >> 18) & 0x3f];
     454                 :           0 :                         *p++ = alphabet[(buf >> 12) & 0x3f];
     455                 :           0 :                         *p++ = alphabet[(buf >> 6) & 0x3f];
     456                 :           0 :                         *p++ = alphabet[buf & 0x3f];
     457                 :             : 
     458                 :           0 :                         pos = 2;
     459                 :           0 :                         buf = 0;
     460                 :             : 
     461   [ #  #  #  # ]:           0 :                         if (!url && p >= lend)
     462                 :             :                         {
     463                 :           0 :                                 *p++ = '\n';
     464                 :           0 :                                 lend = p + 76;
     465                 :           0 :                         }
     466                 :           0 :                 }
     467                 :             :         }
     468                 :             : 
     469                 :             :         /* Handle remaining bytes in buf */
     470         [ #  # ]:           0 :         if (pos != 2)
     471                 :             :         {
     472                 :           0 :                 *p++ = alphabet[(buf >> 18) & 0x3f];
     473                 :           0 :                 *p++ = alphabet[(buf >> 12) & 0x3f];
     474                 :             : 
     475         [ #  # ]:           0 :                 if (pos == 0)
     476                 :             :                 {
     477                 :           0 :                         *p++ = alphabet[(buf >> 6) & 0x3f];
     478         [ #  # ]:           0 :                         if (!url)
     479                 :           0 :                                 *p++ = '=';
     480                 :           0 :                 }
     481         [ #  # ]:           0 :                 else if (!url)
     482                 :             :                 {
     483                 :           0 :                         *p++ = '=';
     484                 :           0 :                         *p++ = '=';
     485                 :           0 :                 }
     486                 :           0 :         }
     487                 :             : 
     488                 :           0 :         return p - dst;
     489                 :           0 : }
     490                 :             : 
     491                 :             : static uint64
     492                 :           0 : pg_base64_encode(const char *src, size_t len, char *dst)
     493                 :             : {
     494                 :           0 :         return pg_base64_encode_internal(src, len, dst, false);
     495                 :             : }
     496                 :             : 
     497                 :             : static uint64
     498                 :           0 : pg_base64url_encode(const char *src, size_t len, char *dst)
     499                 :             : {
     500                 :           0 :         return pg_base64_encode_internal(src, len, dst, true);
     501                 :             : }
     502                 :             : 
     503                 :             : /*
     504                 :             :  * pg_base64_decode_internal
     505                 :             :  *
     506                 :             :  * Helper for decoding base64 or base64url. When url is passed as true the
     507                 :             :  * input will be assumed to be encoded using base64url.
     508                 :             :  */
     509                 :             : static uint64
     510                 :           0 : pg_base64_decode_internal(const char *src, size_t len, char *dst, bool url)
     511                 :             : {
     512                 :           0 :         const char *srcend = src + len,
     513                 :           0 :                            *s = src;
     514                 :           0 :         char       *p = dst;
     515                 :           0 :         char            c;
     516                 :           0 :         int                     b = 0;
     517                 :           0 :         uint32          buf = 0;
     518                 :           0 :         int                     pos = 0,
     519                 :           0 :                                 end = 0;
     520                 :             : 
     521         [ #  # ]:           0 :         while (s < srcend)
     522                 :             :         {
     523                 :           0 :                 c = *s++;
     524                 :             : 
     525   [ #  #  #  #  :           0 :                 if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
             #  #  #  # ]
     526                 :           0 :                         continue;
     527                 :             : 
     528                 :             :                 /* convert base64url to base64 */
     529         [ #  # ]:           0 :                 if (url)
     530                 :             :                 {
     531         [ #  # ]:           0 :                         if (c == '-')
     532                 :           0 :                                 c = '+';
     533         [ #  # ]:           0 :                         else if (c == '_')
     534                 :           0 :                                 c = '/';
     535                 :           0 :                 }
     536                 :             : 
     537         [ #  # ]:           0 :                 if (c == '=')
     538                 :             :                 {
     539                 :             :                         /* end sequence */
     540         [ #  # ]:           0 :                         if (!end)
     541                 :             :                         {
     542         [ #  # ]:           0 :                                 if (pos == 2)
     543                 :           0 :                                         end = 1;
     544         [ #  # ]:           0 :                                 else if (pos == 3)
     545                 :           0 :                                         end = 2;
     546                 :             :                                 else
     547                 :             :                                 {
     548                 :             :                                         /* translator: %s is the name of an encoding scheme */
     549   [ #  #  #  # ]:           0 :                                         ereport(ERROR,
     550                 :             :                                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     551                 :             :                                                          errmsg("unexpected \"=\" while decoding %s sequence", url ? "base64url" : "base64")));
     552                 :             :                                 }
     553                 :           0 :                         }
     554                 :           0 :                         b = 0;
     555                 :           0 :                 }
     556                 :             :                 else
     557                 :             :                 {
     558                 :           0 :                         b = -1;
     559   [ #  #  #  # ]:           0 :                         if (c > 0 && c < 127)
     560                 :           0 :                                 b = b64lookup[(unsigned char) c];
     561         [ #  # ]:           0 :                         if (b < 0)
     562                 :             :                         {
     563                 :             :                                 /* translator: %s is the name of an encoding scheme */
     564   [ #  #  #  # ]:           0 :                                 ereport(ERROR,
     565                 :             :                                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     566                 :             :                                                  errmsg("invalid symbol \"%.*s\" found while decoding %s sequence",
     567                 :             :                                                                 pg_mblen(s - 1), s - 1,
     568                 :             :                                                                 url ? "base64url" : "base64")));
     569                 :           0 :                         }
     570                 :             :                 }
     571                 :             :                 /* add it to buffer */
     572                 :           0 :                 buf = (buf << 6) + b;
     573                 :           0 :                 pos++;
     574         [ #  # ]:           0 :                 if (pos == 4)
     575                 :             :                 {
     576                 :           0 :                         *p++ = (buf >> 16) & 255;
     577   [ #  #  #  # ]:           0 :                         if (end == 0 || end > 1)
     578                 :           0 :                                 *p++ = (buf >> 8) & 255;
     579   [ #  #  #  # ]:           0 :                         if (end == 0 || end > 2)
     580                 :           0 :                                 *p++ = buf & 255;
     581                 :           0 :                         buf = 0;
     582                 :           0 :                         pos = 0;
     583                 :           0 :                 }
     584                 :             :         }
     585                 :             : 
     586         [ #  # ]:           0 :         if (pos == 2)
     587                 :             :         {
     588                 :           0 :                 buf <<= 12;
     589                 :           0 :                 *p++ = (buf >> 16) & 0xFF;
     590                 :           0 :         }
     591         [ #  # ]:           0 :         else if (pos == 3)
     592                 :             :         {
     593                 :           0 :                 buf <<= 6;
     594                 :           0 :                 *p++ = (buf >> 16) & 0xFF;
     595                 :           0 :                 *p++ = (buf >> 8) & 0xFF;
     596                 :           0 :         }
     597         [ #  # ]:           0 :         else if (pos != 0)
     598                 :             :         {
     599                 :             :                 /* translator: %s is the name of an encoding scheme */
     600   [ #  #  #  # ]:           0 :                 ereport(ERROR,
     601                 :             :                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     602                 :             :                                  errmsg("invalid %s end sequence", url ? "base64url" : "base64"),
     603                 :             :                                  errhint("Input data is missing padding, is truncated, or is otherwise corrupted.")));
     604                 :           0 :         }
     605                 :             : 
     606                 :           0 :         return p - dst;
     607                 :           0 : }
     608                 :             : 
     609                 :             : static uint64
     610                 :           0 : pg_base64_decode(const char *src, size_t len, char *dst)
     611                 :             : {
     612                 :           0 :         return pg_base64_decode_internal(src, len, dst, false);
     613                 :             : }
     614                 :             : 
     615                 :             : static uint64
     616                 :           0 : pg_base64url_decode(const char *src, size_t len, char *dst)
     617                 :             : {
     618                 :           0 :         return pg_base64_decode_internal(src, len, dst, true);
     619                 :             : }
     620                 :             : 
     621                 :             : static uint64
     622                 :           0 : pg_base64_enc_len(const char *src, size_t srclen)
     623                 :             : {
     624                 :             :         /* 3 bytes will be converted to 4, linefeed after 76 chars */
     625                 :           0 :         return ((uint64) srclen + 2) / 3 * 4 + (uint64) srclen / (76 * 3 / 4);
     626                 :             : }
     627                 :             : 
     628                 :             : static uint64
     629                 :           0 : pg_base64_dec_len(const char *src, size_t srclen)
     630                 :             : {
     631                 :           0 :         return ((uint64) srclen * 3) >> 2;
     632                 :             : }
     633                 :             : 
     634                 :             : static uint64
     635                 :           0 : pg_base64url_enc_len(const char *src, size_t srclen)
     636                 :             : {
     637                 :             :         /*
     638                 :             :          * Unlike standard base64, base64url omits padding when the input length
     639                 :             :          * is not divisible by 3, so the padded size computed here is an upper bound.
     640                 :             :          */
     641                 :           0 :         return (srclen + 2) / 3 * 4;
     642                 :             : }
     643                 :             : 
     644                 :             : static uint64
     645                 :           0 : pg_base64url_dec_len(const char *src, size_t srclen)
     646                 :             : {
     647                 :             :         /*
     648                 :             :          * For base64, each 4 characters of input produce at most 3 bytes of
     649                 :             :          * output.  For base64url without padding, we need to round the input
     650                 :             :          * length up to the nearest multiple of 4.
     651                 :             :          */
     652                 :           0 :         size_t          adjusted_len = srclen;
     653                 :             : 
     654         [ #  # ]:           0 :         if (srclen % 4 != 0)
     655                 :           0 :                 adjusted_len += 4 - (srclen % 4);
     656                 :             : 
     657                 :           0 :         return (adjusted_len * 3) / 4;
     658                 :           0 : }
     659                 :             : 
     660                 :             : /*
     661                 :             :  * Escape
     662                 :             :  * Minimally escape bytea to text.
     663                 :             :  * De-escape text to bytea.
     664                 :             :  *
     665                 :             :  * We must escape zero bytes and high-bit-set bytes to avoid generating
     666                 :             :  * text that might be invalid in the current encoding, or that might
     667                 :             :  * change to something else if passed through an encoding conversion
     668                 :             :  * (leading to failing to de-escape to the original bytea value).
     669                 :             :  * Also of course backslash itself has to be escaped.
     670                 :             :  *
     671                 :             :  * De-escaping processes \\ and any \### octal
     672                 :             :  */
     673                 :             : 
     674                 :             : #define VAL(CH)                 ((CH) - '0')
     675                 :             : #define DIG(VAL)                ((VAL) + '0')
     676                 :             : 
     677                 :             : static uint64
     678                 :           0 : esc_encode(const char *src, size_t srclen, char *dst)
     679                 :             : {
     680                 :           0 :         const char *end = src + srclen;
     681                 :           0 :         char       *rp = dst;
     682                 :           0 :         uint64          len = 0;
     683                 :             : 
     684         [ #  # ]:           0 :         while (src < end)
     685                 :             :         {
     686                 :           0 :                 unsigned char c = (unsigned char) *src;
     687                 :             : 
     688   [ #  #  #  # ]:           0 :                 if (c == '\0' || IS_HIGHBIT_SET(c))
     689                 :             :                 {
     690                 :           0 :                         rp[0] = '\\';
     691                 :           0 :                         rp[1] = DIG(c >> 6);
     692                 :           0 :                         rp[2] = DIG((c >> 3) & 7);
     693                 :           0 :                         rp[3] = DIG(c & 7);
     694                 :           0 :                         rp += 4;
     695                 :           0 :                         len += 4;
     696                 :           0 :                 }
     697         [ #  # ]:           0 :                 else if (c == '\\')
     698                 :             :                 {
     699                 :           0 :                         rp[0] = '\\';
     700                 :           0 :                         rp[1] = '\\';
     701                 :           0 :                         rp += 2;
     702                 :           0 :                         len += 2;
     703                 :           0 :                 }
     704                 :             :                 else
     705                 :             :                 {
     706                 :           0 :                         *rp++ = c;
     707                 :           0 :                         len++;
     708                 :             :                 }
     709                 :             : 
     710                 :           0 :                 src++;
     711                 :           0 :         }
     712                 :             : 
     713                 :           0 :         return len;
     714                 :           0 : }
     715                 :             : 
     716                 :             : static uint64
     717                 :           0 : esc_decode(const char *src, size_t srclen, char *dst)
     718                 :             : {
     719                 :           0 :         const char *end = src + srclen;
     720                 :           0 :         char       *rp = dst;
     721                 :           0 :         uint64          len = 0;
     722                 :             : 
     723         [ #  # ]:           0 :         while (src < end)
     724                 :             :         {
     725         [ #  # ]:           0 :                 if (src[0] != '\\')
     726                 :           0 :                         *rp++ = *src++;
     727         [ #  # ]:           0 :                 else if (src + 3 < end &&
     728   [ #  #  #  # ]:           0 :                                  (src[1] >= '0' && src[1] <= '3') &&
     729   [ #  #  #  #  :           0 :                                  (src[2] >= '0' && src[2] <= '7') &&
                   #  # ]
     730         [ #  # ]:           0 :                                  (src[3] >= '0' && src[3] <= '7'))
     731                 :             :                 {
     732                 :           0 :                         int                     val;
     733                 :             : 
     734                 :           0 :                         val = VAL(src[1]);
     735                 :           0 :                         val <<= 3;
     736                 :           0 :                         val += VAL(src[2]);
     737                 :           0 :                         val <<= 3;
     738                 :           0 :                         *rp++ = val + VAL(src[3]);
     739                 :           0 :                         src += 4;
     740                 :           0 :                 }
     741         [ #  # ]:           0 :                 else if (src + 1 < end &&
     742                 :           0 :                                  (src[1] == '\\'))
     743                 :             :                 {
     744                 :           0 :                         *rp++ = '\\';
     745                 :           0 :                         src += 2;
     746                 :           0 :                 }
     747                 :             :                 else
     748                 :             :                 {
     749                 :             :                         /*
     750                 :             :                          * One backslash, not followed by ### valid octal. Should never
     751                 :             :                          * get here, since esc_dec_len does the same check.
     752                 :             :                          */
     753   [ #  #  #  # ]:           0 :                         ereport(ERROR,
     754                 :             :                                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
     755                 :             :                                          errmsg("invalid input syntax for type %s", "bytea")));
     756                 :             :                 }
     757                 :             : 
     758                 :           0 :                 len++;
     759                 :             :         }
     760                 :             : 
     761                 :           0 :         return len;
     762                 :           0 : }
     763                 :             : 
     764                 :             : static uint64
     765                 :           0 : esc_enc_len(const char *src, size_t srclen)
     766                 :             : {
     767                 :           0 :         const char *end = src + srclen;
     768                 :           0 :         uint64          len = 0;
     769                 :             : 
     770         [ #  # ]:           0 :         while (src < end)
     771                 :             :         {
     772   [ #  #  #  # ]:           0 :                 if (*src == '\0' || IS_HIGHBIT_SET(*src))
     773                 :           0 :                         len += 4;
     774         [ #  # ]:           0 :                 else if (*src == '\\')
     775                 :           0 :                         len += 2;
     776                 :             :                 else
     777                 :           0 :                         len++;
     778                 :             : 
     779                 :           0 :                 src++;
     780                 :             :         }
     781                 :             : 
     782                 :           0 :         return len;
     783                 :           0 : }
     784                 :             : 
     785                 :             : static uint64
     786                 :           0 : esc_dec_len(const char *src, size_t srclen)
     787                 :             : {
     788                 :           0 :         const char *end = src + srclen;
     789                 :           0 :         uint64          len = 0;
     790                 :             : 
     791         [ #  # ]:           0 :         while (src < end)
     792                 :             :         {
     793         [ #  # ]:           0 :                 if (src[0] != '\\')
     794                 :           0 :                         src++;
     795         [ #  # ]:           0 :                 else if (src + 3 < end &&
     796   [ #  #  #  # ]:           0 :                                  (src[1] >= '0' && src[1] <= '3') &&
     797   [ #  #  #  #  :           0 :                                  (src[2] >= '0' && src[2] <= '7') &&
                   #  # ]
     798         [ #  # ]:           0 :                                  (src[3] >= '0' && src[3] <= '7'))
     799                 :             :                 {
     800                 :             :                         /*
     801                 :             :                          * backslash + valid octal
     802                 :             :                          */
     803                 :           0 :                         src += 4;
     804                 :           0 :                 }
     805         [ #  # ]:           0 :                 else if (src + 1 < end &&
     806                 :           0 :                                  (src[1] == '\\'))
     807                 :             :                 {
     808                 :             :                         /*
     809                 :             :                          * two backslashes = backslash
     810                 :             :                          */
     811                 :           0 :                         src += 2;
     812                 :           0 :                 }
     813                 :             :                 else
     814                 :             :                 {
     815                 :             :                         /*
     816                 :             :                          * one backslash, not followed by ### valid octal
     817                 :             :                          */
     818   [ #  #  #  # ]:           0 :                         ereport(ERROR,
     819                 :             :                                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
     820                 :             :                                          errmsg("invalid input syntax for type %s", "bytea")));
     821                 :             :                 }
     822                 :             : 
     823                 :           0 :                 len++;
     824                 :             :         }
     825                 :           0 :         return len;
     826                 :           0 : }
     827                 :             : 
     828                 :             : /*
     829                 :             :  * Common
     830                 :             :  */
     831                 :             : 
     832                 :             : static const struct
     833                 :             : {
     834                 :             :         const char *name;
     835                 :             :         struct pg_encoding enc;
     836                 :             : }                       enclist[] =
     837                 :             : 
     838                 :             : {
     839                 :             :         {
     840                 :             :                 "hex",
     841                 :             :                 {
     842                 :             :                         hex_enc_len, hex_dec_len, hex_encode, hex_decode
     843                 :             :                 }
     844                 :             :         },
     845                 :             :         {
     846                 :             :                 "base64",
     847                 :             :                 {
     848                 :             :                         pg_base64_enc_len, pg_base64_dec_len, pg_base64_encode, pg_base64_decode
     849                 :             :                 }
     850                 :             :         },
     851                 :             :         {
     852                 :             :                 "base64url",
     853                 :             :                 {
     854                 :             :                         pg_base64url_enc_len, pg_base64url_dec_len, pg_base64url_encode, pg_base64url_decode
     855                 :             :                 }
     856                 :             :         },
     857                 :             :         {
     858                 :             :                 "escape",
     859                 :             :                 {
     860                 :             :                         esc_enc_len, esc_dec_len, esc_encode, esc_decode
     861                 :             :                 }
     862                 :             :         },
     863                 :             :         {
     864                 :             :                 NULL,
     865                 :             :                 {
     866                 :             :                         NULL, NULL, NULL, NULL
     867                 :             :                 }
     868                 :             :         }
     869                 :             : };
     870                 :             : 
     871                 :             : static const struct pg_encoding *
     872                 :       35115 : pg_find_encoding(const char *name)
     873                 :             : {
     874                 :       35115 :         int                     i;
     875                 :             : 
     876         [ +  + ]:       35226 :         for (i = 0; enclist[i].name; i++)
     877         [ +  + ]:       35224 :                 if (pg_strcasecmp(enclist[i].name, name) == 0)
     878                 :       35113 :                         return &enclist[i].enc;
     879                 :             : 
     880                 :           2 :         return NULL;
     881                 :       35115 : }

Generated by: LCOV version 2.3.2-1