LCOV - Code coverage - src/backend/access/hash/hashfunc.c

LCOV - code coverage report

Current view:	top level - src/backend/access/hash - hashfunc.c (source / functions)		Coverage	Total	Hit
Test:	Code coverage	Lines:	91.7 %	168	154
Test Date:	2026-01-26 10:56:24	Functions:	92.3 %	26	24
Legend:	Lines: hit not hit Branches: + taken - not taken # not executed	Branches:	58.3 %	72	42

             Branch data     Line data    Source code

       1                 :             : /*-------------------------------------------------------------------------
       2                 :             :  *
       3                 :             :  * hashfunc.c
       4                 :             :  *        Support functions for hash access method.
       5                 :             :  *
       6                 :             :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7                 :             :  * Portions Copyright (c) 1994, Regents of the University of California
       8                 :             :  *
       9                 :             :  *
      10                 :             :  * IDENTIFICATION
      11                 :             :  *        src/backend/access/hash/hashfunc.c
      12                 :             :  *
      13                 :             :  * NOTES
      14                 :             :  *        These functions are stored in pg_amproc.  For each operator class
      15                 :             :  *        defined for hash indexes, they compute the hash value of the argument.
      16                 :             :  *
      17                 :             :  *        Additional hash functions appear in /utils/adt/ files for various
      18                 :             :  *        specialized datatypes.
      19                 :             :  *
      20                 :             :  *        It is expected that every bit of a hash function's 32-bit result is
      21                 :             :  *        as random as every other; failure to ensure this is likely to lead
      22                 :             :  *        to poor performance of hash joins, for example.  In most cases a hash
      23                 :             :  *        function should use hash_any() or its variant hash_uint32().
      24                 :             :  *-------------------------------------------------------------------------
      25                 :             :  */
      26                 :             : 
      27                 :             : #include "postgres.h"
      28                 :             : 
      29                 :             : #include "common/hashfn.h"
      30                 :             : #include "utils/float.h"
      31                 :             : #include "utils/fmgrprotos.h"
      32                 :             : #include "utils/pg_locale.h"
      33                 :             : #include "varatt.h"
      34                 :             : 
      35                 :             : /*
      36                 :             :  * Datatype-specific hash functions.
      37                 :             :  *
      38                 :             :  * These support both hash indexes and hash joins.
      39                 :             :  *
      40                 :             :  * NOTE: some of these are also used by catcache operations, without
      41                 :             :  * any direct connection to hash indexes.  Also, the common hash_any
      42                 :             :  * routine is also used by dynahash tables.
      43                 :             :  */
      44                 :             : 
      45                 :             : /* Note: this is used for both "char" and boolean datatypes */
      46                 :             : Datum
      47                 :        2939 : hashchar(PG_FUNCTION_ARGS)
      48                 :             : {
      49                 :        2939 :         return hash_uint32((int32) PG_GETARG_CHAR(0));
      50                 :             : }
      51                 :             : 
      52                 :             : Datum
      53                 :          11 : hashcharextended(PG_FUNCTION_ARGS)
      54                 :             : {
      55                 :          11 :         return hash_uint32_extended((int32) PG_GETARG_CHAR(0), PG_GETARG_INT64(1));
      56                 :             : }
      57                 :             : 
      58                 :             : Datum
      59                 :       94657 : hashint2(PG_FUNCTION_ARGS)
      60                 :             : {
      61                 :       94657 :         return hash_uint32((int32) PG_GETARG_INT16(0));
      62                 :             : }
      63                 :             : 
      64                 :             : Datum
      65                 :           8 : hashint2extended(PG_FUNCTION_ARGS)
      66                 :             : {
      67                 :           8 :         return hash_uint32_extended((int32) PG_GETARG_INT16(0), PG_GETARG_INT64(1));
      68                 :             : }
      69                 :             : 
      70                 :             : Datum
      71                 :     4562491 : hashint4(PG_FUNCTION_ARGS)
      72                 :             : {
      73                 :     4562491 :         return hash_uint32(PG_GETARG_INT32(0));
      74                 :             : }
      75                 :             : 
      76                 :             : Datum
      77                 :         130 : hashint4extended(PG_FUNCTION_ARGS)
      78                 :             : {
      79                 :         130 :         return hash_uint32_extended(PG_GETARG_INT32(0), PG_GETARG_INT64(1));
      80                 :             : }
      81                 :             : 
      82                 :             : Datum
      83                 :      104962 : hashint8(PG_FUNCTION_ARGS)
      84                 :             : {
      85                 :             :         /*
      86                 :             :          * The idea here is to produce a hash value compatible with the values
      87                 :             :          * produced by hashint4 and hashint2 for logically equal inputs; this is
      88                 :             :          * necessary to support cross-type hash joins across these input types.
      89                 :             :          * Since all three types are signed, we can xor the high half of the int8
      90                 :             :          * value if the sign is positive, or the complement of the high half when
      91                 :             :          * the sign is negative.
      92                 :             :          */
      93                 :      104962 :         int64           val = PG_GETARG_INT64(0);
      94                 :      104962 :         uint32          lohalf = (uint32) val;
      95                 :      104962 :         uint32          hihalf = (uint32) (val >> 32);
      96                 :             : 
      97         [ +  + ]:      104962 :         lohalf ^= (val >= 0) ? hihalf : ~hihalf;
      98                 :             : 
      99                 :      209924 :         return hash_uint32(lohalf);
     100                 :      104962 : }
     101                 :             : 
     102                 :             : Datum
     103                 :          74 : hashint8extended(PG_FUNCTION_ARGS)
     104                 :             : {
     105                 :             :         /* Same approach as hashint8 */
     106                 :          74 :         int64           val = PG_GETARG_INT64(0);
     107                 :          74 :         uint32          lohalf = (uint32) val;
     108                 :          74 :         uint32          hihalf = (uint32) (val >> 32);
     109                 :             : 
     110         [ +  - ]:          74 :         lohalf ^= (val >= 0) ? hihalf : ~hihalf;
     111                 :             : 
     112                 :         148 :         return hash_uint32_extended(lohalf, PG_GETARG_INT64(1));
     113                 :          74 : }
     114                 :             : 
     115                 :             : Datum
     116                 :      953707 : hashoid(PG_FUNCTION_ARGS)
     117                 :             : {
     118                 :      953707 :         return hash_uint32((uint32) PG_GETARG_OID(0));
     119                 :             : }
     120                 :             : 
     121                 :             : Datum
     122                 :          12 : hashoidextended(PG_FUNCTION_ARGS)
     123                 :             : {
     124                 :          12 :         return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
     125                 :             : }
     126                 :             : 
     127                 :             : Datum
     128                 :          13 : hashenum(PG_FUNCTION_ARGS)
     129                 :             : {
     130                 :          13 :         return hash_uint32((uint32) PG_GETARG_OID(0));
     131                 :             : }
     132                 :             : 
     133                 :             : Datum
     134                 :           6 : hashenumextended(PG_FUNCTION_ARGS)
     135                 :             : {
     136                 :           6 :         return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
     137                 :             : }
     138                 :             : 
     139                 :             : Datum
     140                 :         385 : hashfloat4(PG_FUNCTION_ARGS)
     141                 :             : {
     142                 :         385 :         float4          key = PG_GETARG_FLOAT4(0);
     143                 :         385 :         float8          key8;
     144                 :             : 
     145                 :             :         /*
     146                 :             :          * On IEEE-float machines, minus zero and zero have different bit patterns
     147                 :             :          * but should compare as equal.  We must ensure that they have the same
     148                 :             :          * hash value, which is most reliably done this way:
     149                 :             :          */
     150         [ +  + ]:         385 :         if (key == (float4) 0)
     151                 :           4 :                 PG_RETURN_UINT32(0);
     152                 :             : 
     153                 :             :         /*
     154                 :             :          * To support cross-type hashing of float8 and float4, we want to return
     155                 :             :          * the same hash value hashfloat8 would produce for an equal float8 value.
     156                 :             :          * So, widen the value to float8 and hash that.  (We must do this rather
     157                 :             :          * than have hashfloat8 try to narrow its value to float4; that could fail
     158                 :             :          * on overflow.)
     159                 :             :          */
     160                 :         381 :         key8 = key;
     161                 :             : 
     162                 :             :         /*
     163                 :             :          * Similarly, NaNs can have different bit patterns but they should all
     164                 :             :          * compare as equal.  For backwards-compatibility reasons we force them to
     165                 :             :          * have the hash value of a standard float8 NaN.  (You'd think we could
     166                 :             :          * replace key with a float4 NaN and then widen it; but on some old
     167                 :             :          * platforms, that way produces a different bit pattern.)
     168                 :             :          */
     169   [ -  +  +  +  :         381 :         if (isnan(key8))
                   +  - ]
     170                 :         759 :                 key8 = get_float8_nan();
     171                 :             : 
     172                 :         381 :         return hash_any((unsigned char *) &key8, sizeof(key8));
     173                 :         385 : }
     174                 :             : 
     175                 :             : Datum
     176                 :          12 : hashfloat4extended(PG_FUNCTION_ARGS)
     177                 :             : {
     178                 :          12 :         float4          key = PG_GETARG_FLOAT4(0);
     179                 :          12 :         uint64          seed = PG_GETARG_INT64(1);
     180                 :          12 :         float8          key8;
     181                 :             : 
     182                 :             :         /* Same approach as hashfloat4 */
     183         [ +  + ]:          12 :         if (key == (float4) 0)
     184                 :           2 :                 PG_RETURN_UINT64(seed);
     185                 :          10 :         key8 = key;
     186   [ -  +  +  +  :          10 :         if (isnan(key8))
                   +  - ]
     187                 :          20 :                 key8 = get_float8_nan();
     188                 :             : 
     189                 :          10 :         return hash_any_extended((unsigned char *) &key8, sizeof(key8), seed);
     190                 :          12 : }
     191                 :             : 
     192                 :             : Datum
     193                 :       22596 : hashfloat8(PG_FUNCTION_ARGS)
     194                 :             : {
     195                 :       22596 :         float8          key = PG_GETARG_FLOAT8(0);
     196                 :             : 
     197                 :             :         /*
     198                 :             :          * On IEEE-float machines, minus zero and zero have different bit patterns
     199                 :             :          * but should compare as equal.  We must ensure that they have the same
     200                 :             :          * hash value, which is most reliably done this way:
     201                 :             :          */
     202         [ +  + ]:       22596 :         if (key == (float8) 0)
     203                 :          81 :                 PG_RETURN_UINT32(0);
     204                 :             : 
     205                 :             :         /*
     206                 :             :          * Similarly, NaNs can have different bit patterns but they should all
     207                 :             :          * compare as equal.  For backwards-compatibility reasons we force them to
     208                 :             :          * have the hash value of a standard NaN.
     209                 :             :          */
     210   [ -  +  +  +  :       22515 :         if (isnan(key))
                   +  - ]
     211                 :       45027 :                 key = get_float8_nan();
     212                 :             : 
     213                 :       22515 :         return hash_any((unsigned char *) &key, sizeof(key));
     214                 :       22596 : }
     215                 :             : 
     216                 :             : Datum
     217                 :          12 : hashfloat8extended(PG_FUNCTION_ARGS)
     218                 :             : {
     219                 :          12 :         float8          key = PG_GETARG_FLOAT8(0);
     220                 :          12 :         uint64          seed = PG_GETARG_INT64(1);
     221                 :             : 
     222                 :             :         /* Same approach as hashfloat8 */
     223         [ +  + ]:          12 :         if (key == (float8) 0)
     224                 :           2 :                 PG_RETURN_UINT64(seed);
     225   [ -  +  +  +  :          10 :         if (isnan(key))
                   +  - ]
     226                 :          20 :                 key = get_float8_nan();
     227                 :             : 
     228                 :          10 :         return hash_any_extended((unsigned char *) &key, sizeof(key), seed);
     229                 :          12 : }
     230                 :             : 
     231                 :             : Datum
     232                 :       36649 : hashoidvector(PG_FUNCTION_ARGS)
     233                 :             : {
     234                 :       36649 :         oidvector  *key = (oidvector *) PG_GETARG_POINTER(0);
     235                 :             : 
     236                 :       73298 :         return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid));
     237                 :       36649 : }
     238                 :             : 
     239                 :             : Datum
     240                 :          10 : hashoidvectorextended(PG_FUNCTION_ARGS)
     241                 :             : {
     242                 :          10 :         oidvector  *key = (oidvector *) PG_GETARG_POINTER(0);
     243                 :             : 
     244                 :          30 :         return hash_any_extended((unsigned char *) key->values,
     245                 :          10 :                                                          key->dim1 * sizeof(Oid),
     246                 :          10 :                                                          PG_GETARG_INT64(1));
     247                 :          10 : }
     248                 :             : 
     249                 :             : Datum
     250                 :       82289 : hashname(PG_FUNCTION_ARGS)
     251                 :             : {
     252                 :       82289 :         char       *key = NameStr(*PG_GETARG_NAME(0));
     253                 :             : 
     254                 :      164578 :         return hash_any((unsigned char *) key, strlen(key));
     255                 :       82289 : }
     256                 :             : 
     257                 :             : Datum
     258                 :          10 : hashnameextended(PG_FUNCTION_ARGS)
     259                 :             : {
     260                 :          10 :         char       *key = NameStr(*PG_GETARG_NAME(0));
     261                 :             : 
     262                 :          30 :         return hash_any_extended((unsigned char *) key, strlen(key),
     263                 :          10 :                                                          PG_GETARG_INT64(1));
     264                 :          10 : }
     265                 :             : 
     266                 :             : Datum
     267                 :      171795 : hashtext(PG_FUNCTION_ARGS)
     268                 :             : {
     269                 :      171795 :         text       *key = PG_GETARG_TEXT_PP(0);
     270                 :      171795 :         Oid                     collid = PG_GET_COLLATION();
     271                 :      171795 :         pg_locale_t mylocale;
     272                 :      171795 :         Datum           result;
     273                 :             : 
     274         [ +  + ]:      171795 :         if (!collid)
     275   [ +  -  +  - ]:           1 :                 ereport(ERROR,
     276                 :             :                                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
     277                 :             :                                  errmsg("could not determine which collation to use for string hashing"),
     278                 :             :                                  errhint("Use the COLLATE clause to set the collation explicitly.")));
     279                 :             : 
     280                 :      171794 :         mylocale = pg_newlocale_from_collation(collid);
     281                 :             : 
     282         [ +  + ]:      171794 :         if (mylocale->deterministic)
     283                 :             :         {
     284                 :      342698 :                 result = hash_any((unsigned char *) VARDATA_ANY(key),
     285                 :      171349 :                                                   VARSIZE_ANY_EXHDR(key));
     286                 :      171349 :         }
     287                 :             :         else
     288                 :             :         {
     289                 :         445 :                 Size            bsize,
     290                 :             :                                         rsize;
     291                 :         445 :                 char       *buf;
     292                 :         445 :                 const char *keydata = VARDATA_ANY(key);
     293                 :         445 :                 size_t          keylen = VARSIZE_ANY_EXHDR(key);
     294                 :             : 
     295                 :             : 
     296                 :         445 :                 bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
     297                 :         445 :                 buf = palloc(bsize + 1);
     298                 :             : 
     299                 :         445 :                 rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
     300                 :             : 
     301                 :             :                 /* the second call may return a smaller value than the first */
     302         [ +  - ]:         445 :                 if (rsize > bsize)
     303   [ #  #  #  # ]:           0 :                         elog(ERROR, "pg_strnxfrm() returned unexpected result");
     304                 :             : 
     305                 :             :                 /*
     306                 :             :                  * In principle, there's no reason to include the terminating NUL
     307                 :             :                  * character in the hash, but it was done before and the behavior must
     308                 :             :                  * be preserved.
     309                 :             :                  */
     310                 :         445 :                 result = hash_any((uint8_t *) buf, bsize + 1);
     311                 :             : 
     312                 :         445 :                 pfree(buf);
     313                 :         445 :         }
     314                 :             : 
     315                 :             :         /* Avoid leaking memory for toasted inputs */
     316         [ +  - ]:      171794 :         PG_FREE_IF_COPY(key, 0);
     317                 :             : 
     318                 :      343588 :         return result;
     319                 :      171794 : }
     320                 :             : 
     321                 :             : Datum
     322                 :         678 : hashtextextended(PG_FUNCTION_ARGS)
     323                 :             : {
     324                 :         678 :         text       *key = PG_GETARG_TEXT_PP(0);
     325                 :         678 :         Oid                     collid = PG_GET_COLLATION();
     326                 :         678 :         pg_locale_t mylocale;
     327                 :         678 :         Datum           result;
     328                 :             : 
     329         [ +  - ]:         678 :         if (!collid)
     330   [ #  #  #  # ]:           0 :                 ereport(ERROR,
     331                 :             :                                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
     332                 :             :                                  errmsg("could not determine which collation to use for string hashing"),
     333                 :             :                                  errhint("Use the COLLATE clause to set the collation explicitly.")));
     334                 :             : 
     335                 :         678 :         mylocale = pg_newlocale_from_collation(collid);
     336                 :             : 
     337         [ +  + ]:         678 :         if (mylocale->deterministic)
     338                 :             :         {
     339                 :        1348 :                 result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
     340                 :         674 :                                                                    VARSIZE_ANY_EXHDR(key),
     341                 :         674 :                                                                    PG_GETARG_INT64(1));
     342                 :         674 :         }
     343                 :             :         else
     344                 :             :         {
     345                 :           4 :                 Size            bsize,
     346                 :             :                                         rsize;
     347                 :           4 :                 char       *buf;
     348                 :           4 :                 const char *keydata = VARDATA_ANY(key);
     349                 :           4 :                 size_t          keylen = VARSIZE_ANY_EXHDR(key);
     350                 :             : 
     351                 :           4 :                 bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
     352                 :           4 :                 buf = palloc(bsize + 1);
     353                 :             : 
     354                 :           4 :                 rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
     355                 :             : 
     356                 :             :                 /* the second call may return a smaller value than the first */
     357         [ +  - ]:           4 :                 if (rsize > bsize)
     358   [ #  #  #  # ]:           0 :                         elog(ERROR, "pg_strnxfrm() returned unexpected result");
     359                 :             : 
     360                 :             :                 /*
     361                 :             :                  * In principle, there's no reason to include the terminating NUL
     362                 :             :                  * character in the hash, but it was done before and the behavior must
     363                 :             :                  * be preserved.
     364                 :             :                  */
     365                 :           8 :                 result = hash_any_extended((uint8_t *) buf, bsize + 1,
     366                 :           4 :                                                                    PG_GETARG_INT64(1));
     367                 :             : 
     368                 :           4 :                 pfree(buf);
     369                 :           4 :         }
     370                 :             : 
     371         [ +  - ]:         678 :         PG_FREE_IF_COPY(key, 0);
     372                 :             : 
     373                 :        1356 :         return result;
     374                 :         678 : }
     375                 :             : 
     376                 :             : /*
     377                 :             :  * hashvarlena() can be used for any varlena datatype in which there are
     378                 :             :  * no non-significant bits, ie, distinct bitpatterns never compare as equal.
     379                 :             :  *
     380                 :             :  * (However, you need to define an SQL-level wrapper function around it with
     381                 :             :  * the concrete input data type; otherwise hashvalidate() won't accept it.
     382                 :             :  * Moreover, at least for built-in types, a C-level wrapper function is also
     383                 :             :  * recommended; otherwise, the opr_sanity test will get upset.)
     384                 :             :  */
     385                 :             : Datum
     386                 :        1023 : hashvarlena(PG_FUNCTION_ARGS)
     387                 :             : {
     388                 :        1023 :         struct varlena *key = PG_GETARG_VARLENA_PP(0);
     389                 :        1023 :         Datum           result;
     390                 :             : 
     391                 :        2046 :         result = hash_any((unsigned char *) VARDATA_ANY(key),
     392                 :        1023 :                                           VARSIZE_ANY_EXHDR(key));
     393                 :             : 
     394                 :             :         /* Avoid leaking memory for toasted inputs */
     395         [ +  + ]:        1023 :         PG_FREE_IF_COPY(key, 0);
     396                 :             : 
     397                 :        2046 :         return result;
     398                 :        1023 : }
     399                 :             : 
     400                 :             : Datum
     401                 :           0 : hashvarlenaextended(PG_FUNCTION_ARGS)
     402                 :             : {
     403                 :           0 :         struct varlena *key = PG_GETARG_VARLENA_PP(0);
     404                 :           0 :         Datum           result;
     405                 :             : 
     406                 :           0 :         result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
     407                 :           0 :                                                            VARSIZE_ANY_EXHDR(key),
     408                 :           0 :                                                            PG_GETARG_INT64(1));
     409                 :             : 
     410         [ #  # ]:           0 :         PG_FREE_IF_COPY(key, 0);
     411                 :             : 
     412                 :           0 :         return result;
     413                 :           0 : }
     414                 :             : 
     415                 :             : Datum
     416                 :        1023 : hashbytea(PG_FUNCTION_ARGS)
     417                 :             : {
     418                 :        1023 :         return hashvarlena(fcinfo);
     419                 :             : }
     420                 :             : 
     421                 :             : Datum
     422                 :           0 : hashbyteaextended(PG_FUNCTION_ARGS)
     423                 :             : {
     424                 :           0 :         return hashvarlenaextended(fcinfo);
     425                 :             : }

Generated by: LCOV version 2.3.2-1