LCOV - code coverage report
Current view: top level - src/backend/commands - copyfromparse.c (source / functions) Coverage Total Hit
Test: Code coverage Lines: 80.1 % 825 661
Test Date: 2026-01-26 10:56:24 Functions: 87.0 % 23 20
Legend: Lines:     hit not hit
Branches: + taken - not taken # not executed
Branches: 53.8 % 731 393

             Branch data     Line data    Source code
       1                 :             : /*-------------------------------------------------------------------------
       2                 :             :  *
       3                 :             :  * copyfromparse.c
       4                 :             :  *              Parse CSV/text/binary format for COPY FROM.
       5                 :             :  *
       6                 :             :  * This file contains routines to parse the text, CSV and binary input
       7                 :             :  * formats.  The main entry point is NextCopyFrom(), which parses the
       8                 :             :  * next input line and returns it as Datums.
       9                 :             :  *
      10                 :             :  * In text/CSV mode, the parsing happens in multiple stages:
      11                 :             :  *
      12                 :             :  * [data source] --> raw_buf --> input_buf --> line_buf --> attribute_buf
      13                 :             :  *                1.          2.            3.           4.
      14                 :             :  *
      15                 :             :  * 1. CopyLoadRawBuf() reads raw data from the input file or client, and
      16                 :             :  *    places it into 'raw_buf'.
      17                 :             :  *
      18                 :             :  * 2. CopyConvertBuf() calls the encoding conversion function to convert
      19                 :             :  *    the data in 'raw_buf' from client to server encoding, placing the
      20                 :             :  *    converted result in 'input_buf'.
      21                 :             :  *
      22                 :             :  * 3. CopyReadLine() parses the data in 'input_buf', one line at a time.
      23                 :             :  *    It is responsible for finding the next newline marker, taking quote and
      24                 :             :  *    escape characters into account according to the COPY options.  The line
      25                 :             :  *    is copied into 'line_buf', with quotes and escape characters still
      26                 :             :  *    intact.
      27                 :             :  *
      28                 :             :  * 4. CopyReadAttributesText/CSV() takes the input line from
      29                 :             :  *    'line_buf', and splits it into fields, unescaping the data as required.
      30                 :             :  *    The fields are stored in 'attribute_buf', and 'raw_fields' array holds
      31                 :             :  *    pointers to each field.
      32                 :             :  *
      33                 :             :  * If encoding conversion is not required, a shortcut is taken in step 2 to
      34                 :             :  * avoid copying the data unnecessarily.  The 'input_buf' pointer is set to
      35                 :             :  * point directly to 'raw_buf', so that CopyLoadRawBuf() loads the raw data
      36                 :             :  * directly into 'input_buf'.  CopyConvertBuf() then merely validates that
      37                 :             :  * the data is valid in the current encoding.
      38                 :             :  *
      39                 :             :  * In binary mode, the pipeline is much simpler.  Input is loaded into
      40                 :             :  * 'raw_buf', and encoding conversion is done in the datatype-specific
      41                 :             :  * receive functions, if required.  'input_buf' and 'line_buf' are not used,
      42                 :             :  * but 'attribute_buf' is used as a temporary buffer to hold one attribute's
      43                 :             :  * data when it's passed to the receive function.
      44                 :             :  *
      45                 :             :  * 'raw_buf' is always 64 kB in size (RAW_BUF_SIZE).  'input_buf' is also
      46                 :             :  * 64 kB (INPUT_BUF_SIZE), if encoding conversion is required.  'line_buf'
      47                 :             :  * and 'attribute_buf' are expanded on demand, to hold the longest line
      48                 :             :  * encountered so far.
      49                 :             :  *
      50                 :             :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      51                 :             :  * Portions Copyright (c) 1994, Regents of the University of California
      52                 :             :  *
      53                 :             :  *
      54                 :             :  * IDENTIFICATION
      55                 :             :  *        src/backend/commands/copyfromparse.c
      56                 :             :  *
      57                 :             :  *-------------------------------------------------------------------------
      58                 :             :  */
      59                 :             : #include "postgres.h"
      60                 :             : 
      61                 :             : #include <ctype.h>
      62                 :             : #include <unistd.h>
      63                 :             : #include <sys/stat.h>
      64                 :             : 
      65                 :             : #include "commands/copyapi.h"
      66                 :             : #include "commands/copyfrom_internal.h"
      67                 :             : #include "commands/progress.h"
      68                 :             : #include "executor/executor.h"
      69                 :             : #include "libpq/libpq.h"
      70                 :             : #include "libpq/pqformat.h"
      71                 :             : #include "mb/pg_wchar.h"
      72                 :             : #include "miscadmin.h"
      73                 :             : #include "pgstat.h"
      74                 :             : #include "port/pg_bswap.h"
      75                 :             : #include "utils/builtins.h"
      76                 :             : #include "utils/rel.h"
      77                 :             : 
      78                 :             : #define ISOCTAL(c) (((c) >= '0') && ((c) <= '7'))
      79                 :             : #define OCTVALUE(c) ((c) - '0')
      80                 :             : 
      81                 :             : /*
      82                 :             :  * These macros centralize code used to process line_buf and input_buf buffers.
      83                 :             :  * They are macros because they often perform continue/break control flow,
      84                 :             :  * and because that avoids function call overhead in tight COPY loops.
      85                 :             :  *
      86                 :             :  * We must use "if (1)" because the usual "do {...} while(0)" wrapper would
      87                 :             :  * prevent the continue/break processing from working.  We end the "if (1)"
      88                 :             :  * with "else ((void) 0)" to ensure the "if" does not unintentionally match
      89                 :             :  * any "else" in the calling code, and to avoid any compiler warnings about
      90                 :             :  * empty statements.  See http://www.cit.gu.edu.au/~anthony/info/C/C.macros.
      91                 :             :  */
      92                 :             : 
      93                 :             : /*
      94                 :             :  * This keeps the character read at the top of the loop in the buffer
      95                 :             :  * even if there is more than one read-ahead.
      96                 :             :  */
      97                 :             : #define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen) \
      98                 :             : if (1) \
      99                 :             : { \
     100                 :             :         if (input_buf_ptr + (extralen) >= copy_buf_len && !hit_eof) \
     101                 :             :         { \
     102                 :             :                 input_buf_ptr = prev_raw_ptr; /* undo fetch */ \
     103                 :             :                 need_data = true; \
     104                 :             :                 continue; \
     105                 :             :         } \
     106                 :             : } else ((void) 0)
     107                 :             : 
     108                 :             : /* This consumes the remainder of the buffer and breaks */
     109                 :             : #define IF_NEED_REFILL_AND_EOF_BREAK(extralen) \
     110                 :             : if (1) \
     111                 :             : { \
     112                 :             :         if (input_buf_ptr + (extralen) >= copy_buf_len && hit_eof) \
     113                 :             :         { \
     114                 :             :                 if (extralen) \
     115                 :             :                         input_buf_ptr = copy_buf_len; /* consume the partial character */ \
     116                 :             :                 /* backslash just before EOF, treat as data char */ \
     117                 :             :                 result = true; \
     118                 :             :                 break; \
     119                 :             :         } \
     120                 :             : } else ((void) 0)
     121                 :             : 
     122                 :             : /*
     123                 :             :  * Transfer any approved data to line_buf; must do this to be sure
     124                 :             :  * there is some room in input_buf.
     125                 :             :  */
     126                 :             : #define REFILL_LINEBUF \
     127                 :             : if (1) \
     128                 :             : { \
     129                 :             :         if (input_buf_ptr > cstate->input_buf_index) \
     130                 :             :         { \
     131                 :             :                 appendBinaryStringInfo(&cstate->line_buf, \
     132                 :             :                                                          cstate->input_buf + cstate->input_buf_index, \
     133                 :             :                                                            input_buf_ptr - cstate->input_buf_index); \
     134                 :             :                 cstate->input_buf_index = input_buf_ptr; \
     135                 :             :         } \
     136                 :             : } else ((void) 0)
     137                 :             : 
     138                 :             : /* NOTE: there's a copy of this in copyto.c */
     139                 :             : static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
     140                 :             : 
     141                 :             : 
     142                 :             : /* non-export function prototypes */
     143                 :             : static bool CopyReadLine(CopyFromState cstate, bool is_csv);
     144                 :             : static bool CopyReadLineText(CopyFromState cstate, bool is_csv);
     145                 :             : static int      CopyReadAttributesText(CopyFromState cstate);
     146                 :             : static int      CopyReadAttributesCSV(CopyFromState cstate);
     147                 :             : static Datum CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo,
     148                 :             :                                                                          Oid typioparam, int32 typmod,
     149                 :             :                                                                          bool *isnull);
     150                 :             : static pg_attribute_always_inline bool CopyFromTextLikeOneRow(CopyFromState cstate,
     151                 :             :                                                                                                                           ExprContext *econtext,
     152                 :             :                                                                                                                           Datum *values,
     153                 :             :                                                                                                                           bool *nulls,
     154                 :             :                                                                                                                           bool is_csv);
     155                 :             : static pg_attribute_always_inline bool NextCopyFromRawFieldsInternal(CopyFromState cstate,
     156                 :             :                                                                                                                                          char ***fields,
     157                 :             :                                                                                                                                          int *nfields,
     158                 :             :                                                                                                                                          bool is_csv);
     159                 :             : 
     160                 :             : 
     161                 :             : /* Low-level communications functions */
     162                 :             : static int      CopyGetData(CopyFromState cstate, void *databuf,
     163                 :             :                                                 int minread, int maxread);
     164                 :             : static inline bool CopyGetInt32(CopyFromState cstate, int32 *val);
     165                 :             : static inline bool CopyGetInt16(CopyFromState cstate, int16 *val);
     166                 :             : static void CopyLoadInputBuf(CopyFromState cstate);
     167                 :             : static int      CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes);
     168                 :             : 
     169                 :             : void
     170                 :         132 : ReceiveCopyBegin(CopyFromState cstate)
     171                 :             : {
     172                 :         132 :         StringInfoData buf;
     173                 :         132 :         int                     natts = list_length(cstate->attnumlist);
     174                 :         132 :         int16           format = (cstate->opts.binary ? 1 : 0);
     175                 :         132 :         int                     i;
     176                 :             : 
     177                 :         132 :         pq_beginmessage(&buf, PqMsg_CopyInResponse);
     178                 :         132 :         pq_sendbyte(&buf, format);  /* overall format */
     179                 :         132 :         pq_sendint16(&buf, natts);
     180         [ +  + ]:         461 :         for (i = 0; i < natts; i++)
     181                 :         329 :                 pq_sendint16(&buf, format); /* per-column formats */
     182                 :         132 :         pq_endmessage(&buf);
     183                 :         132 :         cstate->copy_src = COPY_FRONTEND;
     184                 :         132 :         cstate->fe_msgbuf = makeStringInfo();
     185                 :             :         /* We *must* flush here to ensure FE knows it can send. */
     186                 :         132 :         pq_flush();
     187                 :         132 : }
     188                 :             : 
     189                 :             : void
     190                 :           1 : ReceiveCopyBinaryHeader(CopyFromState cstate)
     191                 :             : {
     192                 :           1 :         char            readSig[11];
     193                 :           1 :         int32           tmp;
     194                 :             : 
     195                 :             :         /* Signature */
     196         [ +  - ]:           1 :         if (CopyReadBinaryData(cstate, readSig, 11) != 11 ||
     197                 :           1 :                 memcmp(readSig, BinarySignature, 11) != 0)
     198   [ #  #  #  # ]:           0 :                 ereport(ERROR,
     199                 :             :                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     200                 :             :                                  errmsg("COPY file signature not recognized")));
     201                 :             :         /* Flags field */
     202         [ +  - ]:           1 :         if (!CopyGetInt32(cstate, &tmp))
     203   [ #  #  #  # ]:           0 :                 ereport(ERROR,
     204                 :             :                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     205                 :             :                                  errmsg("invalid COPY file header (missing flags)")));
     206         [ +  - ]:           1 :         if ((tmp & (1 << 16)) != 0)
     207   [ #  #  #  # ]:           0 :                 ereport(ERROR,
     208                 :             :                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     209                 :             :                                  errmsg("invalid COPY file header (WITH OIDS)")));
     210                 :           1 :         tmp &= ~(1 << 16);
     211         [ +  - ]:           1 :         if ((tmp >> 16) != 0)
     212   [ #  #  #  # ]:           0 :                 ereport(ERROR,
     213                 :             :                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     214                 :             :                                  errmsg("unrecognized critical flags in COPY file header")));
     215                 :             :         /* Header extension length */
     216         [ +  - ]:           1 :         if (!CopyGetInt32(cstate, &tmp) ||
     217                 :           1 :                 tmp < 0)
     218   [ #  #  #  # ]:           0 :                 ereport(ERROR,
     219                 :             :                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     220                 :             :                                  errmsg("invalid COPY file header (missing length)")));
     221                 :             :         /* Skip extension header, if present */
     222         [ -  + ]:           1 :         while (tmp-- > 0)
     223                 :             :         {
     224         [ #  # ]:           0 :                 if (CopyReadBinaryData(cstate, readSig, 1) != 1)
     225   [ #  #  #  # ]:           0 :                         ereport(ERROR,
     226                 :             :                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     227                 :             :                                          errmsg("invalid COPY file header (wrong length)")));
     228                 :             :         }
     229                 :           1 : }
     230                 :             : 
     231                 :             : /*
     232                 :             :  * CopyGetData reads data from the source (file or frontend)
     233                 :             :  *
     234                 :             :  * We attempt to read at least minread, and at most maxread, bytes from
     235                 :             :  * the source.  The actual number of bytes read is returned; if this is
     236                 :             :  * less than minread, EOF was detected.
     237                 :             :  *
     238                 :             :  * Note: when copying from the frontend, we expect a proper EOF mark per
     239                 :             :  * protocol; if the frontend simply drops the connection, we raise an error.
     240                 :             :  * It seems unwise to allow the COPY IN to complete normally in that case.
     241                 :             :  *
     242                 :             :  * NB: no data conversion is applied here.
     243                 :             :  */
     244                 :             : static int
     245                 :         450 : CopyGetData(CopyFromState cstate, void *databuf, int minread, int maxread)
     246                 :             : {
     247                 :         450 :         int                     bytesread = 0;
     248                 :             : 
     249   [ +  -  +  - ]:         450 :         switch (cstate->copy_src)
     250                 :             :         {
     251                 :             :                 case COPY_FILE:
     252                 :         126 :                         bytesread = fread(databuf, 1, maxread, cstate->copy_file);
     253         [ +  - ]:         126 :                         if (ferror(cstate->copy_file))
     254   [ #  #  #  # ]:           0 :                                 ereport(ERROR,
     255                 :             :                                                 (errcode_for_file_access(),
     256                 :             :                                                  errmsg("could not read from COPY file: %m")));
     257         [ +  + ]:         126 :                         if (bytesread == 0)
     258                 :          44 :                                 cstate->raw_reached_eof = true;
     259                 :         126 :                         break;
     260                 :             :                 case COPY_FRONTEND:
     261   [ +  -  +  +  :         450 :                         while (maxread > 0 && bytesread < minread && !cstate->raw_reached_eof)
                   +  + ]
     262                 :             :                         {
     263                 :         225 :                                 int                     avail;
     264                 :             : 
     265         [ +  + ]:         351 :                                 while (cstate->fe_msgbuf->cursor >= cstate->fe_msgbuf->len)
     266                 :             :                                 {
     267                 :             :                                         /* Try to receive another message */
     268                 :         225 :                                         int                     mtype;
     269                 :         225 :                                         int                     maxmsglen;
     270                 :             : 
     271                 :             :                         readmessage:
     272                 :         225 :                                         HOLD_CANCEL_INTERRUPTS();
     273                 :         225 :                                         pq_startmsgread();
     274                 :         225 :                                         mtype = pq_getbyte();
     275         [ +  - ]:         225 :                                         if (mtype == EOF)
     276   [ #  #  #  # ]:           0 :                                                 ereport(ERROR,
     277                 :             :                                                                 (errcode(ERRCODE_CONNECTION_FAILURE),
     278                 :             :                                                                  errmsg("unexpected EOF on client connection with an open transaction")));
     279                 :             :                                         /* Validate message type and set packet size limit */
     280      [ +  +  - ]:         225 :                                         switch (mtype)
     281                 :             :                                         {
     282                 :             :                                                 case PqMsg_CopyData:
     283                 :         126 :                                                         maxmsglen = PQ_LARGE_MESSAGE_LIMIT;
     284                 :         126 :                                                         break;
     285                 :             :                                                 case PqMsg_CopyDone:
     286                 :             :                                                 case PqMsg_CopyFail:
     287                 :             :                                                 case PqMsg_Flush:
     288                 :             :                                                 case PqMsg_Sync:
     289                 :          99 :                                                         maxmsglen = PQ_SMALL_MESSAGE_LIMIT;
     290                 :          99 :                                                         break;
     291                 :             :                                                 default:
     292   [ #  #  #  # ]:           0 :                                                         ereport(ERROR,
     293                 :             :                                                                         (errcode(ERRCODE_PROTOCOL_VIOLATION),
     294                 :             :                                                                          errmsg("unexpected message type 0x%02X during COPY from stdin",
     295                 :             :                                                                                         mtype)));
     296                 :           0 :                                                         maxmsglen = 0;  /* keep compiler quiet */
     297                 :           0 :                                                         break;
     298                 :             :                                         }
     299                 :             :                                         /* Now collect the message body */
     300         [ +  - ]:         225 :                                         if (pq_getmessage(cstate->fe_msgbuf, maxmsglen))
     301   [ #  #  #  # ]:           0 :                                                 ereport(ERROR,
     302                 :             :                                                                 (errcode(ERRCODE_CONNECTION_FAILURE),
     303                 :             :                                                                  errmsg("unexpected EOF on client connection with an open transaction")));
     304         [ -  + ]:         225 :                                         RESUME_CANCEL_INTERRUPTS();
     305                 :             :                                         /* ... and process it */
     306   [ -  -  +  +  :         225 :                                         switch (mtype)
                      - ]
     307                 :             :                                         {
     308                 :             :                                                 case PqMsg_CopyData:
     309                 :             :                                                         break;
     310                 :             :                                                 case PqMsg_CopyDone:
     311                 :             :                                                         /* COPY IN correctly terminated by frontend */
     312                 :          99 :                                                         cstate->raw_reached_eof = true;
     313                 :          99 :                                                         return bytesread;
     314                 :             :                                                 case PqMsg_CopyFail:
     315   [ #  #  #  # ]:           0 :                                                         ereport(ERROR,
     316                 :             :                                                                         (errcode(ERRCODE_QUERY_CANCELED),
     317                 :             :                                                                          errmsg("COPY from stdin failed: %s",
     318                 :             :                                                                                         pq_getmsgstring(cstate->fe_msgbuf))));
     319                 :           0 :                                                         break;
     320                 :             :                                                 case PqMsg_Flush:
     321                 :             :                                                 case PqMsg_Sync:
     322                 :             : 
     323                 :             :                                                         /*
     324                 :             :                                                          * Ignore Flush/Sync for the convenience of client
     325                 :             :                                                          * libraries (such as libpq) that may send those
     326                 :             :                                                          * without noticing that the command they just
     327                 :             :                                                          * sent was COPY.
     328                 :             :                                                          */
     329                 :           0 :                                                         goto readmessage;
     330                 :             :                                                 default:
     331                 :           0 :                                                         Assert(false);  /* NOT REACHED */
     332                 :           0 :                                         }
     333         [ +  + ]:         225 :                                 }
     334                 :         126 :                                 avail = cstate->fe_msgbuf->len - cstate->fe_msgbuf->cursor;
     335         [ +  - ]:         126 :                                 if (avail > maxread)
     336                 :           0 :                                         avail = maxread;
     337                 :         126 :                                 pq_copymsgbytes(cstate->fe_msgbuf, databuf, avail);
     338                 :         126 :                                 databuf = (char *) databuf + avail;
     339                 :         126 :                                 maxread -= avail;
     340                 :         126 :                                 bytesread += avail;
     341         [ +  + ]:         225 :                         }
     342                 :         225 :                         break;
     343                 :             :                 case COPY_CALLBACK:
     344                 :           0 :                         bytesread = cstate->data_source_cb(databuf, minread, maxread);
     345                 :           0 :                         break;
     346                 :             :         }
     347                 :             : 
     348                 :         351 :         return bytesread;
     349                 :         450 : }
     350                 :             : 
     351                 :             : 
     352                 :             : /*
     353                 :             :  * These functions do apply some data conversion
     354                 :             :  */
     355                 :             : 
     356                 :             : /*
     357                 :             :  * CopyGetInt32 reads an int32 that appears in network byte order
     358                 :             :  *
     359                 :             :  * Returns true if OK, false if EOF
     360                 :             :  */
     361                 :             : static inline bool
     362                 :          23 : CopyGetInt32(CopyFromState cstate, int32 *val)
     363                 :             : {
     364                 :          23 :         uint32          buf;
     365                 :             : 
     366         [ -  + ]:          23 :         if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
     367                 :             :         {
     368                 :           0 :                 *val = 0;                               /* suppress compiler warning */
     369                 :           0 :                 return false;
     370                 :             :         }
     371                 :          23 :         *val = (int32) pg_ntoh32(buf);
     372                 :          23 :         return true;
     373                 :          23 : }
     374                 :             : 
     375                 :             : /*
     376                 :             :  * CopyGetInt16 reads an int16 that appears in network byte order
     377                 :             :  */
     378                 :             : static inline bool
     379                 :           4 : CopyGetInt16(CopyFromState cstate, int16 *val)
     380                 :             : {
     381                 :           4 :         uint16          buf;
     382                 :             : 
     383         [ -  + ]:           4 :         if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
     384                 :             :         {
     385                 :           0 :                 *val = 0;                               /* suppress compiler warning */
     386                 :           0 :                 return false;
     387                 :             :         }
     388                 :           4 :         *val = (int16) pg_ntoh16(buf);
     389                 :           4 :         return true;
     390                 :           4 : }
     391                 :             : 
     392                 :             : 
     393                 :             : /*
     394                 :             :  * Perform encoding conversion on data in 'raw_buf', writing the converted
     395                 :             :  * data into 'input_buf'.
     396                 :             :  *
     397                 :             :  * On entry, there must be some data to convert in 'raw_buf'.
     398                 :             :  */
     399                 :             : static void
     400                 :         696 : CopyConvertBuf(CopyFromState cstate)
     401                 :             : {
     402                 :             :         /*
     403                 :             :          * If the file and server encoding are the same, no encoding conversion is
     404                 :             :          * required.  However, we still need to verify that the input is valid for
     405                 :             :          * the encoding.
     406                 :             :          */
     407         [ +  + ]:         696 :         if (!cstate->need_transcoding)
     408                 :             :         {
     409                 :             :                 /*
     410                 :             :                  * When conversion is not required, input_buf and raw_buf are the
     411                 :             :                  * same.  raw_buf_len is the total number of bytes in the buffer, and
     412                 :             :                  * input_buf_len tracks how many of those bytes have already been
     413                 :             :                  * verified.
     414                 :             :                  */
     415                 :         682 :                 int                     preverifiedlen = cstate->input_buf_len;
     416                 :         682 :                 int                     unverifiedlen = cstate->raw_buf_len - cstate->input_buf_len;
     417                 :         682 :                 int                     nverified;
     418                 :             : 
     419         [ +  + ]:         682 :                 if (unverifiedlen == 0)
     420                 :             :                 {
     421                 :             :                         /*
     422                 :             :                          * If no more raw data is coming, report the EOF to the caller.
     423                 :             :                          */
     424         [ +  + ]:         479 :                         if (cstate->raw_reached_eof)
     425                 :         138 :                                 cstate->input_reached_eof = true;
     426                 :         479 :                         return;
     427                 :             :                 }
     428                 :             : 
     429                 :             :                 /*
     430                 :             :                  * Verify the new data, including any residual unverified bytes from
     431                 :             :                  * previous round.
     432                 :             :                  */
     433                 :         406 :                 nverified = pg_encoding_verifymbstr(cstate->file_encoding,
     434                 :         203 :                                                                                         cstate->raw_buf + preverifiedlen,
     435                 :         203 :                                                                                         unverifiedlen);
     436         [ +  - ]:         203 :                 if (nverified == 0)
     437                 :             :                 {
     438                 :             :                         /*
     439                 :             :                          * Could not verify anything.
     440                 :             :                          *
     441                 :             :                          * If there is no more raw input data coming, it means that there
     442                 :             :                          * was an incomplete multi-byte sequence at the end.  Also, if
     443                 :             :                          * there's "enough" input left, we should be able to verify at
     444                 :             :                          * least one character, and a failure to do so means that we've
     445                 :             :                          * hit an invalid byte sequence.
     446                 :             :                          */
     447   [ #  #  #  # ]:           0 :                         if (cstate->raw_reached_eof || unverifiedlen >= pg_encoding_max_length(cstate->file_encoding))
     448                 :           0 :                                 cstate->input_reached_error = true;
     449                 :           0 :                         return;
     450                 :             :                 }
     451                 :         203 :                 cstate->input_buf_len += nverified;
     452         [ +  + ]:         682 :         }
     453                 :             :         else
     454                 :             :         {
     455                 :             :                 /*
     456                 :             :                  * Encoding conversion is needed.
     457                 :             :                  */
     458                 :          14 :                 int                     nbytes;
     459                 :          14 :                 unsigned char *src;
     460                 :          14 :                 int                     srclen;
     461                 :          14 :                 unsigned char *dst;
     462                 :          14 :                 int                     dstlen;
     463                 :          14 :                 int                     convertedlen;
     464                 :             : 
     465         [ +  + ]:          14 :                 if (RAW_BUF_BYTES(cstate) == 0)
     466                 :             :                 {
     467                 :             :                         /*
     468                 :             :                          * If no more raw data is coming, report the EOF to the caller.
     469                 :             :                          */
     470         [ +  + ]:           8 :                         if (cstate->raw_reached_eof)
     471                 :           2 :                                 cstate->input_reached_eof = true;
     472                 :           8 :                         return;
     473                 :             :                 }
     474                 :             : 
     475                 :             :                 /*
     476                 :             :                  * First, copy down any unprocessed data.
     477                 :             :                  */
     478                 :           6 :                 nbytes = INPUT_BUF_BYTES(cstate);
     479   [ -  +  #  # ]:           6 :                 if (nbytes > 0 && cstate->input_buf_index > 0)
     480                 :           0 :                         memmove(cstate->input_buf, cstate->input_buf + cstate->input_buf_index,
     481                 :             :                                         nbytes);
     482                 :           6 :                 cstate->input_buf_index = 0;
     483                 :           6 :                 cstate->input_buf_len = nbytes;
     484                 :           6 :                 cstate->input_buf[nbytes] = '\0';
     485                 :             : 
     486                 :           6 :                 src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
     487                 :           6 :                 srclen = cstate->raw_buf_len - cstate->raw_buf_index;
     488                 :           6 :                 dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
     489                 :           6 :                 dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;
     490                 :             : 
     491                 :             :                 /*
     492                 :             :                  * Do the conversion.  This might stop short, if there is an invalid
     493                 :             :                  * byte sequence in the input.  We'll convert as much as we can in
     494                 :             :                  * that case.
     495                 :             :                  *
     496                 :             :                  * Note: Even if we hit an invalid byte sequence, we don't report the
     497                 :             :                  * error until all the valid bytes have been consumed.  The input
     498                 :             :                  * might contain an end-of-input marker (\.), and we don't want to
     499                 :             :                  * report an error if the invalid byte sequence is after the
     500                 :             :                  * end-of-input marker.  We might unnecessarily convert some data
     501                 :             :                  * after the end-of-input marker as long as it's valid for the
     502                 :             :                  * encoding, but that's harmless.
     503                 :             :                  */
     504                 :          12 :                 convertedlen = pg_do_encoding_conversion_buf(cstate->conversion_proc,
     505                 :           6 :                                                                                                          cstate->file_encoding,
     506                 :           6 :                                                                                                          GetDatabaseEncoding(),
     507                 :           6 :                                                                                                          src, srclen,
     508                 :           6 :                                                                                                          dst, dstlen,
     509                 :             :                                                                                                          true);
     510         [ +  + ]:           6 :                 if (convertedlen == 0)
     511                 :             :                 {
     512                 :             :                         /*
     513                 :             :                          * Could not convert anything.  If there is no more raw input data
     514                 :             :                          * coming, it means that there was an incomplete multi-byte
     515                 :             :                          * sequence at the end.  Also, if there is plenty of input left,
     516                 :             :                          * we should be able to convert at least one character, so a
     517                 :             :                          * failure to do so must mean that we've hit a byte sequence
     518                 :             :                          * that's invalid.
     519                 :             :                          */
     520   [ +  +  -  + ]:           4 :                         if (cstate->raw_reached_eof || srclen >= MAX_CONVERSION_INPUT_LENGTH)
     521                 :           2 :                                 cstate->input_reached_error = true;
     522                 :           4 :                         return;
     523                 :             :                 }
     524                 :           2 :                 cstate->raw_buf_index += convertedlen;
     525                 :           2 :                 cstate->input_buf_len += strlen((char *) dst);
     526         [ +  + ]:          14 :         }
     527                 :         696 : }
     528                 :             : 
     529                 :             : /*
     530                 :             :  * Report an encoding or conversion error.
     531                 :             :  */
     532                 :             : static void
     533                 :           0 : CopyConversionError(CopyFromState cstate)
     534                 :             : {
     535         [ #  # ]:           0 :         Assert(cstate->raw_buf_len > 0);
     536         [ #  # ]:           0 :         Assert(cstate->input_reached_error);
     537                 :             : 
     538         [ #  # ]:           0 :         if (!cstate->need_transcoding)
     539                 :             :         {
     540                 :             :                 /*
     541                 :             :                  * Everything up to input_buf_len was successfully verified, and
     542                 :             :                  * input_buf_len points to the invalid or incomplete character.
     543                 :             :                  */
     544                 :           0 :                 report_invalid_encoding(cstate->file_encoding,
     545                 :           0 :                                                                 cstate->raw_buf + cstate->input_buf_len,
     546                 :           0 :                                                                 cstate->raw_buf_len - cstate->input_buf_len);
     547                 :             :         }
     548                 :             :         else
     549                 :             :         {
     550                 :             :                 /*
     551                 :             :                  * raw_buf_index points to the invalid or untranslatable character. We
     552                 :             :                  * let the conversion routine report the error, because it can provide
     553                 :             :                  * a more specific error message than we could here.  An earlier call
     554                 :             :                  * to the conversion routine in CopyConvertBuf() detected that there
     555                 :             :                  * is an error, now we call the conversion routine again with
     556                 :             :                  * noError=false, to have it throw the error.
     557                 :             :                  */
     558                 :           0 :                 unsigned char *src;
     559                 :           0 :                 int                     srclen;
     560                 :           0 :                 unsigned char *dst;
     561                 :           0 :                 int                     dstlen;
     562                 :             : 
     563                 :           0 :                 src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
     564                 :           0 :                 srclen = cstate->raw_buf_len - cstate->raw_buf_index;
     565                 :           0 :                 dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
     566                 :           0 :                 dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;
     567                 :             : 
     568                 :           0 :                 (void) pg_do_encoding_conversion_buf(cstate->conversion_proc,
     569                 :           0 :                                                                                          cstate->file_encoding,
     570                 :           0 :                                                                                          GetDatabaseEncoding(),
     571                 :           0 :                                                                                          src, srclen,
     572                 :           0 :                                                                                          dst, dstlen,
     573                 :             :                                                                                          false);
     574                 :             : 
     575                 :             :                 /*
     576                 :             :                  * The conversion routine should have reported an error, so this
     577                 :             :                  * should not be reached.
     578                 :             :                  */
     579   [ #  #  #  # ]:           0 :                 elog(ERROR, "encoding conversion failed without error");
     580                 :           0 :         }
     581                 :           0 : }
     582                 :             : 
     583                 :             : /*
     584                 :             :  * Load more data from data source to raw_buf.
     585                 :             :  *
     586                 :             :  * If RAW_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the
     587                 :             :  * beginning of the buffer, and we load new data after that.
     588                 :             :  */
     589                 :             : static void
     590                 :         351 : CopyLoadRawBuf(CopyFromState cstate)
     591                 :             : {
     592                 :         351 :         int                     nbytes;
     593                 :         351 :         int                     inbytes;
     594                 :             : 
     595                 :             :         /*
     596                 :             :          * In text mode, if encoding conversion is not required, raw_buf and
     597                 :             :          * input_buf point to the same buffer.  Their len/index better agree, too.
     598                 :             :          */
     599         [ +  + ]:         351 :         if (cstate->raw_buf == cstate->input_buf)
     600                 :             :         {
     601         [ +  - ]:         341 :                 Assert(!cstate->need_transcoding);
     602         [ +  - ]:         341 :                 Assert(cstate->raw_buf_index == cstate->input_buf_index);
     603         [ +  - ]:         341 :                 Assert(cstate->input_buf_len <= cstate->raw_buf_len);
     604                 :         341 :         }
     605                 :             : 
     606                 :             :         /*
     607                 :             :          * Copy down the unprocessed data if any.
     608                 :             :          */
     609                 :         351 :         nbytes = RAW_BUF_BYTES(cstate);
     610   [ +  +  +  - ]:         351 :         if (nbytes > 0 && cstate->raw_buf_index > 0)
     611                 :           0 :                 memmove(cstate->raw_buf, cstate->raw_buf + cstate->raw_buf_index,
     612                 :             :                                 nbytes);
     613                 :         351 :         cstate->raw_buf_len -= cstate->raw_buf_index;
     614                 :         351 :         cstate->raw_buf_index = 0;
     615                 :             : 
     616                 :             :         /*
     617                 :             :          * If raw_buf and input_buf are in fact the same buffer, adjust the
     618                 :             :          * input_buf variables, too.
     619                 :             :          */
     620         [ +  + ]:         351 :         if (cstate->raw_buf == cstate->input_buf)
     621                 :             :         {
     622                 :         341 :                 cstate->input_buf_len -= cstate->input_buf_index;
     623                 :         341 :                 cstate->input_buf_index = 0;
     624                 :         341 :         }
     625                 :             : 
     626                 :             :         /* Load more data */
     627                 :         702 :         inbytes = CopyGetData(cstate, cstate->raw_buf + cstate->raw_buf_len,
     628                 :         351 :                                                   1, RAW_BUF_SIZE - cstate->raw_buf_len);
     629                 :         351 :         nbytes += inbytes;
     630                 :         351 :         cstate->raw_buf[nbytes] = '\0';
     631                 :         351 :         cstate->raw_buf_len = nbytes;
     632                 :             : 
     633                 :         351 :         cstate->bytes_processed += inbytes;
     634                 :         351 :         pgstat_progress_update_param(PROGRESS_COPY_BYTES_PROCESSED, cstate->bytes_processed);
     635                 :             : 
     636         [ +  + ]:         351 :         if (inbytes == 0)
     637                 :         143 :                 cstate->raw_reached_eof = true;
     638                 :         351 : }
     639                 :             : 
     640                 :             : /*
     641                 :             :  * CopyLoadInputBuf loads some more data into input_buf
     642                 :             :  *
     643                 :             :  * On return, at least one more input character is loaded into
     644                 :             :  * input_buf, or input_reached_eof is set.
     645                 :             :  *
     646                 :             :  * If INPUT_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the start
     647                 :             :  * of the buffer and then we load more data after that.
     648                 :             :  */
     649                 :             : static void
     650                 :         345 : CopyLoadInputBuf(CopyFromState cstate)
     651                 :             : {
     652                 :         345 :         int                     nbytes = INPUT_BUF_BYTES(cstate);
     653                 :             : 
     654                 :             :         /*
     655                 :             :          * The caller has updated input_buf_index to indicate how much of the
     656                 :             :          * input has been consumed and isn't needed anymore.  If input_buf is the
     657                 :             :          * same physical area as raw_buf, update raw_buf_index accordingly.
     658                 :             :          */
     659         [ +  + ]:         345 :         if (cstate->raw_buf == cstate->input_buf)
     660                 :             :         {
     661         [ +  - ]:         341 :                 Assert(!cstate->need_transcoding);
     662         [ +  - ]:         341 :                 Assert(cstate->input_buf_index >= cstate->raw_buf_index);
     663                 :         341 :                 cstate->raw_buf_index = cstate->input_buf_index;
     664                 :         341 :         }
     665                 :             : 
     666                 :         694 :         for (;;)
     667                 :             :         {
     668                 :             :                 /* If we now have some unconverted data, try to convert it */
     669                 :         694 :                 CopyConvertBuf(cstate);
     670                 :             : 
     671                 :             :                 /* If we now have some more input bytes ready, return them */
     672         [ +  + ]:         694 :                 if (INPUT_BUF_BYTES(cstate) > nbytes)
     673                 :         205 :                         return;
     674                 :             : 
     675                 :             :                 /*
     676                 :             :                  * If we reached an invalid byte sequence, or we're at an incomplete
     677                 :             :                  * multi-byte character but there is no more raw input data, report
     678                 :             :                  * conversion error.
     679                 :             :                  */
     680         [ +  + ]:         489 :                 if (cstate->input_reached_error)
     681                 :           2 :                         CopyConversionError(cstate);
     682                 :             : 
     683                 :             :                 /* no more input, and everything has been converted */
     684         [ +  + ]:         489 :                 if (cstate->input_reached_eof)
     685                 :         140 :                         break;
     686                 :             : 
     687                 :             :                 /* Try to load more raw data */
     688         [ -  + ]:         349 :                 Assert(!cstate->raw_reached_eof);
     689                 :         349 :                 CopyLoadRawBuf(cstate);
     690                 :             :         }
     691         [ -  + ]:         345 : }
     692                 :             : 
     693                 :             : /*
     694                 :             :  * CopyReadBinaryData
     695                 :             :  *
     696                 :             :  * Reads up to 'nbytes' bytes from cstate->copy_file via cstate->raw_buf
     697                 :             :  * and writes them to 'dest'.  Returns the number of bytes read (which
     698                 :             :  * would be less than 'nbytes' only if we reach EOF).
     699                 :             :  */
     700                 :             : static int
     701                 :          45 : CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)
     702                 :             : {
     703                 :          45 :         int                     copied_bytes = 0;
     704                 :             : 
     705         [ +  + ]:          45 :         if (RAW_BUF_BYTES(cstate) >= nbytes)
     706                 :             :         {
     707                 :             :                 /* Enough bytes are present in the buffer. */
     708                 :          43 :                 memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, nbytes);
     709                 :          43 :                 cstate->raw_buf_index += nbytes;
     710                 :          43 :                 copied_bytes = nbytes;
     711                 :          43 :         }
     712                 :             :         else
     713                 :             :         {
     714                 :             :                 /*
     715                 :             :                  * Not enough bytes in the buffer, so must read from the file.  Need
     716                 :             :                  * to loop since 'nbytes' could be larger than the buffer size.
     717                 :             :                  */
     718                 :           2 :                 do
     719                 :             :                 {
     720                 :           2 :                         int                     copy_bytes;
     721                 :             : 
     722                 :             :                         /* Load more data if buffer is empty. */
     723         [ -  + ]:           2 :                         if (RAW_BUF_BYTES(cstate) == 0)
     724                 :             :                         {
     725                 :           2 :                                 CopyLoadRawBuf(cstate);
     726         [ +  + ]:           2 :                                 if (cstate->raw_reached_eof)
     727                 :           1 :                                         break;          /* EOF */
     728                 :           1 :                         }
     729                 :             : 
     730                 :             :                         /* Transfer some bytes. */
     731         [ +  - ]:           1 :                         copy_bytes = Min(nbytes - copied_bytes, RAW_BUF_BYTES(cstate));
     732                 :           1 :                         memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, copy_bytes);
     733                 :           1 :                         cstate->raw_buf_index += copy_bytes;
     734                 :           1 :                         dest += copy_bytes;
     735                 :           1 :                         copied_bytes += copy_bytes;
     736   [ -  +  +  -  :           2 :                 } while (copied_bytes < nbytes);
                      + ]
     737                 :             :         }
     738                 :             : 
     739                 :          90 :         return copied_bytes;
     740                 :          45 : }
     741                 :             : 
     742                 :             : /*
     743                 :             :  * This function is exposed for use by extensions that read raw fields in the
     744                 :             :  * next line. It forwards to NextCopyFromRawFieldsInternal() with is_csv = opts.csv_mode; see there.
     745                 :             :  */
     746                 :             : bool
     747                 :           0 : NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)
     748                 :             : {
     749                 :           0 :         return NextCopyFromRawFieldsInternal(cstate, fields, nfields,
     750                 :           0 :                                                                                  cstate->opts.csv_mode); /* CSV vs. text parsing is chosen by the COPY options */
     751                 :             : }
     752                 :             : 
     753                 :             : /*
     754                 :             :  * Workhorse for NextCopyFromRawFields().
     755                 :             :  *
     756                 :             :  * Read raw fields in the next line for COPY FROM in text or csv mode. Return
     757                 :             :  * false if no more lines.
     758                 :             :  *
     759                 :             :  * An internal temporary buffer is returned via 'fields'. It is valid until
     760                 :             :  * the next call of the function. Since the function returns all raw fields
     761                 :             :  * in the input file, 'nfields' could be different from the number of columns
     762                 :             :  * in the relation.
     763                 :             :  *
     764                 :             :  * NOTE: the force_not_null option is not applied to the returned fields.
     765                 :             :  *
     766                 :             :  * We use pg_attribute_always_inline to reduce function call overhead
     767                 :             :  * and to help compilers to optimize away the 'is_csv' condition when called
     768                 :             :  * by internal functions such as CopyFromTextLikeOneRow().
     769                 :             :  */
     770                 :             : static pg_attribute_always_inline bool
     771                 :      115622 : NextCopyFromRawFieldsInternal(CopyFromState cstate, char ***fields, int *nfields, bool is_csv)
     772                 :             : {
     773                 :      115622 :         int                     fldct;
     774                 :      115622 :         bool            done = false;
     775                 :             : 
     776                 :             :         /* only available for text or csv input */
     777         [ +  - ]:      115622 :         Assert(!cstate->opts.binary);
     778                 :             : 
     779                 :             :         /* on input check that the header line is correct if needed */
     780   [ +  +  +  + ]:      115622 :         if (cstate->cur_lineno == 0 && cstate->opts.header_line != COPY_HEADER_FALSE)
     781                 :             :         {
     782                 :          18 :                 ListCell   *cur;
     783                 :          18 :                 TupleDesc       tupDesc;
     784                 :          18 :                 int                     lines_to_skip = cstate->opts.header_line; /* used as a line count; overridden below for "match" */
     785                 :             : 
     786                 :             :                 /* If set to "match", one header line is skipped */
     787         [ +  + ]:          18 :                 if (cstate->opts.header_line == COPY_HEADER_MATCH)
     788                 :          12 :                         lines_to_skip = 1;
     789                 :             : 
     790                 :          18 :                 tupDesc = RelationGetDescr(cstate->rel);
     791                 :             : 
     792         [ +  + ]:          44 :                 for (int i = 0; i < lines_to_skip; i++)
     793                 :             :                 {
     794                 :          26 :                         cstate->cur_lineno++; /* skipped header lines are counted in cur_lineno too */
     795         [ +  + ]:          26 :                         if ((done = CopyReadLine(cstate, is_csv))) /* stop skipping once the reader reports done */
     796                 :           1 :                                 break;
     797                 :          25 :                 }
     798                 :             : 
     799         [ +  + ]:          18 :                 if (cstate->opts.header_line == COPY_HEADER_MATCH)
     800                 :             :                 {
     801                 :          12 :                         int                     fldnum;
     802                 :             : 
     803         [ +  + ]:          12 :                         if (is_csv)
     804                 :           1 :                                 fldct = CopyReadAttributesCSV(cstate);
     805                 :             :                         else
     806                 :          11 :                                 fldct = CopyReadAttributesText(cstate);
     807                 :             : 
     808         [ +  + ]:          12 :                         if (fldct != list_length(cstate->attnumlist))
     809   [ +  -  +  - ]:           4 :                                 ereport(ERROR,
     810                 :             :                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     811                 :             :                                                  errmsg("wrong number of fields in header line: got %d, expected %d",
     812                 :             :                                                                 fldct, list_length(cstate->attnumlist))));
     813                 :             : 
     814                 :           8 :                         fldnum = 0;
     815   [ +  -  +  +  :          25 :                         foreach(cur, cstate->attnumlist)
                   +  + ]
     816                 :             :                         {
     817                 :          20 :                                 int                     attnum = lfirst_int(cur);
     818                 :          20 :                                 char       *colName;
     819                 :          20 :                                 Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
     820                 :             : 
     821         [ +  - ]:          20 :                                 Assert(fldnum < cstate->max_fields);
     822                 :             : 
     823                 :          20 :                                 colName = cstate->raw_fields[fldnum++]; /* fldnum is now the 1-based field number printed below */
     824         [ +  + ]:          20 :                                 if (colName == NULL)
     825   [ +  -  +  - ]:           1 :                                         ereport(ERROR,
     826                 :             :                                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     827                 :             :                                                          errmsg("column name mismatch in header line field %d: got null value (\"%s\"), expected \"%s\"",
     828                 :             :                                                                         fldnum, cstate->opts.null_print, NameStr(attr->attname))));
     829                 :             : 
     830         [ +  + ]:          19 :                                 if (namestrcmp(&attr->attname, colName) != 0)
     831                 :             :                                 {
     832   [ +  -  +  - ]:           2 :                                         ereport(ERROR,
     833                 :             :                                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     834                 :             :                                                          errmsg("column name mismatch in header line field %d: got \"%s\", expected \"%s\"",
     835                 :             :                                                                         fldnum, colName, NameStr(attr->attname))));
     836                 :           0 :                                 }
     837                 :          17 :                         }
     838                 :           5 :                 }
     839                 :             : 
     840         [ +  + ]:          11 :                 if (done) /* the reader reported done while consuming the header line(s) */
     841                 :           1 :                         return false;
     842         [ +  + ]:          11 :         }
     843                 :             : 
     844                 :      115614 :         cstate->cur_lineno++;
     845                 :             : 
     846                 :             :         /* Actually read the line into memory here */
     847                 :      115614 :         done = CopyReadLine(cstate, is_csv);
     848                 :             : 
     849                 :             :         /*
     850                 :             :          * EOF at start of line means we're done.  If we see EOF after some
     851                 :             :          * characters, we act as though it was newline followed by EOF, ie,
     852                 :             :          * process the line and then exit loop on next iteration.
     853                 :             :          */
     854   [ +  +  -  + ]:      115614 :         if (done && cstate->line_buf.len == 0)
     855                 :         139 :                 return false;
     856                 :             : 
     857                 :             :         /* Parse the line into de-escaped field values */
     858         [ +  + ]:      115475 :         if (is_csv)
     859                 :          55 :                 fldct = CopyReadAttributesCSV(cstate);
     860                 :             :         else
     861                 :      115420 :                 fldct = CopyReadAttributesText(cstate);
     862                 :             : 
     863                 :      115475 :         *fields = cstate->raw_fields; /* internal buffer; valid only until the next call */
     864                 :      115475 :         *nfields = fldct; /* fields found on the line, not necessarily the column count */
     865                 :      115475 :         return true;
     866                 :      115615 : }
     867                 :             : 
     868                 :             : /*
     869                 :             :  * Read next tuple from file for COPY FROM. Return false if no more tuples.
     870                 :             :  *
     871                 :             :  * 'econtext' is used to evaluate default expression for each column that is
     872                 :             :  * either not read from the file or is using the DEFAULT option of COPY FROM.
     873                 :             :  * It can be NULL when no default values are used, i.e. when all columns are
     874                 :             :  * read from the file, and DEFAULT option is unset.
     875                 :             :  *
     876                 :             :  * 'values' and 'nulls' arrays must be the same length as columns of the
     877                 :             :  * relation passed to BeginCopyFrom. This function fills the arrays.
     878                 :             :  */
     879                 :             : bool
     880                 :      115655 : NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
     881                 :             :                          Datum *values, bool *nulls)
     882                 :             : {
     883                 :      115655 :         TupleDesc       tupDesc;
     884                 :      115655 :         AttrNumber      num_phys_attrs,
     885                 :      115655 :                                 num_defaults = cstate->num_defaults;
     886                 :      115655 :         int                     i;
     887                 :      115655 :         int                *defmap = cstate->defmap;
     888                 :      115655 :         ExprState **defexprs = cstate->defexprs;
     889                 :             : 
     890                 :      115655 :         tupDesc = RelationGetDescr(cstate->rel);
     891                 :      115655 :         num_phys_attrs = tupDesc->natts;
     892                 :             : 
     893                 :             :         /* Initialize all values for row to NULL */
     894   [ +  +  +  -  :      527153 :         MemSet(values, 0, num_phys_attrs * sizeof(Datum));
          +  -  +  -  +  
                      + ]
     895   [ +  +  +  +  :      115655 :         MemSet(nulls, true, num_phys_attrs * sizeof(bool));
          -  +  #  #  #  
                      # ]
     896   [ +  +  +  +  :      139673 :         MemSet(cstate->defaults, false, num_phys_attrs * sizeof(bool)); /* clear the per-column flags tested by the text/CSV row reader */
          +  -  +  -  +  
                      + ]
     897                 :             : 
     898                 :             :         /* Get one row from source */
     899         [ +  + ]:      115605 :         if (!cstate->routine->CopyFromOneRow(cstate, econtext, values, nulls)) /* per-row callback of the input format */
     900                 :         141 :                 return false; /* no more tuples */
     901                 :             : 
     902                 :             :         /*
     903                 :             :          * Now compute and insert any defaults available for the columns not
     904                 :             :          * provided by the input data.  Anything not processed here or above will
     905                 :             :          * remain NULL.
     906                 :             :          */
     907         [ +  + ]:      125544 :         for (i = 0; i < num_defaults; i++)
     908                 :             :         {
     909                 :             :                 /*
     910                 :             :                  * The caller must supply econtext and have switched into the
     911                 :             :                  * per-tuple memory context in it.
     912                 :             :                  */
     913         [ +  - ]:       10080 :                 Assert(econtext != NULL);
     914         [ +  - ]:       10080 :                 Assert(CurrentMemoryContext == econtext->ecxt_per_tuple_memory);
     915                 :             : 
     916                 :       20160 :                 values[defmap[i]] = ExecEvalExpr(defexprs[defmap[i]], econtext, /* defmap[i] indexes values, nulls and defexprs alike */
     917                 :       10080 :                                                                                  &nulls[defmap[i]]);
     918                 :       10080 :         }
     919                 :             : 
     920                 :      115464 :         return true;
     921                 :      115605 : }
     922                 :             : 
     923                 :             : /* Implementation of the per-row callback for text format: CopyFromTextLikeOneRow() with is_csv = false */
     924                 :             : bool
     925                 :      115540 : CopyFromTextOneRow(CopyFromState cstate, ExprContext *econtext, Datum *values,
     926                 :             :                                    bool *nulls)
     927                 :             : {
     928                 :      115540 :         return CopyFromTextLikeOneRow(cstate, econtext, values, nulls, false);
     929                 :             : }
     930                 :             : 
     931                 :             : /* Implementation of the per-row callback for CSV format: CopyFromTextLikeOneRow() with is_csv = true */
     932                 :             : bool
     933                 :          86 : CopyFromCSVOneRow(CopyFromState cstate, ExprContext *econtext, Datum *values,
     934                 :             :                                   bool *nulls)
     935                 :             : {
     936                 :          86 :         return CopyFromTextLikeOneRow(cstate, econtext, values, nulls, true);
     937                 :             : }
     938                 :             : 
     939                 :             : /*
     940                 :             :  * Workhorse for CopyFromTextOneRow() and CopyFromCSVOneRow(): turns the next line's raw fields into values/nulls.
     941                 :             :  *
     942                 :             :  * We use pg_attribute_always_inline to reduce function call overhead
     943                 :             :  * and to help compilers to optimize away the 'is_csv' condition.
     944                 :             :  */
     945                 :             : static pg_attribute_always_inline bool
     946                 :      115613 : CopyFromTextLikeOneRow(CopyFromState cstate, ExprContext *econtext,
     947                 :             :                                            Datum *values, bool *nulls, bool is_csv)
     948                 :             : {
     949                 :      115613 :         TupleDesc       tupDesc;
     950                 :      115613 :         AttrNumber      attr_count;
     951                 :      115613 :         FmgrInfo   *in_functions = cstate->in_functions;
     952                 :      115613 :         Oid                *typioparams = cstate->typioparams;
     953                 :      115613 :         ExprState **defexprs = cstate->defexprs;
     954                 :      115613 :         char      **field_strings;
     955                 :      115613 :         ListCell   *cur;
     956                 :      115613 :         int                     fldct;
     957                 :      115613 :         int                     fieldno;
     958                 :      115613 :         char       *string;
     959                 :             : 
     960                 :      115613 :         tupDesc = RelationGetDescr(cstate->rel);
     961                 :      115613 :         attr_count = list_length(cstate->attnumlist); /* number of columns expected on each input line */
     962                 :             : 
     963                 :             :         /* read raw fields in the next line */
     964         [ +  + ]:      115613 :         if (!NextCopyFromRawFieldsInternal(cstate, &field_strings, &fldct, is_csv))
     965                 :         140 :                 return false; /* no more lines */
     966                 :             : 
     967                 :             :         /* check for overflowing fields */
     968   [ +  -  +  + ]:      115473 :         if (attr_count > 0 && fldct > attr_count)
     969   [ +  -  +  - ]:           3 :                 ereport(ERROR,
     970                 :             :                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     971                 :             :                                  errmsg("extra data after last expected column")));
     972                 :             : 
     973                 :      115470 :         fieldno = 0;
     974                 :             : 
     975                 :             :         /* Loop to read the user attributes on the line. */
     976   [ +  +  +  +  :      515502 :         foreach(cur, cstate->attnumlist)
             +  +  +  + ]
     977                 :             :         {
     978                 :      400035 :                 int                     attnum = lfirst_int(cur);
     979                 :      400035 :                 int                     m = attnum - 1; /* zero-based index into the per-column arrays */
     980                 :      400035 :                 Form_pg_attribute att = TupleDescAttr(tupDesc, m);
     981                 :             : 
     982         [ +  + ]:      400035 :                 if (fieldno >= fldct)
     983   [ +  -  +  - ]:           3 :                         ereport(ERROR,
     984                 :             :                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     985                 :             :                                          errmsg("missing data for column \"%s\"",
     986                 :             :                                                         NameStr(att->attname))));
     987                 :      400032 :                 string = field_strings[fieldno++]; /* a NULL pointer here means the field was read as null */
     988                 :             : 
     989   [ -  +  #  # ]:      400032 :                 if (cstate->convert_select_flags &&
     990                 :           0 :                         !cstate->convert_select_flags[m])
     991                 :             :                 {
     992                 :             :                         /* ignore input field, leaving column as NULL */
     993                 :           0 :                         continue;
     994                 :             :                 }
     995                 :             : 
     996         [ +  + ]:      400032 :                 if (is_csv)
     997                 :             :                 {
     998   [ +  +  +  + ]:         107 :                         if (string == NULL &&
     999                 :           5 :                                 cstate->opts.force_notnull_flags[m])
    1000                 :             :                         {
    1001                 :             :                                 /*
    1002                 :             :                                  * FORCE_NOT_NULL option is set and column is NULL - convert
    1003                 :             :                                  * it to the NULL string.
    1004                 :             :                                  */
    1005                 :           4 :                                 string = cstate->opts.null_print;
    1006                 :           4 :                         }
    1007         [ +  + ]:         103 :                         else if (string != NULL && cstate->opts.force_null_flags[m]
    1008   [ +  +  +  + ]:         102 :                                          && strcmp(string, cstate->opts.null_print) == 0)
    1009                 :             :                         {
    1010                 :             :                                 /*
    1011                 :             :                                  * FORCE_NULL option is set and column matches the NULL
    1012                 :             :                                  * string. It must have been quoted, or otherwise the string
    1013                 :             :                                  * would already have been set to NULL. Convert it to NULL as
    1014                 :             :                                  * specified.
    1015                 :             :                                  */
    1016                 :           4 :                                 string = NULL;
    1017                 :           4 :                         }
    1018                 :         107 :                 }
    1019                 :             : 
    1020                 :      400032 :                 cstate->cur_attname = NameStr(att->attname); /* recorded while the column is processed; used by the NOTICE below */
    1021                 :      400032 :                 cstate->cur_attval = string;
    1022                 :             : 
    1023         [ +  + ]:      400032 :                 if (string != NULL)
    1024                 :      399712 :                         nulls[m] = false;
    1025                 :             : 
    1026         [ +  + ]:      400032 :                 if (cstate->defaults[m]) /* NOTE(review): presumably set by the field parser when the input matched the DEFAULT marker -- confirm */
    1027                 :             :                 {
    1028                 :             :                         /* We must have switched into the per-tuple memory context */
    1029         [ -  + ]:           8 :                         Assert(econtext != NULL);
    1030         [ -  + ]:           8 :                         Assert(CurrentMemoryContext == econtext->ecxt_per_tuple_memory);
    1031                 :             : 
    1032                 :           8 :                         values[m] = ExecEvalExpr(defexprs[m], econtext, &nulls[m]);
    1033                 :           8 :                 }
    1034                 :             : 
    1035                 :             :                 /*
    1036                 :             :                  * If ON_ERROR is specified with IGNORE, skip rows with soft errors
    1037                 :             :                  */
    1038   [ +  +  +  + ]:      800048 :                 else if (!InputFunctionCallSafe(&in_functions[m],
    1039                 :      400024 :                                                                                 string,
    1040                 :      400024 :                                                                                 typioparams[m],
    1041                 :      400024 :                                                                                 att->atttypmod,
    1042                 :      400024 :                                                                                 (Node *) cstate->escontext,
    1043                 :      400024 :                                                                                 &values[m]))
    1044                 :             :                 {
    1045         [ +  - ]:          18 :                         Assert(cstate->opts.on_error != COPY_ON_ERROR_STOP);
    1046                 :             : 
    1047                 :          18 :                         cstate->num_errors++;
    1048                 :             : 
    1049         [ +  + ]:          18 :                         if (cstate->opts.log_verbosity == COPY_LOG_VERBOSITY_VERBOSE)
    1050                 :             :                         {
    1051                 :             :                                 /*
    1052                 :             :                                  * Since we emit line number and column info in the below
    1053                 :             :                                  * notice message, we suppress error context information other
    1054                 :             :                                  * than the relation name.
    1055                 :             :                                  */
    1056         [ -  + ]:           7 :                                 Assert(!cstate->relname_only);
    1057                 :           7 :                                 cstate->relname_only = true;
    1058                 :             : 
    1059         [ +  + ]:           7 :                                 if (cstate->cur_attval)
    1060                 :             :                                 {
    1061                 :           6 :                                         char       *attval;
    1062                 :             : 
    1063                 :           6 :                                         attval = CopyLimitPrintoutLength(cstate->cur_attval);
    1064   [ -  +  +  - ]:           6 :                                         ereport(NOTICE,
    1065                 :             :                                                         errmsg("skipping row due to data type incompatibility at line %" PRIu64 " for column \"%s\": \"%s\"",
    1066                 :             :                                                                    cstate->cur_lineno,
    1067                 :             :                                                                    cstate->cur_attname,
    1068                 :             :                                                                    attval));
    1069                 :           6 :                                         pfree(attval);
    1070                 :           6 :                                 }
    1071                 :             :                                 else
    1072   [ -  +  +  - ]:           1 :                                         ereport(NOTICE,
    1073                 :             :                                                         errmsg("skipping row due to data type incompatibility at line %" PRIu64 " for column \"%s\": null input",
    1074                 :             :                                                                    cstate->cur_lineno,
    1075                 :             :                                                                    cstate->cur_attname));
    1076                 :             : 
    1077                 :             :                                 /* reset relname_only */
    1078                 :           7 :                                 cstate->relname_only = false;
    1079                 :           7 :                         }
    1080                 :             : 
    1081                 :          18 :                         return true; /* still returns true; the failure was counted in cstate->num_errors above */
    1082                 :             :                 }
    1083                 :             : 
    1084                 :      400014 :                 cstate->cur_attname = NULL; /* column finished; clear the per-column tracking fields */
    1085                 :      400014 :                 cstate->cur_attval = NULL;
    1086      [ +  +  + ]:      400032 :         }
    1087                 :             : 
    1088         [ +  - ]:      115443 :         Assert(fieldno == attr_count);
    1089                 :             : 
    1090                 :      115443 :         return true;
    1091                 :      115595 : }
    1092                 :             : 
    1093                 :             : /* Implementation of the per-row callback for binary format */
    1094                 :             : bool
    1095                 :           4 : CopyFromBinaryOneRow(CopyFromState cstate, ExprContext *econtext, Datum *values,
    1096                 :             :                                          bool *nulls)
    1097                 :             : {
    1098                 :           4 :         TupleDesc       tupDesc;
    1099                 :           4 :         AttrNumber      attr_count;
    1100                 :           4 :         FmgrInfo   *in_functions = cstate->in_functions;
    1101                 :           4 :         Oid                *typioparams = cstate->typioparams;
    1102                 :           4 :         int16           fld_count;
    1103                 :           4 :         ListCell   *cur;
    1104                 :             : 
    1105                 :           4 :         tupDesc = RelationGetDescr(cstate->rel);
    1106                 :           4 :         attr_count = list_length(cstate->attnumlist);
    1107                 :             : 
    1108                 :           4 :         cstate->cur_lineno++;
    1109                 :             : 
    1110         [ -  + ]:           4 :         if (!CopyGetInt16(cstate, &fld_count))
    1111                 :             :         {
    1112                 :             :                 /* EOF detected (end of file, or protocol-level EOF) */
    1113                 :           0 :                 return false;
    1114                 :             :         }
    1115                 :             : 
    1116         [ +  + ]:           4 :         if (fld_count == -1)
    1117                 :             :         {
    1118                 :             :                 /*
    1119                 :             :                  * Received EOF marker.  Wait for the protocol-level EOF, and complain
    1120                 :             :                  * if it doesn't come immediately.  In COPY FROM STDIN, this ensures
    1121                 :             :                  * that we correctly handle CopyFail, if client chooses to send that
    1122                 :             :                  * now.  When copying from file, we could ignore the rest of the file
    1123                 :             :                  * like in text mode, but we choose to be consistent with the COPY
    1124                 :             :                  * FROM STDIN case.
    1125                 :             :                  */
    1126                 :           1 :                 char            dummy;
    1127                 :             : 
    1128         [ +  - ]:           1 :                 if (CopyReadBinaryData(cstate, &dummy, 1) > 0)
    1129   [ #  #  #  # ]:           0 :                         ereport(ERROR,
    1130                 :             :                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1131                 :             :                                          errmsg("received copy data after EOF marker")));
    1132                 :           1 :                 return false;
    1133                 :           1 :         }
    1134                 :             : 
    1135         [ +  - ]:           3 :         if (fld_count != attr_count)
    1136   [ #  #  #  # ]:           0 :                 ereport(ERROR,
    1137                 :             :                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1138                 :             :                                  errmsg("row field count is %d, expected %d",
    1139                 :             :                                                 fld_count, attr_count)));
    1140                 :             : 
    1141   [ +  -  +  +  :          24 :         foreach(cur, cstate->attnumlist)
                   +  + ]
    1142                 :             :         {
    1143                 :          21 :                 int                     attnum = lfirst_int(cur);
    1144                 :          21 :                 int                     m = attnum - 1;
    1145                 :          21 :                 Form_pg_attribute att = TupleDescAttr(tupDesc, m);
    1146                 :             : 
    1147                 :          21 :                 cstate->cur_attname = NameStr(att->attname);
    1148                 :          42 :                 values[m] = CopyReadBinaryAttribute(cstate,
    1149                 :          21 :                                                                                         &in_functions[m],
    1150                 :          21 :                                                                                         typioparams[m],
    1151                 :          21 :                                                                                         att->atttypmod,
    1152                 :          21 :                                                                                         &nulls[m]);
    1153                 :          21 :                 cstate->cur_attname = NULL;
    1154                 :          21 :         }
    1155                 :             : 
    1156                 :           3 :         return true;
    1157                 :           4 : }
    1158                 :             : 
    1159                 :             : /*
    1160                 :             :  * Read the next input line and stash it in line_buf.
    1161                 :             :  *
    1162                 :             :  * Result is true if read was terminated by EOF, false if terminated
    1163                 :             :  * by newline.  The terminating newline or EOF marker is not included
    1164                 :             :  * in the final value of line_buf.
    1165                 :             :  */
    1166                 :             : static bool
    1167                 :      115640 : CopyReadLine(CopyFromState cstate, bool is_csv)
    1168                 :             : {
    1169                 :      115640 :         bool            result;
    1170                 :             : 
    1171                 :      115640 :         resetStringInfo(&cstate->line_buf);
    1172                 :      115640 :         cstate->line_buf_valid = false;
    1173                 :             : 
    1174                 :             :         /* Parse data and transfer into line_buf */
    1175                 :      115640 :         result = CopyReadLineText(cstate, is_csv);
    1176                 :             : 
    1177         [ +  + ]:      115640 :         if (result)
    1178                 :             :         {
    1179                 :             :                 /*
    1180                 :             :                  * Reached EOF.  In protocol version 3, we should ignore anything
    1181                 :             :                  * after \. up to the protocol end of copy data.  (XXX maybe better
    1182                 :             :                  * not to treat \. as special?)
    1183                 :             :                  */
    1184         [ +  + ]:         140 :                 if (cstate->copy_src == COPY_FRONTEND)
    1185                 :             :                 {
    1186                 :          99 :                         int                     inbytes;
    1187                 :             : 
    1188                 :          99 :                         do
    1189                 :             :                         {
    1190                 :          99 :                                 inbytes = CopyGetData(cstate, cstate->input_buf,
    1191                 :             :                                                                           1, INPUT_BUF_SIZE);
    1192         [ -  + ]:          99 :                         } while (inbytes > 0);
    1193                 :          99 :                         cstate->input_buf_index = 0;
    1194                 :          99 :                         cstate->input_buf_len = 0;
    1195                 :          99 :                         cstate->raw_buf_index = 0;
    1196                 :          99 :                         cstate->raw_buf_len = 0;
    1197                 :          99 :                 }
    1198                 :         140 :         }
    1199                 :             :         else
    1200                 :             :         {
    1201                 :             :                 /*
    1202                 :             :                  * If we didn't hit EOF, then we must have transferred the EOL marker
    1203                 :             :                  * to line_buf along with the data.  Get rid of it.
    1204                 :             :                  */
    1205   [ -  +  -  -  :      115500 :                 switch (cstate->eol_type)
                      - ]
    1206                 :             :                 {
    1207                 :             :                         case EOL_NL:
    1208         [ +  - ]:      115500 :                                 Assert(cstate->line_buf.len >= 1);
    1209         [ +  - ]:      115500 :                                 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
    1210                 :      115500 :                                 cstate->line_buf.len--;
    1211                 :      115500 :                                 cstate->line_buf.data[cstate->line_buf.len] = '\0';
    1212                 :      115500 :                                 break;
    1213                 :             :                         case EOL_CR:
    1214         [ #  # ]:           0 :                                 Assert(cstate->line_buf.len >= 1);
    1215         [ #  # ]:           0 :                                 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\r');
    1216                 :           0 :                                 cstate->line_buf.len--;
    1217                 :           0 :                                 cstate->line_buf.data[cstate->line_buf.len] = '\0';
    1218                 :           0 :                                 break;
    1219                 :             :                         case EOL_CRNL:
    1220         [ #  # ]:           0 :                                 Assert(cstate->line_buf.len >= 2);
    1221         [ #  # ]:           0 :                                 Assert(cstate->line_buf.data[cstate->line_buf.len - 2] == '\r');
    1222         [ #  # ]:           0 :                                 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
    1223                 :           0 :                                 cstate->line_buf.len -= 2;
    1224                 :           0 :                                 cstate->line_buf.data[cstate->line_buf.len] = '\0';
    1225                 :           0 :                                 break;
    1226                 :             :                         case EOL_UNKNOWN:
    1227                 :             :                                 /* shouldn't get here */
    1228                 :           0 :                                 Assert(false);
    1229                 :           0 :                                 break;
    1230                 :             :                 }
    1231                 :             :         }
    1232                 :             : 
    1233                 :             :         /* Now it's safe to use the buffer in error messages */
    1234                 :      115640 :         cstate->line_buf_valid = true;
    1235                 :             : 
    1236                 :      231280 :         return result;
    1237                 :      115640 : }
    1238                 :             : 
    1239                 :             : /*
    1240                 :             :  * CopyReadLineText - inner loop of CopyReadLine for text mode
    1241                 :             :  */
    1242                 :             : static bool
    1243                 :      115644 : CopyReadLineText(CopyFromState cstate, bool is_csv)
    1244                 :             : {
    1245                 :      115644 :         char       *copy_input_buf;
    1246                 :      115644 :         int                     input_buf_ptr;
    1247                 :      115644 :         int                     copy_buf_len;
    1248                 :      115644 :         bool            need_data = false;
    1249                 :      115644 :         bool            hit_eof = false;
    1250                 :      115644 :         bool            result = false;
    1251                 :             : 
    1252                 :             :         /* CSV variables */
    1253                 :      115644 :         bool            in_quote = false,
    1254                 :      115644 :                                 last_was_esc = false;
    1255                 :      115644 :         char            quotec = '\0';
    1256                 :      115644 :         char            escapec = '\0';
    1257                 :             : 
    1258         [ +  + ]:      115644 :         if (is_csv)
    1259                 :             :         {
    1260                 :          99 :                 quotec = cstate->opts.quote[0];
    1261                 :          99 :                 escapec = cstate->opts.escape[0];
    1262                 :             :                 /* ignore special escape processing if it's the same as quotec */
    1263         [ +  + ]:          99 :                 if (quotec == escapec)
    1264                 :          94 :                         escapec = '\0';
    1265                 :          99 :         }
    1266                 :             : 
    1267                 :             :         /*
    1268                 :             :          * The objective of this loop is to transfer the entire next input line
    1269                 :             :          * into line_buf.  Hence, we only care for detecting newlines (\r and/or
    1270                 :             :          * \n) and the end-of-copy marker (\.).
    1271                 :             :          *
    1272                 :             :          * In CSV mode, \r and \n inside a quoted field are just part of the data
    1273                 :             :          * value and are put in line_buf.  We keep just enough state to know if we
    1274                 :             :          * are currently in a quoted field or not.
    1275                 :             :          *
    1276                 :             :          * The input has already been converted to the database encoding.  All
    1277                 :             :          * supported server encodings have the property that all bytes in a
    1278                 :             :          * multi-byte sequence have the high bit set, so a multibyte character
    1279                 :             :          * cannot contain any newline or escape characters embedded in the
    1280                 :             :          * multibyte sequence.  Therefore, we can process the input byte-by-byte,
    1281                 :             :          * regardless of the encoding.
    1282                 :             :          *
    1283                 :             :          * For speed, we try to move data from input_buf to line_buf in chunks
    1284                 :             :          * rather than one character at a time.  input_buf_ptr points to the next
    1285                 :             :          * character to examine; any characters from input_buf_index to
    1286                 :             :          * input_buf_ptr have been determined to be part of the line, but not yet
    1287                 :             :          * transferred to line_buf.
    1288                 :             :          *
    1289                 :             :          * For a little extra speed within the loop, we copy input_buf and
    1290                 :             :          * input_buf_len into local variables.
    1291                 :             :          */
    1292                 :      115644 :         copy_input_buf = cstate->input_buf;
    1293                 :      115644 :         input_buf_ptr = cstate->input_buf_index;
    1294                 :      115644 :         copy_buf_len = cstate->input_buf_len;
    1295                 :             : 
    1296                 :     2905408 :         for (;;)
    1297                 :             :         {
    1298                 :     2905410 :                 int                     prev_raw_ptr;
    1299                 :     2905410 :                 char            c;
    1300                 :             : 
    1301                 :             :                 /*
    1302                 :             :                  * Load more data if needed.
    1303                 :             :                  *
    1304                 :             :                  * TODO: We could just force four bytes of read-ahead and avoid the
    1305                 :             :                  * many calls to IF_NEED_REFILL_AND_NOT_EOF_CONTINUE().  That was
    1306                 :             :                  * unsafe with the old v2 COPY protocol, but we don't support that
    1307                 :             :                  * anymore.
    1308                 :             :                  */
    1309   [ +  +  -  + ]:     2905410 :                 if (input_buf_ptr >= copy_buf_len || need_data)
    1310                 :             :                 {
    1311         [ +  + ]:         345 :                         REFILL_LINEBUF;
    1312                 :             : 
    1313                 :         345 :                         CopyLoadInputBuf(cstate);
    1314                 :             :                         /* update our local variables */
    1315                 :         345 :                         hit_eof = cstate->input_reached_eof;
    1316                 :         345 :                         input_buf_ptr = cstate->input_buf_index;
    1317                 :         345 :                         copy_buf_len = cstate->input_buf_len;
    1318                 :             : 
    1319                 :             :                         /*
    1320                 :             :                          * If we are completely out of data, break out of the loop,
    1321                 :             :                          * reporting EOF.
    1322                 :             :                          */
    1323         [ +  + ]:         345 :                         if (INPUT_BUF_BYTES(cstate) <= 0)
    1324                 :             :                         {
    1325                 :         140 :                                 result = true;
    1326                 :         140 :                                 break;
    1327                 :             :                         }
    1328                 :         205 :                         need_data = false;
    1329                 :         205 :                 }
    1330                 :             : 
    1331                 :             :                 /* OK to fetch a character */
    1332                 :     2905266 :                 prev_raw_ptr = input_buf_ptr;
    1333                 :     2905266 :                 c = copy_input_buf[input_buf_ptr++];
    1334                 :             : 
    1335         [ +  + ]:     2905266 :                 if (is_csv)
    1336                 :             :                 {
    1337                 :             :                         /*
    1338                 :             :                          * If character is '\r', we may need to look ahead below.  Force
    1339                 :             :                          * fetch of the next character if we don't already have it.  We
    1340                 :             :                          * need to do this before changing CSV state, in case '\r' is also
    1341                 :             :                          * the quote or escape character.
    1342                 :             :                          */
    1343         [ +  + ]:         745 :                         if (c == '\r')
    1344                 :             :                         {
    1345   [ -  +  #  # ]:           6 :                                 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
    1346                 :           6 :                         }
    1347                 :             : 
    1348                 :             :                         /*
    1349                 :             :                          * Dealing with quotes and escapes here is mildly tricky. If the
    1350                 :             :                          * quote char is also the escape char, there's no problem - we
    1351                 :             :                          * just use the char as a toggle. If they are different, we need
    1352                 :             :                          * to ensure that we only take account of an escape inside a
    1353                 :             :                          * quoted field and immediately preceding a quote char, and not
    1354                 :             :                          * the second in an escape-escape sequence.
    1355                 :             :                          */
    1356   [ +  +  +  + ]:         745 :                         if (in_quote && c == escapec)
    1357                 :           8 :                                 last_was_esc = !last_was_esc;
    1358   [ +  +  -  + ]:         745 :                         if (c == quotec && !last_was_esc)
    1359                 :          36 :                                 in_quote = !in_quote;
    1360         [ +  + ]:         745 :                         if (c != escapec)
    1361                 :         736 :                                 last_was_esc = false;
    1362                 :             : 
    1363                 :             :                         /*
    1364                 :             :                          * Updating the line count for embedded CR and/or LF chars is
    1365                 :             :                          * necessarily a little fragile - this test is probably about the
    1366                 :             :                          * best we can do.  (XXX it's arguable whether we should do this
    1367                 :             :                          * at all --- is cur_lineno a physical or logical count?)
    1368                 :             :                          */
    1369   [ +  +  +  + ]:         745 :                         if (in_quote && c == (cstate->eol_type == EOL_NL ? '\n' : '\r'))
    1370                 :           6 :                                 cstate->cur_lineno++;
    1371                 :         745 :                 }
    1372                 :             : 
    1373                 :             :                 /* Process \r */
    1374   [ +  +  +  -  :     2905266 :                 if (c == '\r' && (!is_csv || !in_quote))
                   +  - ]
    1375                 :             :                 {
    1376                 :             :                         /* Check for \r\n on first line, _and_ handle \r\n. */
    1377   [ #  #  #  # ]:           0 :                         if (cstate->eol_type == EOL_UNKNOWN ||
    1378                 :           0 :                                 cstate->eol_type == EOL_CRNL)
    1379                 :             :                         {
    1380                 :             :                                 /*
    1381                 :             :                                  * If need more data, go back to loop top to load it.
    1382                 :             :                                  *
    1383                 :             :                                  * Note that if we are at EOF, c will wind up as '\0' because
    1384                 :             :                                  * of the guaranteed pad of input_buf.
    1385                 :             :                                  */
    1386   [ #  #  #  # ]:           0 :                                 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
    1387                 :             : 
    1388                 :             :                                 /* get next char */
    1389                 :           0 :                                 c = copy_input_buf[input_buf_ptr];
    1390                 :             : 
    1391         [ #  # ]:           0 :                                 if (c == '\n')
    1392                 :             :                                 {
    1393                 :           0 :                                         input_buf_ptr++;        /* eat newline */
    1394                 :           0 :                                         cstate->eol_type = EOL_CRNL; /* in case not set yet */
    1395                 :           0 :                                 }
    1396                 :             :                                 else
    1397                 :             :                                 {
    1398                 :             :                                         /* found \r, but no \n */
    1399         [ #  # ]:           0 :                                         if (cstate->eol_type == EOL_CRNL)
    1400   [ #  #  #  #  :           0 :                                                 ereport(ERROR,
             #  #  #  # ]
    1401                 :             :                                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1402                 :             :                                                                  !is_csv ?
    1403                 :             :                                                                  errmsg("literal carriage return found in data") :
    1404                 :             :                                                                  errmsg("unquoted carriage return found in data"),
    1405                 :             :                                                                  !is_csv ?
    1406                 :             :                                                                  errhint("Use \"\\r\" to represent carriage return.") :
    1407                 :             :                                                                  errhint("Use quoted CSV field to represent carriage return.")));
    1408                 :             : 
    1409                 :             :                                         /*
    1410                 :             :                                          * if we got here, it is the first line and we didn't find
    1411                 :             :                                          * \n, so don't consume the peeked character
    1412                 :             :                                          */
    1413                 :           0 :                                         cstate->eol_type = EOL_CR;
    1414                 :             :                                 }
    1415                 :           0 :                         }
    1416         [ #  # ]:           0 :                         else if (cstate->eol_type == EOL_NL)
    1417   [ #  #  #  #  :           0 :                                 ereport(ERROR,
             #  #  #  # ]
    1418                 :             :                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1419                 :             :                                                  !is_csv ?
    1420                 :             :                                                  errmsg("literal carriage return found in data") :
    1421                 :             :                                                  errmsg("unquoted carriage return found in data"),
    1422                 :             :                                                  !is_csv ?
    1423                 :             :                                                  errhint("Use \"\\r\" to represent carriage return.") :
    1424                 :             :                                                  errhint("Use quoted CSV field to represent carriage return.")));
    1425                 :             :                         /* If reach here, we have found the line terminator */
    1426                 :           0 :                         break;
    1427                 :             :                 }
    1428                 :             : 
    1429                 :             :                 /* Process \n */
    1430   [ +  +  +  +  :     2905266 :                 if (c == '\n' && (!is_csv || !in_quote))
                   +  + ]
    1431                 :             :                 {
    1432         [ +  - ]:      115500 :                         if (cstate->eol_type == EOL_CR || cstate->eol_type == EOL_CRNL)
    1433   [ #  #  #  #  :           0 :                                 ereport(ERROR,
             #  #  #  # ]
    1434                 :             :                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1435                 :             :                                                  !is_csv ?
    1436                 :             :                                                  errmsg("literal newline found in data") :
    1437                 :             :                                                  errmsg("unquoted newline found in data"),
    1438                 :             :                                                  !is_csv ?
    1439                 :             :                                                  errhint("Use \"\\n\" to represent newline.") :
    1440                 :             :                                                  errhint("Use quoted CSV field to represent newline.")));
    1441                 :      115500 :                         cstate->eol_type = EOL_NL;   /* in case not set yet */
    1442                 :             :                         /* If reach here, we have found the line terminator */
    1443                 :      115500 :                         break;
    1444                 :             :                 }
    1445                 :             : 
    1446                 :             :                 /*
    1447                 :             :                  * Process backslash, except in CSV mode where backslash is a normal
    1448                 :             :                  * character.
    1449                 :             :                  */
    1450   [ +  +  +  + ]:     2789766 :                 if (c == '\\' && !is_csv)
    1451                 :             :                 {
    1452                 :         850 :                         char            c2;
    1453                 :             : 
    1454   [ -  +  #  # ]:         850 :                         IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
    1455   [ -  +  #  # ]:         850 :                         IF_NEED_REFILL_AND_EOF_BREAK(0);
    1456                 :             : 
    1457                 :             :                         /* -----
    1458                 :             :                          * get next character
    1459                 :             :                          * Note: we do not change c so if it isn't \., we can fall
    1460                 :             :                          * through and continue processing.
    1461                 :             :                          * -----
    1462                 :             :                          */
    1463                 :         850 :                         c2 = copy_input_buf[input_buf_ptr];
    1464                 :             : 
    1465         [ +  + ]:         850 :                         if (c2 == '.')
    1466                 :             :                         {
    1467                 :           2 :                                 input_buf_ptr++;        /* consume the '.' */
    1468         [ +  - ]:           2 :                                 if (cstate->eol_type == EOL_CRNL)
    1469                 :             :                                 {
    1470                 :             :                                         /* Get the next character */
    1471   [ #  #  #  # ]:           0 :                                         IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
    1472                 :             :                                         /* if hit_eof, c2 will become '\0' */
    1473                 :           0 :                                         c2 = copy_input_buf[input_buf_ptr++];
    1474                 :             : 
    1475         [ #  # ]:           0 :                                         if (c2 == '\n')
    1476   [ #  #  #  # ]:           0 :                                                 ereport(ERROR,
    1477                 :             :                                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1478                 :             :                                                                  errmsg("end-of-copy marker does not match previous newline style")));
    1479         [ #  # ]:           0 :                                         else if (c2 != '\r')
    1480   [ #  #  #  # ]:           0 :                                                 ereport(ERROR,
    1481                 :             :                                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1482                 :             :                                                                  errmsg("end-of-copy marker is not alone on its line")));
    1483                 :           0 :                                 }
    1484                 :             : 
    1485                 :             :                                 /* Get the next character */
    1486   [ -  +  #  # ]:           2 :                                 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
    1487                 :             :                                 /* if hit_eof, c2 will become '\0' */
    1488                 :           2 :                                 c2 = copy_input_buf[input_buf_ptr++];
    1489                 :             : 
    1490   [ +  -  +  + ]:           2 :                                 if (c2 != '\r' && c2 != '\n')
    1491   [ +  -  +  - ]:           1 :                                         ereport(ERROR,
    1492                 :             :                                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1493                 :             :                                                          errmsg("end-of-copy marker is not alone on its line")));
    1494                 :             : 
    1495   [ +  -  +  - ]:           1 :                                 if ((cstate->eol_type == EOL_NL && c2 != '\n') ||
    1496         [ -  + ]:           1 :                                         (cstate->eol_type == EOL_CRNL && c2 != '\n') ||
    1497         [ -  + ]:           1 :                                         (cstate->eol_type == EOL_CR && c2 != '\r'))
    1498   [ #  #  #  # ]:           0 :                                         ereport(ERROR,
    1499                 :             :                                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1500                 :             :                                                          errmsg("end-of-copy marker does not match previous newline style")));
    1501                 :             : 
    1502                 :             :                                 /*
    1503                 :             :                                  * If there is any data on this line before the \., complain.
    1504                 :             :                                  */
    1505         [ -  + ]:           1 :                                 if (cstate->line_buf.len > 0 ||
    1506                 :           0 :                                         prev_raw_ptr > cstate->input_buf_index)
    1507   [ +  -  +  - ]:           1 :                                         ereport(ERROR,
    1508                 :             :                                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1509                 :             :                                                          errmsg("end-of-copy marker is not alone on its line")));
    1510                 :             : 
    1511                 :             :                                 /*
    1512                 :             :                                  * Discard the \. and newline, then report EOF.
    1513                 :             :                                  */
    1514                 :           0 :                                 cstate->input_buf_index = input_buf_ptr;
    1515                 :           0 :                                 result = true;  /* report EOF */
    1516                 :           0 :                                 break;
    1517                 :             :                         }
    1518                 :             :                         else
    1519                 :             :                         {
    1520                 :             :                                 /*
    1521                 :             :                                  * If we are here, it means we found a backslash followed by
    1522                 :             :                                  * something other than a period.  In non-CSV mode, anything
    1523                 :             :                                  * after a backslash is special, so we skip over that second
    1524                 :             :                                  * character too.  If we didn't do that, \\. would be
    1525                 :             :                                  * considered an end-of-copy marker, while in non-CSV mode it is
    1526                 :             :                                  * a literal backslash followed by a period.
    1527                 :             :                                  */
    1528                 :         848 :                                 input_buf_ptr++;
    1529                 :             :                         }
    1530         [ -  + ]:         848 :                 }
    1531   [ -  +  +  + ]:     2905404 :         }                                                       /* end of outer loop */
    1532                 :             : 
    1533                 :             :         /*
    1534                 :             :          * Transfer any still-uncopied data to line_buf.
    1535                 :             :          */
    1536         [ +  + ]:      115638 :         REFILL_LINEBUF;
    1537                 :             : 
    1538                 :      231276 :         return result;
    1539                 :      115638 : }
    1540                 :             : 
    1541                 :             : /*
    1542                 :             :  *      Return decimal value for a hexadecimal digit
    1543                 :             :  */
    1544                 :             : static int
    1545                 :           0 : GetDecimalFromHex(char hex)
    1546                 :             : {
    1547         [ #  # ]:           0 :         if (isdigit((unsigned char) hex))
    1548                 :           0 :                 return hex - '0';
    1549                 :             :         else
    1550                 :           0 :                 return pg_ascii_tolower((unsigned char) hex) - 'a' + 10;
    1551                 :           0 : }
    1552                 :             : 
    1553                 :             : /*
    1554                 :             :  * Parse the current line into separate attributes (fields),
    1555                 :             :  * performing de-escaping as needed.
    1556                 :             :  *
    1557                 :             :  * The input is in line_buf.  We use attribute_buf to hold the result
    1558                 :             :  * strings.  cstate->raw_fields[k] is set to point to the k'th attribute
    1559                 :             :  * string, or NULL when the input matches the null marker string.
    1560                 :             :  * This array is expanded as necessary.
    1561                 :             :  *
    1562                 :             :  * (Note that the caller cannot check for nulls since the returned
    1563                 :             :  * string would be the post-de-escaping equivalent, which may look
    1564                 :             :  * the same as some valid data string.)
    1565                 :             :  *
    1566                 :             :  * delim is the column delimiter string (must be just one byte for now).
    1567                 :             :  * null_print is the null marker string.  Note that this is compared to
    1568                 :             :  * the pre-de-escaped input string.
    1569                 :             :  *
    1570                 :             :  * The return value is the number of fields actually read.
    1571                 :             :  */
    1572                 :             : static int
    1573                 :      115431 : CopyReadAttributesText(CopyFromState cstate)
    1574                 :             : {
    1575                 :      115431 :         char            delimc = cstate->opts.delim[0];
    1576                 :      115431 :         int                     fieldno;
    1577                 :      115431 :         char       *output_ptr;
    1578                 :      115431 :         char       *cur_ptr;
    1579                 :      115431 :         char       *line_end_ptr;
    1580                 :             : 
    1581                 :             :         /*
    1582                 :             :          * We need a special case for zero-column tables: check that the input
    1583                 :             :          * line is empty, and return.
    1584                 :             :          */
    1585         [ -  + ]:      115431 :         if (cstate->max_fields <= 0)
    1586                 :             :         {
    1587         [ #  # ]:           0 :                 if (cstate->line_buf.len != 0)
    1588   [ #  #  #  # ]:           0 :                         ereport(ERROR,
    1589                 :             :                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1590                 :             :                                          errmsg("extra data after last expected column")));
    1591                 :           0 :                 return 0;
    1592                 :             :         }
    1593                 :             : 
    1594                 :      115431 :         resetStringInfo(&cstate->attribute_buf);
    1595                 :             : 
    1596                 :             :         /*
    1597                 :             :          * The de-escaped attributes will certainly not be longer than the input
    1598                 :             :          * data line, so we can just force attribute_buf to be large enough and
    1599                 :             :          * then transfer data without any checks for enough space.  We need to do
    1600                 :             :          * it this way because enlarging attribute_buf mid-stream would invalidate
    1601                 :             :          * pointers already stored into cstate->raw_fields[].
    1602                 :             :          */
    1603         [ +  - ]:      115431 :         if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
    1604                 :           0 :                 enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
    1605                 :      115431 :         output_ptr = cstate->attribute_buf.data;
    1606                 :             : 
    1607                 :             :         /* set pointer variables for loop */
    1608                 :      115431 :         cur_ptr = cstate->line_buf.data;
    1609                 :      115431 :         line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
    1610                 :             : 
    1611                 :             :         /* Outer loop iterates over fields */
    1612                 :      115431 :         fieldno = 0;
    1613                 :      399994 :         for (;;)
    1614                 :             :         {
    1615                 :      399994 :                 bool            found_delim = false;
    1616                 :      399994 :                 char       *start_ptr;
    1617                 :      399994 :                 char       *end_ptr;
    1618                 :      399994 :                 int                     input_len;
    1619                 :      399994 :                 bool            saw_non_ascii = false;
    1620                 :             : 
    1621                 :             :                 /* Make sure there is enough space for the next value */
    1622         [ +  + ]:      399994 :                 if (fieldno >= cstate->max_fields)
    1623                 :             :                 {
    1624                 :           6 :                         cstate->max_fields *= 2;
    1625                 :           6 :                         cstate->raw_fields =
    1626                 :           6 :                                 repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
    1627                 :           6 :                 }
    1628                 :             : 
    1629                 :             :                 /* Remember start of field on both input and output sides */
    1630                 :      399994 :                 start_ptr = cur_ptr;
    1631                 :      399994 :                 cstate->raw_fields[fieldno] = output_ptr;
    1632                 :             : 
    1633                 :             :                 /*
    1634                 :             :                  * Scan data for field.
    1635                 :             :                  *
    1636                 :             :                  * Note that in this loop, we are scanning to locate the end of field
    1637                 :             :                  * and also speculatively performing de-escaping.  Once we find the
    1638                 :             :                  * end-of-field, we can match the raw field contents against the null
    1639                 :             :                  * marker string.  Only after that comparison fails do we know that
    1640                 :             :                  * de-escaping is actually the right thing to do; therefore we *must
    1641                 :             :                  * not* throw any syntax errors before we've done the null-marker
    1642                 :             :                  * check.
    1643                 :             :                  */
    1644                 :     2904433 :                 for (;;)
    1645                 :             :                 {
    1646                 :     2904433 :                         char            c;
    1647                 :             : 
    1648                 :     2904433 :                         end_ptr = cur_ptr;
    1649         [ +  + ]:     2904433 :                         if (cur_ptr >= line_end_ptr)
    1650                 :      115430 :                                 break;
    1651                 :     2789003 :                         c = *cur_ptr++;
    1652         [ +  + ]:     2789003 :                         if (c == delimc)
    1653                 :             :                         {
    1654                 :      284564 :                                 found_delim = true;
    1655                 :      284564 :                                 break;
    1656                 :             :                         }
    1657         [ +  + ]:     2504439 :                         if (c == '\\')
    1658                 :             :                         {
    1659         [ -  + ]:         848 :                                 if (cur_ptr >= line_end_ptr)
    1660                 :           0 :                                         break;
    1661                 :         848 :                                 c = *cur_ptr++;
    1662   [ +  -  +  -  :         848 :                                 switch (c)
             +  +  -  -  
                      - ]
    1663                 :             :                                 {
    1664                 :             :                                         case '0':
    1665                 :             :                                         case '1':
    1666                 :             :                                         case '2':
    1667                 :             :                                         case '3':
    1668                 :             :                                         case '4':
    1669                 :             :                                         case '5':
    1670                 :             :                                         case '6':
    1671                 :             :                                         case '7':
    1672                 :             :                                                 {
    1673                 :             :                                                         /* handle \013 */
    1674                 :           2 :                                                         int                     val;
    1675                 :             : 
    1676                 :           2 :                                                         val = OCTVALUE(c);
    1677         [ +  + ]:           2 :                                                         if (cur_ptr < line_end_ptr)
    1678                 :             :                                                         {
    1679                 :           1 :                                                                 c = *cur_ptr;
    1680   [ -  +  #  # ]:           1 :                                                                 if (ISOCTAL(c))
    1681                 :             :                                                                 {
    1682                 :           0 :                                                                         cur_ptr++;
    1683                 :           0 :                                                                         val = (val << 3) + OCTVALUE(c);
    1684         [ #  # ]:           0 :                                                                         if (cur_ptr < line_end_ptr)
    1685                 :             :                                                                         {
    1686                 :           0 :                                                                                 c = *cur_ptr;
    1687   [ #  #  #  # ]:           0 :                                                                                 if (ISOCTAL(c))
    1688                 :             :                                                                                 {
    1689                 :           0 :                                                                                         cur_ptr++;
    1690                 :           0 :                                                                                         val = (val << 3) + OCTVALUE(c);
    1691                 :           0 :                                                                                 }
    1692                 :           0 :                                                                         }
    1693                 :           0 :                                                                 }
    1694                 :           1 :                                                         }
    1695                 :           2 :                                                         c = val & 0377;
    1696   [ -  +  #  # ]:           2 :                                                         if (c == '\0' || IS_HIGHBIT_SET(c))
    1697                 :           2 :                                                                 saw_non_ascii = true;
    1698                 :           2 :                                                 }
    1699                 :           2 :                                                 break;
    1700                 :             :                                         case 'x':
    1701                 :             :                                                 /* Handle \x3F */
    1702         [ +  + ]:           2 :                                                 if (cur_ptr < line_end_ptr)
    1703                 :             :                                                 {
    1704                 :           1 :                                                         char            hexchar = *cur_ptr;
    1705                 :             : 
    1706         [ -  + ]:           1 :                                                         if (isxdigit((unsigned char) hexchar))
    1707                 :             :                                                         {
    1708                 :           0 :                                                                 int                     val = GetDecimalFromHex(hexchar);
    1709                 :             : 
    1710                 :           0 :                                                                 cur_ptr++;
    1711         [ #  # ]:           0 :                                                                 if (cur_ptr < line_end_ptr)
    1712                 :             :                                                                 {
    1713                 :           0 :                                                                         hexchar = *cur_ptr;
    1714         [ #  # ]:           0 :                                                                         if (isxdigit((unsigned char) hexchar))
    1715                 :             :                                                                         {
    1716                 :           0 :                                                                                 cur_ptr++;
    1717                 :           0 :                                                                                 val = (val << 4) + GetDecimalFromHex(hexchar);
    1718                 :           0 :                                                                         }
    1719                 :           0 :                                                                 }
    1720                 :           0 :                                                                 c = val & 0xff;
    1721   [ #  #  #  # ]:           0 :                                                                 if (c == '\0' || IS_HIGHBIT_SET(c))
    1722                 :           0 :                                                                         saw_non_ascii = true;
    1723                 :           0 :                                                         }
    1724                 :           1 :                                                 }
    1725                 :           2 :                                                 break;
    1726                 :             :                                         case 'b':
    1727                 :           0 :                                                 c = '\b';
    1728                 :           0 :                                                 break;
    1729                 :             :                                         case 'f':
    1730                 :           0 :                                                 c = '\f';
    1731                 :           0 :                                                 break;
    1732                 :             :                                         case 'n':
    1733                 :         508 :                                                 c = '\n';
    1734                 :         508 :                                                 break;
    1735                 :             :                                         case 'r':
    1736                 :           0 :                                                 c = '\r';
    1737                 :           0 :                                                 break;
    1738                 :             :                                         case 't':
    1739                 :           0 :                                                 c = '\t';
    1740                 :           0 :                                                 break;
    1741                 :             :                                         case 'v':
    1742                 :           0 :                                                 c = '\v';
    1743                 :           0 :                                                 break;
    1744                 :             : 
    1745                 :             :                                                 /*
    1746                 :             :                                                  * in all other cases, take the char after '\'
    1747                 :             :                                                  * literally
    1748                 :             :                                                  */
    1749                 :             :                                 }
    1750                 :         848 :                         }
    1751                 :             : 
    1752                 :             :                         /* Add c to output string */
    1753                 :     2504439 :                         *output_ptr++ = c;
    1754         [ +  + ]:     2904433 :                 }
    1755                 :             : 
    1756                 :             :                 /* Check whether raw input matched null marker */
    1757                 :      399994 :                 input_len = end_ptr - start_ptr;
    1758   [ +  +  +  + ]:      399994 :                 if (input_len == cstate->opts.null_print_len &&
    1759                 :       34158 :                         strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)
    1760                 :         322 :                         cstate->raw_fields[fieldno] = NULL;
    1761                 :             :                 /* Check whether raw input matched default marker */
    1762         [ +  + ]:      399672 :                 else if (fieldno < list_length(cstate->attnumlist) &&
    1763         [ +  + ]:      399665 :                                  cstate->opts.default_print &&
    1764   [ +  +  -  + ]:          19 :                                  input_len == cstate->opts.default_print_len &&
    1765                 :           5 :                                  strncmp(start_ptr, cstate->opts.default_print, input_len) == 0)
    1766                 :             :                 {
    1767                 :             :                         /* fieldno is 0-indexed and attnum is 1-indexed */
    1768                 :           5 :                         int                     m = list_nth_int(cstate->attnumlist, fieldno) - 1;
    1769                 :             : 
    1770         [ +  + ]:           5 :                         if (cstate->defexprs[m] != NULL)
    1771                 :             :                         {
    1772                 :             :                                 /* defaults contain entries for all physical attributes */
    1773                 :           4 :                                 cstate->defaults[m] = true;
    1774                 :           4 :                         }
    1775                 :             :                         else
    1776                 :             :                         {
    1777                 :           1 :                                 TupleDesc       tupDesc = RelationGetDescr(cstate->rel);
    1778                 :           1 :                                 Form_pg_attribute att = TupleDescAttr(tupDesc, m);
    1779                 :             : 
    1780   [ +  -  +  - ]:           1 :                                 ereport(ERROR,
    1781                 :             :                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1782                 :             :                                                  errmsg("unexpected default marker in COPY data"),
    1783                 :             :                                                  errdetail("Column \"%s\" has no default value.",
    1784                 :             :                                                                    NameStr(att->attname))));
    1785                 :           0 :                         }
    1786                 :           4 :                 }
    1787                 :             :                 else
    1788                 :             :                 {
    1789                 :             :                         /*
    1790                 :             :                          * At this point we know the field is supposed to contain data.
    1791                 :             :                          *
    1792                 :             :                          * If we de-escaped any non-7-bit-ASCII chars, make sure the
    1793                 :             :                          * resulting string is valid data for the db encoding.
    1794                 :             :                          */
    1795         [ +  - ]:      399667 :                         if (saw_non_ascii)
    1796                 :             :                         {
    1797                 :           0 :                                 char       *fld = cstate->raw_fields[fieldno];
    1798                 :             : 
    1799                 :           0 :                                 pg_verifymbstr(fld, output_ptr - fld, false);
    1800                 :           0 :                         }
    1801                 :             :                 }
    1802                 :             : 
    1803                 :             :                 /* Terminate attribute value in output area */
    1804                 :      399993 :                 *output_ptr++ = '\0';
    1805                 :             : 
    1806                 :      399993 :                 fieldno++;
    1807                 :             :                 /* Done if we hit EOL instead of a delim */
    1808         [ +  + ]:      399993 :                 if (!found_delim)
    1809                 :      115430 :                         break;
    1810         [ +  + ]:      399993 :         }
    1811                 :             : 
    1812                 :             :         /* Clean up state of attribute_buf */
    1813                 :      115430 :         output_ptr--;
    1814         [ +  - ]:      115430 :         Assert(*output_ptr == '\0');
    1815                 :      115430 :         cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
    1816                 :             : 
    1817                 :      115430 :         return fieldno;
    1818                 :      115430 : }
    1819                 :             : 
    1820                 :             : /*
    1821                 :             :  * Parse the current line (cstate->line_buf) into separate attributes (fields),
    1822                 :             :  * performing de-escaping as needed.  This has exactly the same API as
    1823                 :             :  * CopyReadAttributesText, except we parse the fields according to "standard"
    1824                 :             :  * (i.e. common) CSV usage.  Returns the field count; fills cstate->raw_fields[].
    1825                 :             :  */
    1826                 :             : static int
    1827                 :          56 : CopyReadAttributesCSV(CopyFromState cstate)
    1828                 :             : {
    1829                 :          56 :         char            delimc = cstate->opts.delim[0];
    1830                 :          56 :         char            quotec = cstate->opts.quote[0];
    1831                 :          56 :         char            escapec = cstate->opts.escape[0];
    1832                 :          56 :         int                     fieldno;
    1833                 :          56 :         char       *output_ptr;
    1834                 :          56 :         char       *cur_ptr;
    1835                 :          56 :         char       *line_end_ptr;
    1836                 :             : 
    1837                 :             :         /*
    1838                 :             :          * We need a special case for zero-column tables: check that the input
    1839                 :             :          * line is empty, and return.
    1840                 :             :          */
    1841         [ -  + ]:          56 :         if (cstate->max_fields <= 0)
    1842                 :             :         {
    1843         [ #  # ]:           0 :                 if (cstate->line_buf.len != 0)
    1844   [ #  #  #  # ]:           0 :                         ereport(ERROR,
    1845                 :             :                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1846                 :             :                                          errmsg("extra data after last expected column")));
    1847                 :           0 :                 return 0;
    1848                 :             :         }
    1849                 :             : 
    1850                 :          56 :         resetStringInfo(&cstate->attribute_buf);
    1851                 :             : 
    1852                 :             :         /*
    1853                 :             :          * The de-escaped attributes will certainly not be longer than the input
    1854                 :             :          * data line, so we can just force attribute_buf to be large enough and
    1855                 :             :          * then transfer data without any checks for enough space.  We need to do
    1856                 :             :          * it this way because enlarging attribute_buf mid-stream would invalidate
    1857                 :             :          * pointers already stored into cstate->raw_fields[].
    1858                 :             :          */
    1859         [ +  - ]:          56 :         if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
    1860                 :           0 :                 enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
    1861                 :          56 :         output_ptr = cstate->attribute_buf.data;
    1862                 :             : 
    1863                 :             :         /* set pointer variables for loop */
    1864                 :          56 :         cur_ptr = cstate->line_buf.data;
    1865                 :          56 :         line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
    1866                 :             : 
    1867                 :             :         /* Outer loop iterates over fields */
    1868                 :          56 :         fieldno = 0;
    1869                 :         111 :         for (;;)
    1870                 :             :         {
    1871                 :         111 :                 bool            found_delim = false;
    1872                 :         111 :                 bool            saw_quote = false;
    1873                 :         111 :                 char       *start_ptr;
    1874                 :         111 :                 char       *end_ptr;
    1875                 :         111 :                 int                     input_len;
    1876                 :             : 
    1877                 :             :                 /* Make sure raw_fields[] has room for the next value; double it if full */
    1878         [ +  - ]:         111 :                 if (fieldno >= cstate->max_fields)
    1879                 :             :                 {
    1880                 :           0 :                         cstate->max_fields *= 2;
    1881                 :           0 :                         cstate->raw_fields =
    1882                 :           0 :                                 repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
    1883                 :           0 :                 }
    1884                 :             : 
    1885                 :             :                 /* Remember start of field on both input and output sides */
    1886                 :         111 :                 start_ptr = cur_ptr;
    1887                 :         111 :                 cstate->raw_fields[fieldno] = output_ptr;
    1888                 :             : 
    1889                 :             :                 /*
    1890                 :             :                  * Scan data for field.
    1891                 :             :                  *
    1892                 :             :                  * The loop starts in "not quote" mode and then toggles between that
    1893                 :             :                  * and "in quote" mode. The loop exits normally if it is in "not
    1894                 :             :                  * quote" mode and a delimiter or line end is seen.
    1895                 :             :                  */
    1896                 :         129 :                 for (;;)
    1897                 :             :                 {
    1898                 :         129 :                         char            c;
    1899                 :             : 
    1900                 :             :                         /* Not in quote: copy chars until a delimiter, quote, or end of line */
    1901                 :         358 :                         for (;;)
    1902                 :             :                         {
    1903                 :         358 :                                 end_ptr = cur_ptr;      /* raw field ends before the char read next */
    1904         [ +  + ]:         358 :                                 if (cur_ptr >= line_end_ptr)
    1905                 :          55 :                                         goto endfield;
    1906                 :         303 :                                 c = *cur_ptr++;
    1907                 :             :                                 /* unquoted field delimiter */
    1908         [ +  + ]:         303 :                                 if (c == delimc)
    1909                 :             :                                 {
    1910                 :          56 :                                         found_delim = true;
    1911                 :          56 :                                         goto endfield;
    1912                 :             :                                 }
    1913                 :             :                                 /* start of quoted field (or part of field) */
    1914         [ +  + ]:         247 :                                 if (c == quotec)
    1915                 :             :                                 {
    1916                 :          18 :                                         saw_quote = true;
    1917                 :          18 :                                         break;
    1918                 :             :                                 }
    1919                 :             :                                 /* Add c to output string */
    1920                 :         229 :                                 *output_ptr++ = c;
    1921                 :             :                         }
    1922                 :             : 
    1923                 :             :                         /* In quote: copy chars until the closing quote; end of line is an error */
    1924                 :         111 :                         for (;;)
    1925                 :             :                         {
    1926                 :         115 :                                 end_ptr = cur_ptr;
    1927         [ +  - ]:         115 :                                 if (cur_ptr >= line_end_ptr)
    1928   [ #  #  #  # ]:           0 :                                         ereport(ERROR,
    1929                 :             :                                                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1930                 :             :                                                          errmsg("unterminated CSV quoted field")));
    1931                 :             : 
    1932                 :         115 :                                 c = *cur_ptr++;
    1933                 :             : 
    1934                 :             :                                 /* escape within a quoted field */
    1935         [ +  + ]:         115 :                                 if (c == escapec)
    1936                 :             :                                 {
    1937                 :             :                                         /*
    1938                 :             :                                          * peek at the next char if available, and escape it if it
    1939                 :             :                                          * is an escape char or a quote char
    1940                 :             :                                          */
    1941         [ +  + ]:          18 :                                         if (cur_ptr < line_end_ptr)
    1942                 :             :                                         {
    1943                 :          11 :                                                 char            nextc = *cur_ptr;
    1944                 :             : 
    1945   [ +  +  -  + ]:          11 :                                                 if (nextc == escapec || nextc == quotec)
    1946                 :             :                                                 {
    1947                 :           4 :                                                         *output_ptr++ = nextc;
    1948                 :           4 :                                                         cur_ptr++;
    1949                 :           4 :                                                         continue;
    1950                 :             :                                                 }
    1951         [ +  + ]:          11 :                                         }
    1952                 :          14 :                                 }
    1953                 :             : 
    1954                 :             :                                 /*
    1955                 :             :                                  * end of quoted field. Must do this test after testing for
    1956                 :             :                                  * escape in case quote char and escape char are the same
    1957                 :             :                                  * (which is the common case).
    1958                 :             :                                  */
    1959         [ +  + ]:         111 :                                 if (c == quotec)
    1960                 :          18 :                                         break;
    1961                 :             : 
    1962                 :             :                                 /* Add c to output string */
    1963                 :          93 :                                 *output_ptr++ = c;
    1964                 :             :                         }
    1965      [ -  +  + ]:         129 :                 }
    1966                 :             : endfield:
    1967                 :             : 
    1968                 :             :                 /* Terminate attribute value in output area */
    1969                 :         111 :                 *output_ptr++ = '\0';
    1970                 :             : 
    1971                 :             :                 /* Check whether raw input matched null marker (input that had a quote never does) */
    1972                 :         111 :                 input_len = end_ptr - start_ptr;
    1973   [ +  +  +  +  :         111 :                 if (!saw_quote && input_len == cstate->opts.null_print_len &&
                   -  + ]
    1974                 :           5 :                         strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)
    1975                 :           5 :                         cstate->raw_fields[fieldno] = NULL;
    1976                 :             :                 /* Check whether raw input matched default marker */
    1977         [ +  - ]:         106 :                 else if (fieldno < list_length(cstate->attnumlist) &&
    1978         [ +  + ]:         106 :                                  cstate->opts.default_print &&
    1979   [ +  +  -  + ]:          19 :                                  input_len == cstate->opts.default_print_len &&
    1980                 :           5 :                                  strncmp(start_ptr, cstate->opts.default_print, input_len) == 0)
    1981                 :             :                 {
    1982                 :             :                         /* fieldno is 0-index and attnum is 1-index */
    1983                 :           5 :                         int                     m = list_nth_int(cstate->attnumlist, fieldno) - 1;
    1984                 :             : 
    1985         [ +  + ]:           5 :                         if (cstate->defexprs[m] != NULL)
    1986                 :             :                         {
    1987                 :             :                                 /* defaults contain entries for all physical attributes */
    1988                 :           4 :                                 cstate->defaults[m] = true;
    1989                 :           4 :                         }
    1990                 :             :                         else
    1991                 :             :                         {
    1992                 :           1 :                                 TupleDesc       tupDesc = RelationGetDescr(cstate->rel);
    1993                 :           1 :                                 Form_pg_attribute att = TupleDescAttr(tupDesc, m);
    1994                 :             : 
    1995   [ +  -  +  - ]:           1 :                                 ereport(ERROR,
    1996                 :             :                                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1997                 :             :                                                  errmsg("unexpected default marker in COPY data"),
    1998                 :             :                                                  errdetail("Column \"%s\" has no default value.",
    1999                 :             :                                                                    NameStr(att->attname))));
    2000                 :           0 :                         }
    2001                 :           4 :                 }
    2002                 :             : 
    2003                 :         110 :                 fieldno++;
    2004                 :             :                 /* Done if we hit EOL instead of a delim */
    2005         [ +  + ]:         110 :                 if (!found_delim)
    2006                 :          55 :                         break;
    2007         [ +  + ]:         110 :         }
    2008                 :             : 
    2009                 :             :         /* Clean up state of attribute_buf: len does not count the last terminator */
    2010                 :          55 :         output_ptr--;
    2011         [ +  - ]:          55 :         Assert(*output_ptr == '\0');
    2012                 :          55 :         cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
    2013                 :             : 
    2014                 :          55 :         return fieldno;
    2015                 :          55 : }
    2016                 :             : 
    2017                 :             : 
    2018                 :             : /*
    2019                 :             :  * Read a binary attribute: an int32 field size (-1 means NULL), then that many data bytes.
    2020                 :             :  */
    2021                 :             : static Datum
    2022                 :          21 : CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo,
    2023                 :             :                                                 Oid typioparam, int32 typmod,
    2024                 :             :                                                 bool *isnull)
    2025                 :             : {
    2026                 :          21 :         int32           fld_size;
    2027                 :          21 :         Datum           result;
    2028                 :             : 
    2029         [ +  - ]:          21 :         if (!CopyGetInt32(cstate, &fld_size))
    2030   [ #  #  #  # ]:           0 :                 ereport(ERROR,
    2031                 :             :                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    2032                 :             :                                  errmsg("unexpected EOF in COPY data")));
    2033         [ +  + ]:          21 :         if (fld_size == -1)     /* field size -1 marks a NULL value; no data bytes are read */
    2034                 :             :         {
    2035                 :           5 :                 *isnull = true;
    2036                 :           5 :                 return ReceiveFunctionCall(flinfo, NULL, typioparam, typmod);
    2037                 :             :         }
    2038         [ +  - ]:          16 :         if (fld_size < 0)
    2039   [ #  #  #  # ]:           0 :                 ereport(ERROR,
    2040                 :             :                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    2041                 :             :                                  errmsg("invalid field size")));
    2042                 :             : 
    2043                 :             :         /* reset attribute_buf to empty, and load raw data in it */
    2044                 :          16 :         resetStringInfo(&cstate->attribute_buf);
    2045                 :             : 
    2046                 :          16 :         enlargeStringInfo(&cstate->attribute_buf, fld_size);
    2047                 :          32 :         if (CopyReadBinaryData(cstate, cstate->attribute_buf.data,
    2048   [ +  -  +  - ]:          32 :                                                    fld_size) != fld_size)
    2049   [ #  #  #  # ]:           0 :                 ereport(ERROR,
    2050                 :             :                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    2051                 :             :                                  errmsg("unexpected EOF in COPY data")));
    2052                 :             : 
    2053                 :          16 :         cstate->attribute_buf.len = fld_size;
    2054                 :          16 :         cstate->attribute_buf.data[fld_size] = '\0';
    2055                 :             : 
    2056                 :             :         /* Call the column type's binary input converter */
    2057                 :          32 :         result = ReceiveFunctionCall(flinfo, &cstate->attribute_buf,
    2058                 :          16 :                                                                  typioparam, typmod);
    2059                 :             : 
    2060                 :             :         /* Trouble if it didn't eat the whole buffer */
    2061         [ +  - ]:          16 :         if (cstate->attribute_buf.cursor != cstate->attribute_buf.len)
    2062   [ #  #  #  # ]:           0 :                 ereport(ERROR,
    2063                 :             :                                 (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
    2064                 :             :                                  errmsg("incorrect binary data format")));
    2065                 :             : 
    2066                 :          16 :         *isnull = false;
    2067                 :          16 :         return result;
    2068                 :          21 : }
        

Generated by: LCOV version 2.3.2-1