LCOV - code coverage report
Current view: top level - src/backend/regex - regc_nfa.c (source / functions) Coverage Total Hit
Test: Code coverage Lines: 81.8 % 1632 1335
Test Date: 2026-01-26 10:56:24 Functions: 90.3 % 62 56
Legend: Lines:     hit not hit
Branches: + taken - not taken # not executed
Branches: 68.8 % 1214 835

             Branch data     Line data    Source code
       1                 :             : /*
       2                 :             :  * NFA utilities.
       3                 :             :  * This file is #included by regcomp.c.
       4                 :             :  *
       5                 :             :  * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
       6                 :             :  *
       7                 :             :  * Development of this software was funded, in part, by Cray Research Inc.,
       8                 :             :  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
       9                 :             :  * Corporation, none of whom are responsible for the results.  The author
      10                 :             :  * thanks all of them.
      11                 :             :  *
      12                 :             :  * Redistribution and use in source and binary forms -- with or without
      13                 :             :  * modification -- are permitted for any purpose, provided that
      14                 :             :  * redistributions in source form retain this entire copyright notice and
      15                 :             :  * indicate the origin and nature of any modifications.
      16                 :             :  *
      17                 :             :  * I'd appreciate being given credit for this package in the documentation
      18                 :             :  * of software which uses it, but that is not a requirement.
      19                 :             :  *
      20                 :             :  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
      21                 :             :  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
      22                 :             :  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
      23                 :             :  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
      24                 :             :  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
      25                 :             :  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
      26                 :             :  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
      27                 :             :  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
      28                 :             :  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
      29                 :             :  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
      30                 :             :  *
      31                 :             :  * src/backend/regex/regc_nfa.c
      32                 :             :  *
      33                 :             :  *
      34                 :             :  * One or two things that technically ought to be in here
      35                 :             :  * are actually in color.c, thanks to some incestuous relationships in
      36                 :             :  * the color chains.
      37                 :             :  */
      38                 :             : 
      39                 :             : #define NISERR()        VISERR(nfa->v)
      40                 :             : #define NERR(e)         VERR(nfa->v, (e))
      41                 :             : 
      42                 :             : 
      43                 :             : /*
      44                 :             :  * newnfa - set up an NFA
      45                 :             :  */
      46                 :             : static struct nfa *                             /* the NFA, or NULL */
      47                 :        1931 : newnfa(struct vars *v,
      48                 :             :            struct colormap *cm,
      49                 :             :            struct nfa *parent)          /* NULL if primary NFA */
      50                 :             : {
      51                 :        1931 :         struct nfa *nfa;
      52                 :             : 
      53                 :        1931 :         nfa = (struct nfa *) MALLOC(sizeof(struct nfa));
      54         [ +  - ]:        1931 :         if (nfa == NULL)
      55                 :             :         {
      56         [ #  # ]:           0 :                 ERR(REG_ESPACE);
      57                 :           0 :                 return NULL;
      58                 :             :         }
      59                 :             : 
      60                 :             :         /* Make the NFA minimally valid, so freenfa() will behave sanely */
      61                 :        1931 :         nfa->states = NULL;
      62                 :        1931 :         nfa->slast = NULL;
      63                 :        1931 :         nfa->freestates = NULL;
      64                 :        1931 :         nfa->freearcs = NULL;
      65                 :        1931 :         nfa->lastsb = NULL;
      66                 :        1931 :         nfa->lastab = NULL;
      67                 :        1931 :         nfa->lastsbused = 0;
      68                 :        1931 :         nfa->lastabused = 0;
      69                 :        1931 :         nfa->nstates = 0;
      70                 :        1931 :         nfa->cm = cm;
      71                 :        1931 :         nfa->v = v;
      72                 :        1931 :         nfa->bos[0] = nfa->bos[1] = COLORLESS;
      73                 :        1931 :         nfa->eos[0] = nfa->eos[1] = COLORLESS;
      74                 :        1931 :         nfa->flags = 0;
      75                 :        1931 :         nfa->minmatchall = nfa->maxmatchall = -1;
      76                 :        1931 :         nfa->parent = parent;                /* Precedes newfstate so parent is valid. */
      77                 :             : 
      78                 :             :         /* Create required infrastructure */
      79                 :        1931 :         nfa->post = newfstate(nfa, '@');     /* number 0 */
      80                 :        1931 :         nfa->pre = newfstate(nfa, '>'); /* number 1 */
      81                 :        1931 :         nfa->init = newstate(nfa);   /* may become invalid later */
      82                 :        1931 :         nfa->final = newstate(nfa);
      83         [ -  + ]:        1931 :         if (ISERR())
      84                 :             :         {
      85                 :           0 :                 freenfa(nfa);
      86                 :           0 :                 return NULL;
      87                 :             :         }
      88                 :        1931 :         rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->pre, nfa->init);
      89                 :        1931 :         newarc(nfa, '^', 1, nfa->pre, nfa->init);
      90                 :        1931 :         newarc(nfa, '^', 0, nfa->pre, nfa->init);
      91                 :        1931 :         rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->final, nfa->post);
      92                 :        1931 :         newarc(nfa, '$', 1, nfa->final, nfa->post);
      93                 :        1931 :         newarc(nfa, '$', 0, nfa->final, nfa->post);
      94                 :             : 
      95         [ -  + ]:        1931 :         if (ISERR())
      96                 :             :         {
      97                 :           0 :                 freenfa(nfa);
      98                 :           0 :                 return NULL;
      99                 :             :         }
     100                 :        1931 :         return nfa;
     101                 :        1931 : }
     102                 :             : 
     103                 :             : /*
     104                 :             :  * freenfa - free an entire NFA
     105                 :             :  */
     106                 :             : static void
     107                 :        1931 : freenfa(struct nfa *nfa)
     108                 :             : {
     109                 :        1931 :         struct statebatch *sb;
     110                 :        1931 :         struct statebatch *sbnext;
     111                 :        1931 :         struct arcbatch *ab;
     112                 :        1931 :         struct arcbatch *abnext;
     113                 :             : 
     114         [ +  + ]:        3997 :         for (sb = nfa->lastsb; sb != NULL; sb = sbnext)
     115                 :             :         {
     116                 :        2066 :                 sbnext = sb->next;
     117                 :        2066 :                 nfa->v->spaceused -= STATEBATCHSIZE(sb->nstates);
     118                 :        2066 :                 FREE(sb);
     119                 :        2066 :         }
     120                 :        1931 :         nfa->lastsb = NULL;
     121         [ +  + ]:        6254 :         for (ab = nfa->lastab; ab != NULL; ab = abnext)
     122                 :             :         {
     123                 :        4323 :                 abnext = ab->next;
     124                 :        4323 :                 nfa->v->spaceused -= ARCBATCHSIZE(ab->narcs);
     125                 :        4323 :                 FREE(ab);
     126                 :        4323 :         }
     127                 :        1931 :         nfa->lastab = NULL;
     128                 :             : 
     129                 :        1931 :         nfa->nstates = -1;
     130                 :        1931 :         FREE(nfa);
     131                 :        1931 : }
     132                 :             : 
     133                 :             : /*
     134                 :             :  * newstate - allocate an NFA state, with zero flag value
     135                 :             :  */
     136                 :             : static struct state *                   /* NULL on error */
     137                 :       51367 : newstate(struct nfa *nfa)
     138                 :             : {
     139                 :       51367 :         struct state *s;
     140                 :             : 
     141                 :             :         /*
     142                 :             :          * This is a handy place to check for operation cancel during regex
     143                 :             :          * compilation, since no code path will go very long without making a new
     144                 :             :          * state or arc.
     145                 :             :          */
     146         [ +  - ]:       51367 :         INTERRUPT(nfa->v->re);
     147                 :             : 
     148                 :             :         /* first, recycle anything that's on the freelist */
     149         [ +  + ]:       51367 :         if (nfa->freestates != NULL)
     150                 :             :         {
     151                 :        2391 :                 s = nfa->freestates;
     152                 :        2391 :                 nfa->freestates = s->next;
     153                 :        2391 :         }
     154                 :             :         /* otherwise, is there anything left in the last statebatch? */
     155   [ +  +  +  + ]:       48976 :         else if (nfa->lastsb != NULL && nfa->lastsbused < nfa->lastsb->nstates)
     156                 :             :         {
     157                 :       46910 :                 s = &nfa->lastsb->s[nfa->lastsbused++];
     158                 :       46910 :         }
     159                 :             :         /* otherwise, need to allocate a new statebatch */
     160                 :             :         else
     161                 :             :         {
     162                 :        2066 :                 struct statebatch *newSb;
     163                 :        2066 :                 size_t          nstates;
     164                 :             : 
     165         [ -  + ]:        2066 :                 if (nfa->v->spaceused >= REG_MAX_COMPILE_SPACE)
     166                 :             :                 {
     167         [ #  # ]:           0 :                         NERR(REG_ETOOBIG);
     168                 :           0 :                         return NULL;
     169                 :             :                 }
     170         [ +  + ]:        2066 :                 nstates = (nfa->lastsb != NULL) ? nfa->lastsb->nstates * 2 : FIRSTSBSIZE;
     171         [ +  + ]:        2066 :                 if (nstates > MAXSBSIZE)
     172                 :           8 :                         nstates = MAXSBSIZE;
     173                 :        2066 :                 newSb = (struct statebatch *) MALLOC(STATEBATCHSIZE(nstates));
     174         [ +  - ]:        2066 :                 if (newSb == NULL)
     175                 :             :                 {
     176         [ #  # ]:           0 :                         NERR(REG_ESPACE);
     177                 :           0 :                         return NULL;
     178                 :             :                 }
     179                 :        2066 :                 nfa->v->spaceused += STATEBATCHSIZE(nstates);
     180                 :        2066 :                 newSb->nstates = nstates;
     181                 :        2066 :                 newSb->next = nfa->lastsb;
     182                 :        2066 :                 nfa->lastsb = newSb;
     183                 :        2066 :                 nfa->lastsbused = 1;
     184                 :        2066 :                 s = &newSb->s[0];
     185         [ -  + ]:        2066 :         }
     186                 :             : 
     187         [ +  - ]:       51367 :         assert(nfa->nstates >= 0);
     188                 :       51367 :         s->no = nfa->nstates++;
     189                 :       51367 :         s->flag = 0;
     190         [ +  + ]:       51367 :         if (nfa->states == NULL)
     191                 :        1931 :                 nfa->states = s;
     192                 :       51367 :         s->nins = 0;
     193                 :       51367 :         s->ins = NULL;
     194                 :       51367 :         s->nouts = 0;
     195                 :       51367 :         s->outs = NULL;
     196                 :       51367 :         s->tmp = NULL;
     197                 :       51367 :         s->next = NULL;
     198         [ +  + ]:       51367 :         if (nfa->slast != NULL)
     199                 :             :         {
     200         [ +  - ]:       49436 :                 assert(nfa->slast->next == NULL);
     201                 :       49436 :                 nfa->slast->next = s;
     202                 :       49436 :         }
     203                 :       51367 :         s->prev = nfa->slast;
     204                 :       51367 :         nfa->slast = s;
     205                 :       51367 :         return s;
     206                 :       51367 : }
     207                 :             : 
     208                 :             : /*
     209                 :             :  * newfstate - allocate an NFA state with a specified flag value
     210                 :             :  */
     211                 :             : static struct state *                   /* NULL on error */
     212                 :        3862 : newfstate(struct nfa *nfa, int flag)
     213                 :             : {
     214                 :        3862 :         struct state *s;
     215                 :             : 
     216                 :        3862 :         s = newstate(nfa);
     217         [ -  + ]:        3862 :         if (s != NULL)
     218                 :        3862 :                 s->flag = (char) flag;
     219                 :        7724 :         return s;
     220                 :        3862 : }
     221                 :             : 
     222                 :             : /*
     223                 :             :  * dropstate - delete a state's inarcs and outarcs and free it
     224                 :             :  */
     225                 :             : static void
     226                 :       18614 : dropstate(struct nfa *nfa,
     227                 :             :                   struct state *s)
     228                 :             : {
     229                 :       18614 :         struct arc *a;
     230                 :             : 
     231         [ +  + ]:       20876 :         while ((a = s->ins) != NULL)
     232                 :        2262 :                 freearc(nfa, a);
     233         [ +  + ]:       31635 :         while ((a = s->outs) != NULL)
     234                 :       13021 :                 freearc(nfa, a);
     235                 :       18614 :         freestate(nfa, s);
     236                 :       18614 : }
     237                 :             : 
     238                 :             : /*
     239                 :             :  * freestate - free a state, which has no in-arcs or out-arcs
     240                 :             :  */
     241                 :             : static void
     242                 :       19904 : freestate(struct nfa *nfa,
     243                 :             :                   struct state *s)
     244                 :             : {
     245         [ +  - ]:       19904 :         assert(s != NULL);
     246         [ +  - ]:       19904 :         assert(s->nins == 0 && s->nouts == 0);
     247                 :             : 
     248                 :       19904 :         s->no = FREESTATE;
     249                 :       19904 :         s->flag = 0;
     250         [ +  + ]:       19904 :         if (s->next != NULL)
     251                 :       18797 :                 s->next->prev = s->prev;
     252                 :             :         else
     253                 :             :         {
     254         [ +  - ]:        1107 :                 assert(s == nfa->slast);
     255                 :        1107 :                 nfa->slast = s->prev;
     256                 :             :         }
     257         [ +  - ]:       19904 :         if (s->prev != NULL)
     258                 :       19904 :                 s->prev->next = s->next;
     259                 :             :         else
     260                 :             :         {
     261         [ #  # ]:           0 :                 assert(s == nfa->states);
     262                 :           0 :                 nfa->states = s->next;
     263                 :             :         }
     264                 :       19904 :         s->prev = NULL;
     265                 :       19904 :         s->next = nfa->freestates;        /* don't delete it, put it on the free list */
     266                 :       19904 :         nfa->freestates = s;
     267                 :       19904 : }
     268                 :             : 
     269                 :             : /*
     270                 :             :  * newarc - set up a new arc within an NFA
     271                 :             :  *
     272                 :             :  * This function checks to make sure that no duplicate arcs are created.
     273                 :             :  * In general we never want duplicates.
     274                 :             :  *
     275                 :             :  * However: in principle, a RAINBOW arc is redundant with any plain arc
     276                 :             :  * (unless that arc is for a pseudocolor).  But we don't try to recognize
     277                 :             :  * that redundancy, either here or in allied operations such as moveins().
     278                 :             :  * The pseudocolor consideration makes that more costly than it seems worth.
     279                 :             :  */
     280                 :             : static void
     281                 :      111288 : newarc(struct nfa *nfa,
     282                 :             :            int t,
     283                 :             :            color co,
     284                 :             :            struct state *from,
     285                 :             :            struct state *to)
     286                 :             : {
     287                 :      111288 :         struct arc *a;
     288                 :             : 
     289         [ +  - ]:      111288 :         assert(from != NULL && to != NULL);
     290                 :             : 
     291                 :             :         /*
     292                 :             :          * This is a handy place to check for operation cancel during regex
     293                 :             :          * compilation, since no code path will go very long without making a new
     294                 :             :          * state or arc.
     295                 :             :          */
     296         [ +  - ]:      111288 :         INTERRUPT(nfa->v->re);
     297                 :             : 
     298                 :             :         /* check for duplicate arc, using whichever chain is shorter */
     299         [ +  + ]:      111288 :         if (from->nouts <= to->nins)
     300                 :             :         {
     301         [ +  + ]:      327700 :                 for (a = from->outs; a != NULL; a = a->outchain)
     302   [ +  +  +  +  :      244823 :                         if (a->to == to && a->co == co && a->type == t)
                   +  + ]
     303                 :        1988 :                                 return;
     304                 :       82877 :         }
     305                 :             :         else
     306                 :             :         {
     307         [ +  + ]:      326607 :                 for (a = to->ins; a != NULL; a = a->inchain)
     308   [ +  +  +  +  :      302419 :                         if (a->from == from && a->co == co && a->type == t)
                   +  + ]
     309                 :        2235 :                                 return;
     310                 :             :         }
     311                 :             : 
     312                 :             :         /* no dup, so create the arc */
     313                 :      107065 :         createarc(nfa, t, co, from, to);
     314         [ -  + ]:      111288 : }
     315                 :             : 
     316                 :             : /*
     317                 :             :  * createarc - create a new arc within an NFA
     318                 :             :  *
     319                 :             :  * This function must *only* be used after verifying that there is no existing
     320                 :             :  * identical arc (same type/color/from/to).
     321                 :             :  */
     322                 :             : static void
     323                 :     2520509 : createarc(struct nfa *nfa,
     324                 :             :                   int t,
     325                 :             :                   color co,
     326                 :             :                   struct state *from,
     327                 :             :                   struct state *to)
     328                 :             : {
     329                 :     2520509 :         struct arc *a;
     330                 :             : 
     331                 :     2520509 :         a = allocarc(nfa);
     332         [ +  + ]:     2520509 :         if (NISERR())
     333                 :        1903 :                 return;
     334         [ +  - ]:     2518606 :         assert(a != NULL);
     335                 :             : 
     336                 :     2518606 :         a->type = t;
     337                 :     2518606 :         a->co = co;
     338                 :     2518606 :         a->to = to;
     339                 :     2518606 :         a->from = from;
     340                 :             : 
     341                 :             :         /*
     342                 :             :          * Put the new arc on the beginning, not the end, of the chains; it's
     343                 :             :          * simpler here, and freearc() is the same cost either way.  See also the
     344                 :             :          * logic in moveins() and its cohorts, as well as fixempties().
     345                 :             :          */
     346                 :     2518606 :         a->inchain = to->ins;
     347                 :     2518606 :         a->inchainRev = NULL;
     348         [ +  + ]:     2518606 :         if (to->ins)
     349                 :     2453767 :                 to->ins->inchainRev = a;
     350                 :     2518606 :         to->ins = a;
     351                 :     2518606 :         a->outchain = from->outs;
     352                 :     2518606 :         a->outchainRev = NULL;
     353         [ +  + ]:     2518606 :         if (from->outs)
     354                 :     2464542 :                 from->outs->outchainRev = a;
     355                 :     2518606 :         from->outs = a;
     356                 :             : 
     357                 :     2518606 :         from->nouts++;
     358                 :     2518606 :         to->nins++;
     359                 :             : 
     360   [ +  +  +  +  :     2518606 :         if (COLORED(a) && nfa->parent == NULL)
             +  +  +  + ]
     361                 :       37876 :                 colorchain(nfa->cm, a);
     362         [ -  + ]:     2520509 : }
     363                 :             : 
     364                 :             : /*
     365                 :             :  * allocarc - allocate a new arc within an NFA
     366                 :             :  */
     367                 :             : static struct arc *                             /* NULL for failure */
     368                 :     2520509 : allocarc(struct nfa *nfa)
     369                 :             : {
     370                 :     2520509 :         struct arc *a;
     371                 :             : 
     372                 :             :         /* first, recycle anything that's on the freelist */
     373         [ +  + ]:     2520509 :         if (nfa->freearcs != NULL)
     374                 :             :         {
     375                 :       76386 :                 a = nfa->freearcs;
     376                 :       76386 :                 nfa->freearcs = a->freechain;
     377                 :       76386 :         }
     378                 :             :         /* otherwise, is there anything left in the last arcbatch? */
     379   [ +  +  +  + ]:     2444123 :         else if (nfa->lastab != NULL && nfa->lastabused < nfa->lastab->narcs)
     380                 :             :         {
     381                 :     2437897 :                 a = &nfa->lastab->a[nfa->lastabused++];
     382                 :     2437897 :         }
     383                 :             :         /* otherwise, need to allocate a new arcbatch */
     384                 :             :         else
     385                 :             :         {
     386                 :        6226 :                 struct arcbatch *newAb;
     387                 :        6226 :                 size_t          narcs;
     388                 :             : 
     389         [ +  + ]:        6226 :                 if (nfa->v->spaceused >= REG_MAX_COMPILE_SPACE)
     390                 :             :                 {
     391         [ +  + ]:        1903 :                         NERR(REG_ETOOBIG);
     392                 :        1903 :                         return NULL;
     393                 :             :                 }
     394         [ +  + ]:        4323 :                 narcs = (nfa->lastab != NULL) ? nfa->lastab->narcs * 2 : FIRSTABSIZE;
     395         [ +  + ]:        4323 :                 if (narcs > MAXABSIZE)
     396                 :        2332 :                         narcs = MAXABSIZE;
     397                 :        4323 :                 newAb = (struct arcbatch *) MALLOC(ARCBATCHSIZE(narcs));
     398         [ +  - ]:        4323 :                 if (newAb == NULL)
     399                 :             :                 {
     400         [ #  # ]:           0 :                         NERR(REG_ESPACE);
     401                 :           0 :                         return NULL;
     402                 :             :                 }
     403                 :        4323 :                 nfa->v->spaceused += ARCBATCHSIZE(narcs);
     404                 :        4323 :                 newAb->narcs = narcs;
     405                 :        4323 :                 newAb->next = nfa->lastab;
     406                 :        4323 :                 nfa->lastab = newAb;
     407                 :        4323 :                 nfa->lastabused = 1;
     408                 :        4323 :                 a = &newAb->a[0];
     409         [ +  + ]:        6226 :         }
     410                 :             : 
     411                 :     2518606 :         return a;
     412                 :     2520509 : }
     413                 :             : 
     414                 :             : /*
     415                 :             :  * freearc - free an arc
     416                 :             :  */
     417                 :             : static void
     418                 :       95486 : freearc(struct nfa *nfa,
     419                 :             :                 struct arc *victim)
     420                 :             : {
     421                 :       95486 :         struct state *from = victim->from;
     422                 :       95486 :         struct state *to = victim->to;
     423                 :       95486 :         struct arc *predecessor;
     424                 :             : 
     425         [ +  - ]:       95486 :         assert(victim->type != 0);
     426                 :             : 
     427                 :             :         /* take it off color chain if necessary */
     428   [ +  +  +  +  :       95486 :         if (COLORED(victim) && nfa->parent == NULL)
             +  +  +  + ]
     429                 :       22639 :                 uncolorchain(nfa->cm, victim);
     430                 :             : 
     431                 :             :         /* take it off source's out-chain */
     432         [ +  - ]:       95486 :         assert(from != NULL);
     433                 :       95486 :         predecessor = victim->outchainRev;
     434         [ +  + ]:       95486 :         if (predecessor == NULL)
     435                 :             :         {
     436         [ +  - ]:       36982 :                 assert(from->outs == victim);
     437                 :       36982 :                 from->outs = victim->outchain;
     438                 :       36982 :         }
     439                 :             :         else
     440                 :             :         {
     441         [ +  - ]:       58504 :                 assert(predecessor->outchain == victim);
     442                 :       58504 :                 predecessor->outchain = victim->outchain;
     443                 :             :         }
     444         [ +  + ]:       95486 :         if (victim->outchain != NULL)
     445                 :             :         {
     446         [ +  - ]:       34542 :                 assert(victim->outchain->outchainRev == victim);
     447                 :       34542 :                 victim->outchain->outchainRev = predecessor;
     448                 :       34542 :         }
     449                 :       95486 :         from->nouts--;
     450                 :             : 
     451                 :             :         /* take it off target's in-chain */
     452         [ +  - ]:       95486 :         assert(to != NULL);
     453                 :       95486 :         predecessor = victim->inchainRev;
     454         [ +  + ]:       95486 :         if (predecessor == NULL)
     455                 :             :         {
     456         [ +  - ]:       55176 :                 assert(to->ins == victim);
     457                 :       55176 :                 to->ins = victim->inchain;
     458                 :       55176 :         }
     459                 :             :         else
     460                 :             :         {
     461         [ +  - ]:       40310 :                 assert(predecessor->inchain == victim);
     462                 :       40310 :                 predecessor->inchain = victim->inchain;
     463                 :             :         }
     464         [ +  + ]:       95486 :         if (victim->inchain != NULL)
     465                 :             :         {
     466         [ +  - ]:       39043 :                 assert(victim->inchain->inchainRev == victim);
     467                 :       39043 :                 victim->inchain->inchainRev = predecessor;
     468                 :       39043 :         }
     469                 :       95486 :         to->nins--;
     470                 :             : 
     471                 :             :         /* clean up and place on NFA's free list */
     472                 :       95486 :         victim->type = 0;
     473                 :       95486 :         victim->from = NULL;         /* precautions... */
     474                 :       95486 :         victim->to = NULL;
     475                 :       95486 :         victim->inchain = NULL;
     476                 :       95486 :         victim->inchainRev = NULL;
     477                 :       95486 :         victim->outchain = NULL;
     478                 :       95486 :         victim->outchainRev = NULL;
     479                 :       95486 :         victim->freechain = nfa->freearcs;
     480                 :       95486 :         nfa->freearcs = victim;
     481                 :       95486 : }
     482                 :             : 
     483                 :             : /*
     484                 :             :  * changearcsource - flip an arc to have a different from state
     485                 :             :  *
     486                 :             :  * Caller must have verified that there is no pre-existing duplicate arc.
     487                 :             :  */
     488                 :             : static void
     489                 :           0 : changearcsource(struct arc *a, struct state *newfrom)
     490                 :             : {
     491                 :           0 :         struct state *oldfrom = a->from;
     492                 :           0 :         struct arc *predecessor;
     493                 :             : 
     494         [ #  # ]:           0 :         assert(oldfrom != newfrom);
     495                 :             : 
     496                 :             :         /* take it off old source's out-chain */
     497         [ #  # ]:           0 :         assert(oldfrom != NULL);
     498                 :           0 :         predecessor = a->outchainRev;
     499         [ #  # ]:           0 :         if (predecessor == NULL)
     500                 :             :         {
     501         [ #  # ]:           0 :                 assert(oldfrom->outs == a);
     502                 :           0 :                 oldfrom->outs = a->outchain;
     503                 :           0 :         }
     504                 :             :         else
     505                 :             :         {
     506         [ #  # ]:           0 :                 assert(predecessor->outchain == a);
     507                 :           0 :                 predecessor->outchain = a->outchain;
     508                 :             :         }
     509         [ #  # ]:           0 :         if (a->outchain != NULL)
     510                 :             :         {
     511         [ #  # ]:           0 :                 assert(a->outchain->outchainRev == a);
     512                 :           0 :                 a->outchain->outchainRev = predecessor;
     513                 :           0 :         }
     514                 :           0 :         oldfrom->nouts--;
     515                 :             : 
     516                 :           0 :         a->from = newfrom;
     517                 :             : 
     518                 :             :         /* prepend it to new source's out-chain */
     519                 :           0 :         a->outchain = newfrom->outs;
     520                 :           0 :         a->outchainRev = NULL;
     521         [ #  # ]:           0 :         if (newfrom->outs)
     522                 :           0 :                 newfrom->outs->outchainRev = a;
     523                 :           0 :         newfrom->outs = a;
     524                 :           0 :         newfrom->nouts++;
     525                 :           0 : }
     526                 :             : 
     527                 :             : /*
     528                 :             :  * changearctarget - flip an arc to have a different to state
     529                 :             :  *
     530                 :             :  * Caller must have verified that there is no pre-existing duplicate arc.
     531                 :             :  */
     532                 :             : static void
     533                 :           0 : changearctarget(struct arc *a, struct state *newto)
     534                 :             : {
     535                 :           0 :         struct state *oldto = a->to;
     536                 :           0 :         struct arc *predecessor;
     537                 :             : 
     538         [ #  # ]:           0 :         assert(oldto != newto);
     539                 :             : 
     540                 :             :         /* take it off old target's in-chain */
     541         [ #  # ]:           0 :         assert(oldto != NULL);
     542                 :           0 :         predecessor = a->inchainRev;
     543         [ #  # ]:           0 :         if (predecessor == NULL)
     544                 :             :         {
     545         [ #  # ]:           0 :                 assert(oldto->ins == a);
     546                 :           0 :                 oldto->ins = a->inchain;
     547                 :           0 :         }
     548                 :             :         else
     549                 :             :         {
     550         [ #  # ]:           0 :                 assert(predecessor->inchain == a);
     551                 :           0 :                 predecessor->inchain = a->inchain;
     552                 :             :         }
     553         [ #  # ]:           0 :         if (a->inchain != NULL)
     554                 :             :         {
     555         [ #  # ]:           0 :                 assert(a->inchain->inchainRev == a);
     556                 :           0 :                 a->inchain->inchainRev = predecessor;
     557                 :           0 :         }
     558                 :           0 :         oldto->nins--;
     559                 :             : 
     560                 :           0 :         a->to = newto;
     561                 :             : 
     562                 :             :         /* prepend it to new target's in-chain */
     563                 :           0 :         a->inchain = newto->ins;
     564                 :           0 :         a->inchainRev = NULL;
     565         [ #  # ]:           0 :         if (newto->ins)
     566                 :           0 :                 newto->ins->inchainRev = a;
     567                 :           0 :         newto->ins = a;
     568                 :           0 :         newto->nins++;
     569                 :           0 : }
     570                 :             : 
     571                 :             : /*
     572                 :             :  * hasnonemptyout - Does state have a non-EMPTY out arc?
     573                 :             :  */
     574                 :             : static int
     575                 :       23831 : hasnonemptyout(struct state *s)
     576                 :             : {
     577                 :       23831 :         struct arc *a;
     578                 :             : 
     579         [ +  + ]:       27056 :         for (a = s->outs; a != NULL; a = a->outchain)
     580                 :             :         {
     581         [ +  + ]:       26692 :                 if (a->type != EMPTY)
     582                 :       23467 :                         return 1;
     583                 :        3225 :         }
     584                 :         364 :         return 0;
     585                 :       23831 : }
     586                 :             : 
     587                 :             : /*
     588                 :             :  * findarc - find arc, if any, from given source with given type and color
     589                 :             :  * If there is more than one such arc, the result is random.
     590                 :             :  */
     591                 :             : static struct arc *
     592                 :          37 : findarc(struct state *s,
     593                 :             :                 int type,
     594                 :             :                 color co)
     595                 :             : {
     596                 :          37 :         struct arc *a;
     597                 :             : 
     598         [ +  + ]:          98 :         for (a = s->outs; a != NULL; a = a->outchain)
     599   [ +  -  -  + ]:          61 :                 if (a->type == type && a->co == co)
     600                 :           0 :                         return a;
     601                 :          37 :         return NULL;
     602                 :          37 : }
     603                 :             : 
     604                 :             : /*
     605                 :             :  * cparc - allocate a new arc within an NFA, copying details from old one
     606                 :             :  */
     607                 :             : static void
     608                 :       67920 : cparc(struct nfa *nfa,
     609                 :             :           struct arc *oa,
     610                 :             :           struct state *from,
     611                 :             :           struct state *to)
     612                 :             : {
     613                 :       67920 :         newarc(nfa, oa->type, oa->co, from, to);
     614                 :       67920 : }
     615                 :             : 
     616                 :             : /*
     617                 :             :  * sortins - sort the in arcs of a state by from/color/type
     618                 :             :  */
     619                 :             : static void
     620                 :        3116 : sortins(struct nfa *nfa,
     621                 :             :                 struct state *s)
     622                 :             : {
     623                 :        3116 :         struct arc **sortarray;
     624                 :        3116 :         struct arc *a;
     625                 :        3116 :         int                     n = s->nins;
     626                 :        3116 :         int                     i;
     627                 :             : 
     628         [ -  + ]:        3116 :         if (n <= 1)
     629                 :           0 :                 return;                                 /* nothing to do */
     630                 :             :         /* make an array of arc pointers ... */
     631                 :        3116 :         sortarray = (struct arc **) MALLOC(n * sizeof(struct arc *));
     632         [ +  - ]:        3116 :         if (sortarray == NULL)
     633                 :             :         {
     634         [ #  # ]:           0 :                 NERR(REG_ESPACE);
     635                 :           0 :                 return;
     636                 :             :         }
     637                 :        3116 :         i = 0;
     638         [ +  + ]:        9984 :         for (a = s->ins; a != NULL; a = a->inchain)
     639                 :        6868 :                 sortarray[i++] = a;
     640         [ +  - ]:        3116 :         assert(i == n);
     641                 :             :         /* ... sort the array */
     642                 :        3116 :         qsort(sortarray, n, sizeof(struct arc *), sortins_cmp);
     643                 :             :         /* ... and rebuild arc list in order */
     644                 :             :         /* it seems worth special-casing first and last items to simplify loop */
     645                 :        3116 :         a = sortarray[0];
     646                 :        3116 :         s->ins = a;
     647                 :        3116 :         a->inchain = sortarray[1];
     648                 :        3116 :         a->inchainRev = NULL;
     649         [ +  + ]:        3752 :         for (i = 1; i < n - 1; i++)
     650                 :             :         {
     651                 :         636 :                 a = sortarray[i];
     652                 :         636 :                 a->inchain = sortarray[i + 1];
     653                 :         636 :                 a->inchainRev = sortarray[i - 1];
     654                 :         636 :         }
     655                 :        3116 :         a = sortarray[i];
     656                 :        3116 :         a->inchain = NULL;
     657                 :        3116 :         a->inchainRev = sortarray[i - 1];
     658                 :        3116 :         FREE(sortarray);
     659         [ -  + ]:        3116 : }
     660                 :             : 
     661                 :             : static int
     662                 :    11979387 : sortins_cmp(const void *a, const void *b)
     663                 :             : {
     664                 :    11979387 :         const struct arc *aa = *((const struct arc *const *) a);
     665                 :    11979387 :         const struct arc *bb = *((const struct arc *const *) b);
     666                 :             : 
     667                 :             :         /* we check the fields in the order they are most likely to be different */
     668         [ +  + ]:    11979387 :         if (aa->from->no < bb->from->no)
     669                 :     9549529 :                 return -1;
     670         [ +  + ]:     2429858 :         if (aa->from->no > bb->from->no)
     671                 :     2379273 :                 return 1;
     672         [ +  + ]:       50585 :         if (aa->co < bb->co)
     673                 :       28062 :                 return -1;
     674         [ +  + ]:       22523 :         if (aa->co > bb->co)
     675                 :       22234 :                 return 1;
     676         [ +  + ]:         289 :         if (aa->type < bb->type)
     677                 :          13 :                 return -1;
     678         [ +  + ]:         276 :         if (aa->type > bb->type)
     679                 :           5 :                 return 1;
     680                 :         271 :         return 0;
     681                 :    11979387 : }
     682                 :             : 
     683                 :             : /*
     684                 :             :  * sortouts - sort the out arcs of a state by to/color/type
     685                 :             :  */
     686                 :             : static void
     687                 :           0 : sortouts(struct nfa *nfa,
     688                 :             :                  struct state *s)
     689                 :             : {
     690                 :           0 :         struct arc **sortarray;
     691                 :           0 :         struct arc *a;
     692                 :           0 :         int                     n = s->nouts;
     693                 :           0 :         int                     i;
     694                 :             : 
     695         [ #  # ]:           0 :         if (n <= 1)
     696                 :           0 :                 return;                                 /* nothing to do */
     697                 :             :         /* make an array of arc pointers ... */
     698                 :           0 :         sortarray = (struct arc **) MALLOC(n * sizeof(struct arc *));
     699         [ #  # ]:           0 :         if (sortarray == NULL)
     700                 :             :         {
     701         [ #  # ]:           0 :                 NERR(REG_ESPACE);
     702                 :           0 :                 return;
     703                 :             :         }
     704                 :           0 :         i = 0;
     705         [ #  # ]:           0 :         for (a = s->outs; a != NULL; a = a->outchain)
     706                 :           0 :                 sortarray[i++] = a;
     707         [ #  # ]:           0 :         assert(i == n);
     708                 :             :         /* ... sort the array */
     709                 :           0 :         qsort(sortarray, n, sizeof(struct arc *), sortouts_cmp);
     710                 :             :         /* ... and rebuild arc list in order */
     711                 :             :         /* it seems worth special-casing first and last items to simplify loop */
     712                 :           0 :         a = sortarray[0];
     713                 :           0 :         s->outs = a;
     714                 :           0 :         a->outchain = sortarray[1];
     715                 :           0 :         a->outchainRev = NULL;
     716         [ #  # ]:           0 :         for (i = 1; i < n - 1; i++)
     717                 :             :         {
     718                 :           0 :                 a = sortarray[i];
     719                 :           0 :                 a->outchain = sortarray[i + 1];
     720                 :           0 :                 a->outchainRev = sortarray[i - 1];
     721                 :           0 :         }
     722                 :           0 :         a = sortarray[i];
     723                 :           0 :         a->outchain = NULL;
     724                 :           0 :         a->outchainRev = sortarray[i - 1];
     725                 :           0 :         FREE(sortarray);
     726         [ #  # ]:           0 : }
     727                 :             : 
     728                 :             : static int
     729                 :           0 : sortouts_cmp(const void *a, const void *b)
     730                 :             : {
     731                 :           0 :         const struct arc *aa = *((const struct arc *const *) a);
     732                 :           0 :         const struct arc *bb = *((const struct arc *const *) b);
     733                 :             : 
     734                 :             :         /* we check the fields in the order they are most likely to be different */
     735         [ #  # ]:           0 :         if (aa->to->no < bb->to->no)
     736                 :           0 :                 return -1;
     737         [ #  # ]:           0 :         if (aa->to->no > bb->to->no)
     738                 :           0 :                 return 1;
     739         [ #  # ]:           0 :         if (aa->co < bb->co)
     740                 :           0 :                 return -1;
     741         [ #  # ]:           0 :         if (aa->co > bb->co)
     742                 :           0 :                 return 1;
     743         [ #  # ]:           0 :         if (aa->type < bb->type)
     744                 :           0 :                 return -1;
     745         [ #  # ]:           0 :         if (aa->type > bb->type)
     746                 :           0 :                 return 1;
     747                 :           0 :         return 0;
     748                 :           0 : }
     749                 :             : 
     750                 :             : /*
     751                 :             :  * Common decision logic about whether to use arc-by-arc operations or
     752                 :             :  * sort/merge.  If there's just a few source arcs we cannot recoup the
     753                 :             :  * cost of sorting the destination arc list, no matter how large it is.
     754                 :             :  * Otherwise, limit the number of arc-by-arc comparisons to about 1000
     755                 :             :  * (a somewhat arbitrary choice, but the breakeven point would probably
     756                 :             :  * be machine dependent anyway).
     757                 :             :  */
     758                 :             : #define BULK_ARC_OP_USE_SORT(nsrcarcs, ndestarcs) \
     759                 :             :         ((nsrcarcs) < 4 ? 0 : ((nsrcarcs) > 32 || (ndestarcs) > 32))
     760                 :             : 
     761                 :             : /*
     762                 :             :  * moveins - move all in arcs of a state to another state
     763                 :             :  *
     764                 :             :  * You might think this could be done better by just updating the
     765                 :             :  * existing arcs, and you would be right if it weren't for the need
     766                 :             :  * for duplicate suppression, which makes it easier to just make new
     767                 :             :  * ones to exploit the suppression built into newarc.
     768                 :             :  *
     769                 :             :  * However, if we have a whole lot of arcs to deal with, retail duplicate
     770                 :             :  * checks become too slow.  In that case we proceed by sorting and merging
     771                 :             :  * the arc lists, and then we can indeed just update the arcs in-place.
     772                 :             :  *
     773                 :             :  * On the other hand, it's also true that this is frequently called with
     774                 :             :  * a brand-new newState that has no existing in-arcs.  In that case,
     775                 :             :  * de-duplication is unnecessary, so we can just blindly move all the arcs.
     776                 :             :  */
     777                 :             : static void
     778                 :       28218 : moveins(struct nfa *nfa,
     779                 :             :                 struct state *oldState,
     780                 :             :                 struct state *newState)
     781                 :             : {
     782         [ +  - ]:       28218 :         assert(oldState != newState);
     783                 :             : 
     784         [ +  + ]:       28218 :         if (newState->nins == 0)
     785                 :             :         {
     786                 :             :                 /* No need for de-duplication */
     787                 :       13933 :                 struct arc *a;
     788                 :             : 
     789         [ +  + ]:       28038 :                 while ((a = oldState->ins) != NULL)
     790                 :             :                 {
     791                 :       14105 :                         createarc(nfa, a->type, a->co, a->from, newState);
     792                 :       14105 :                         freearc(nfa, a);
     793                 :             :                 }
     794                 :       13933 :         }
     795   [ +  +  +  +  :       14285 :         else if (!BULK_ARC_OP_USE_SORT(oldState->nins, newState->nins))
                   +  - ]
     796                 :             :         {
     797                 :             :                 /* With not too many arcs, just do them one at a time */
     798                 :       14127 :                 struct arc *a;
     799                 :             : 
     800         [ +  + ]:       31006 :                 while ((a = oldState->ins) != NULL)
     801                 :             :                 {
     802                 :       16879 :                         cparc(nfa, a, a->from, newState);
     803                 :       16879 :                         freearc(nfa, a);
     804                 :             :                 }
     805                 :       14127 :         }
     806                 :             :         else
     807                 :             :         {
     808                 :             :                 /*
     809                 :             :                  * With many arcs, use a sort-merge approach.  Note changearctarget()
     810                 :             :                  * will put the arc onto the front of newState's chain, so it does not
     811                 :             :                  * break our walk through the sorted part of the chain.
     812                 :             :                  */
     813                 :         158 :                 struct arc *oa;
     814                 :         158 :                 struct arc *na;
     815                 :             : 
     816                 :             :                 /*
     817                 :             :                  * Because we bypass newarc() in this code path, we'd better include a
     818                 :             :                  * cancel check.
     819                 :             :                  */
     820         [ +  - ]:         158 :                 INTERRUPT(nfa->v->re);
     821                 :             : 
     822                 :           8 :                 sortins(nfa, oldState);
     823                 :           8 :                 sortins(nfa, newState);
     824         [ -  + ]:           8 :                 if (NISERR())
     825                 :           0 :                         return;                         /* might have failed to sort */
     826                 :           8 :                 oa = oldState->ins;
     827                 :           8 :                 na = newState->ins;
     828   [ +  +  +  + ]:         296 :                 while (oa != NULL && na != NULL)
     829                 :             :                 {
     830                 :         288 :                         struct arc *a = oa;
     831                 :             : 
     832   [ +  -  -  + ]:         288 :                         switch (sortins_cmp(&oa, &na))
     833                 :             :                         {
     834                 :             :                                 case -1:
     835                 :             :                                         /* newState does not have anything matching oa */
     836                 :           0 :                                         oa = oa->inchain;
     837                 :             : 
     838                 :             :                                         /*
     839                 :             :                                          * Rather than doing createarc+freearc, we can just unlink
     840                 :             :                                          * and relink the existing arc struct.
     841                 :             :                                          */
     842                 :           0 :                                         changearctarget(a, newState);
     843                 :           0 :                                         break;
     844                 :             :                                 case 0:
     845                 :             :                                         /* match, advance in both lists */
     846                 :          44 :                                         oa = oa->inchain;
     847                 :          44 :                                         na = na->inchain;
     848                 :             :                                         /* ... and drop duplicate arc from oldState */
     849                 :          44 :                                         freearc(nfa, a);
     850                 :          44 :                                         break;
     851                 :             :                                 case +1:
     852                 :             :                                         /* advance only na; oa might have a match later */
     853                 :         244 :                                         na = na->inchain;
     854                 :         244 :                                         break;
     855                 :             :                                 default:
     856                 :           0 :                                         assert(NOTREACHED);
     857                 :           0 :                         }
     858                 :         288 :                 }
     859         [ -  + ]:           8 :                 while (oa != NULL)
     860                 :             :                 {
     861                 :             :                         /* newState does not have anything matching oa */
     862                 :           0 :                         struct arc *a = oa;
     863                 :             : 
     864                 :           0 :                         oa = oa->inchain;
     865                 :           0 :                         changearctarget(a, newState);
     866                 :           0 :                 }
     867      [ -  -  + ]:           8 :         }
     868                 :             : 
     869         [ +  - ]:       28068 :         assert(oldState->nins == 0);
     870         [ +  - ]:       28068 :         assert(oldState->ins == NULL);
     871                 :       28068 : }
     872                 :             : 
     873                 :             : /*
     874                 :             :  * copyins - copy in arcs of a state to another state
     875                 :             :  *
     876                 :             :  * The comments for moveins() apply here as well.  However, in current
     877                 :             :  * usage, this is *only* called with brand-new target states, so that
     878                 :             :  * only the "no need for de-duplication" code path is ever reached.
     879                 :             :  * We keep the rest #ifdef'd out in case it's needed in the future.
     880                 :             :  */
     881                 :             : static void
     882                 :         951 : copyins(struct nfa *nfa,
     883                 :             :                 struct state *oldState,
     884                 :             :                 struct state *newState)
     885                 :             : {
     886         [ +  - ]:         951 :         assert(oldState != newState);
     887         [ +  - ]:         951 :         assert(newState->nins == 0); /* see comment above */
     888                 :             : 
     889         [ +  - ]:         951 :         if (newState->nins == 0)
     890                 :             :         {
     891                 :             :                 /* No need for de-duplication */
     892                 :         951 :                 struct arc *a;
     893                 :             : 
     894         [ +  + ]:        8153 :                 for (a = oldState->ins; a != NULL; a = a->inchain)
     895                 :        7202 :                         createarc(nfa, a->type, a->co, a->from, newState);
     896                 :         951 :         }
     897                 :             : #ifdef NOT_USED                                 /* see comment above */
     898                 :             :         else if (!BULK_ARC_OP_USE_SORT(oldState->nins, newState->nins))
     899                 :             :         {
     900                 :             :                 /* With not too many arcs, just do them one at a time */
     901                 :             :                 struct arc *a;
     902                 :             : 
     903                 :             :                 for (a = oldState->ins; a != NULL; a = a->inchain)
     904                 :             :                         cparc(nfa, a, a->from, newState);
     905                 :             :         }
     906                 :             :         else
     907                 :             :         {
     908                 :             :                 /*
     909                 :             :                  * With many arcs, use a sort-merge approach.  Note that createarc()
     910                 :             :                  * will put new arcs onto the front of newState's chain, so it does
     911                 :             :                  * not break our walk through the sorted part of the chain.
     912                 :             :                  */
     913                 :             :                 struct arc *oa;
     914                 :             :                 struct arc *na;
     915                 :             : 
     916                 :             :                 /*
     917                 :             :                  * Because we bypass newarc() in this code path, we'd better include a
     918                 :             :                  * cancel check.
     919                 :             :                  */
     920                 :             :                 INTERRUPT(nfa->v->re);
     921                 :             : 
     922                 :             :                 sortins(nfa, oldState);
     923                 :             :                 sortins(nfa, newState);
     924                 :             :                 if (NISERR())
     925                 :             :                         return;                         /* might have failed to sort */
     926                 :             :                 oa = oldState->ins;
     927                 :             :                 na = newState->ins;
     928                 :             :                 while (oa != NULL && na != NULL)
     929                 :             :                 {
     930                 :             :                         struct arc *a = oa;
     931                 :             : 
     932                 :             :                         switch (sortins_cmp(&oa, &na))
     933                 :             :                         {
     934                 :             :                                 case -1:
     935                 :             :                                         /* newState does not have anything matching oa */
     936                 :             :                                         oa = oa->inchain;
     937                 :             :                                         createarc(nfa, a->type, a->co, a->from, newState);
     938                 :             :                                         break;
     939                 :             :                                 case 0:
     940                 :             :                                         /* match, advance in both lists */
     941                 :             :                                         oa = oa->inchain;
     942                 :             :                                         na = na->inchain;
     943                 :             :                                         break;
     944                 :             :                                 case +1:
     945                 :             :                                         /* advance only na; oa might have a match later */
     946                 :             :                                         na = na->inchain;
     947                 :             :                                         break;
     948                 :             :                                 default:
     949                 :             :                                         assert(NOTREACHED);
     950                 :             :                         }
     951                 :             :                 }
     952                 :             :                 while (oa != NULL)
     953                 :             :                 {
     954                 :             :                         /* newState does not have anything matching oa */
     955                 :             :                         struct arc *a = oa;
     956                 :             : 
     957                 :             :                         oa = oa->inchain;
     958                 :             :                         createarc(nfa, a->type, a->co, a->from, newState);
     959                 :             :                 }
     960                 :             :         }
     961                 :             : #endif                                                  /* NOT_USED */
     962                 :         951 : }
     963                 :             : 
     964                 :             : /*
     965                 :             :  * mergeins - merge a list of inarcs into a state
     966                 :             :  *
     967                 :             :  * This is much like copyins, but the source arcs are listed in an array,
     968                 :             :  * and are not guaranteed unique.  It's okay to clobber the array contents.
     969                 :             :  */
     970                 :             : static void
     971                 :       27315 : mergeins(struct nfa *nfa,
     972                 :             :                  struct state *s,
     973                 :             :                  struct arc **arcarray,
     974                 :             :                  int arccount)
     975                 :             : {
     976                 :       27315 :         struct arc *na;
     977                 :       27315 :         int                     i;
     978                 :       27315 :         int                     j;
     979                 :             : 
     980         [ +  + ]:       27315 :         if (arccount <= 0)
     981                 :       24215 :                 return;
     982                 :             : 
     983                 :             :         /*
     984                 :             :          * Because we bypass newarc() in this code path, we'd better include a
     985                 :             :          * cancel check.
     986                 :             :          */
     987         [ +  - ]:        3100 :         INTERRUPT(nfa->v->re);
     988                 :             : 
     989                 :             :         /* Sort existing inarcs as well as proposed new ones */
     990                 :        3100 :         sortins(nfa, s);
     991         [ -  + ]:        3100 :         if (NISERR())
     992                 :           0 :                 return;                                 /* might have failed to sort */
     993                 :             : 
     994                 :        3100 :         qsort(arcarray, arccount, sizeof(struct arc *), sortins_cmp);
     995                 :             : 
     996                 :             :         /*
     997                 :             :          * arcarray very likely includes dups, so we must eliminate them.  (This
     998                 :             :          * could be folded into the next loop, but it's not worth the trouble.)
     999                 :             :          */
    1000                 :        3100 :         j = 0;
    1001         [ +  + ]:     2380853 :         for (i = 1; i < arccount; i++)
    1002                 :             :         {
    1003      [ -  +  + ]:     2377753 :                 switch (sortins_cmp(&arcarray[j], &arcarray[i]))
    1004                 :             :                 {
    1005                 :             :                         case -1:
    1006                 :             :                                 /* non-dup */
    1007                 :     2377645 :                                 arcarray[++j] = arcarray[i];
    1008                 :     2377645 :                                 break;
    1009                 :             :                         case 0:
    1010                 :             :                                 /* dup */
    1011                 :             :                                 break;
    1012                 :             :                         default:
    1013                 :             :                                 /* trouble */
    1014                 :           0 :                                 assert(NOTREACHED);
    1015                 :           0 :                 }
    1016                 :     2377753 :         }
    1017                 :        3100 :         arccount = j + 1;
    1018                 :             : 
    1019                 :             :         /*
    1020                 :             :          * Now merge into s' inchain.  Note that createarc() will put new arcs
    1021                 :             :          * onto the front of s's chain, so it does not break our walk through the
    1022                 :             :          * sorted part of the chain.
    1023                 :             :          */
    1024                 :        3100 :         i = 0;
    1025                 :        3100 :         na = s->ins;
    1026   [ +  +  +  + ]:     2380772 :         while (i < arccount && na != NULL)
    1027                 :             :         {
    1028                 :     2377672 :                 struct arc *a = arcarray[i];
    1029                 :             : 
    1030   [ +  -  +  + ]:     2377672 :                 switch (sortins_cmp(&a, &na))
    1031                 :             :                 {
    1032                 :             :                         case -1:
    1033                 :             :                                 /* s does not have anything matching a */
    1034                 :     2374291 :                                 createarc(nfa, a->type, a->co, a->from, s);
    1035                 :     2374291 :                                 i++;
    1036                 :     2374291 :                                 break;
    1037                 :             :                         case 0:
    1038                 :             :                                 /* match, advance in both lists */
    1039                 :           2 :                                 i++;
    1040                 :           2 :                                 na = na->inchain;
    1041                 :           2 :                                 break;
    1042                 :             :                         case +1:
    1043                 :             :                                 /* advance only na; array might have a match later */
    1044                 :        3379 :                                 na = na->inchain;
    1045                 :        3379 :                                 break;
    1046                 :             :                         default:
    1047                 :           0 :                                 assert(NOTREACHED);
    1048                 :           0 :                 }
    1049                 :     2377672 :         }
    1050         [ +  + ]:        9552 :         while (i < arccount)
    1051                 :             :         {
    1052                 :             :                 /* s does not have anything matching a */
    1053                 :        6452 :                 struct arc *a = arcarray[i];
    1054                 :             : 
    1055                 :        6452 :                 createarc(nfa, a->type, a->co, a->from, s);
    1056                 :        6452 :                 i++;
    1057                 :        6452 :         }
    1058         [ -  + ]:       27315 : }
    1059                 :             : 
    1060                 :             : /*
    1061                 :             :  * moveouts - move all out arcs of a state to another state
    1062                 :             :  *
    1063                 :             :  * See comments for moveins()
    1064                 :             :  */
    1065                 :             : static void
    1066                 :        6551 : moveouts(struct nfa *nfa,
    1067                 :             :                  struct state *oldState,
    1068                 :             :                  struct state *newState)
    1069                 :             : {
    1070         [ +  - ]:        6551 :         assert(oldState != newState);
    1071                 :             : 
    1072         [ +  + ]:        6551 :         if (newState->nouts == 0)
    1073                 :             :         {
    1074                 :             :                 /* No need for de-duplication */
    1075                 :        3413 :                 struct arc *a;
    1076                 :             : 
    1077         [ +  + ]:        6941 :                 while ((a = oldState->outs) != NULL)
    1078                 :             :                 {
    1079                 :        3528 :                         createarc(nfa, a->type, a->co, newState, a->to);
    1080                 :        3528 :                         freearc(nfa, a);
    1081                 :             :                 }
    1082                 :        3413 :         }
    1083   [ +  +  +  +  :        3138 :         else if (!BULK_ARC_OP_USE_SORT(oldState->nouts, newState->nouts))
                   +  - ]
    1084                 :             :         {
    1085                 :             :                 /* With not too many arcs, just do them one at a time */
    1086                 :        3114 :                 struct arc *a;
    1087                 :             : 
    1088         [ +  + ]:        6559 :                 while ((a = oldState->outs) != NULL)
    1089                 :             :                 {
    1090                 :        3445 :                         cparc(nfa, a, newState, a->to);
    1091                 :        3445 :                         freearc(nfa, a);
    1092                 :             :                 }
    1093                 :        3114 :         }
    1094                 :             :         else
    1095                 :             :         {
    1096                 :             :                 /*
    1097                 :             :                  * With many arcs, use a sort-merge approach.  Note changearcsource()
    1098                 :             :                  * will put the arc onto the front of newState's chain, so it does not
    1099                 :             :                  * break our walk through the sorted part of the chain.
    1100                 :             :                  */
    1101                 :          24 :                 struct arc *oa;
    1102                 :          24 :                 struct arc *na;
    1103                 :             : 
    1104                 :             :                 /*
    1105                 :             :                  * Because we bypass newarc() in this code path, we'd better include a
    1106                 :             :                  * cancel check.
    1107                 :             :                  */
    1108         [ #  # ]:          24 :                 INTERRUPT(nfa->v->re);
    1109                 :             : 
    1110                 :           0 :                 sortouts(nfa, oldState);
    1111                 :           0 :                 sortouts(nfa, newState);
    1112         [ #  # ]:           0 :                 if (NISERR())
    1113                 :           0 :                         return;                         /* might have failed to sort */
    1114                 :           0 :                 oa = oldState->outs;
    1115                 :           0 :                 na = newState->outs;
    1116   [ #  #  #  # ]:           0 :                 while (oa != NULL && na != NULL)
    1117                 :             :                 {
    1118                 :           0 :                         struct arc *a = oa;
    1119                 :             : 
    1120   [ #  #  #  # ]:           0 :                         switch (sortouts_cmp(&oa, &na))
    1121                 :             :                         {
    1122                 :             :                                 case -1:
    1123                 :             :                                         /* newState does not have anything matching oa */
    1124                 :           0 :                                         oa = oa->outchain;
    1125                 :             : 
    1126                 :             :                                         /*
    1127                 :             :                                          * Rather than doing createarc+freearc, we can just unlink
    1128                 :             :                                          * and relink the existing arc struct.
    1129                 :             :                                          */
    1130                 :           0 :                                         changearcsource(a, newState);
    1131                 :           0 :                                         break;
    1132                 :             :                                 case 0:
    1133                 :             :                                         /* match, advance in both lists */
    1134                 :           0 :                                         oa = oa->outchain;
    1135                 :           0 :                                         na = na->outchain;
    1136                 :             :                                         /* ... and drop duplicate arc from oldState */
    1137                 :           0 :                                         freearc(nfa, a);
    1138                 :           0 :                                         break;
    1139                 :             :                                 case +1:
    1140                 :             :                                         /* advance only na; oa might have a match later */
    1141                 :           0 :                                         na = na->outchain;
    1142                 :           0 :                                         break;
    1143                 :             :                                 default:
    1144                 :           0 :                                         assert(NOTREACHED);
    1145                 :           0 :                         }
    1146                 :           0 :                 }
    1147         [ #  # ]:           0 :                 while (oa != NULL)
    1148                 :             :                 {
    1149                 :             :                         /* newState does not have anything matching oa */
    1150                 :           0 :                         struct arc *a = oa;
    1151                 :             : 
    1152                 :           0 :                         oa = oa->outchain;
    1153                 :           0 :                         changearcsource(a, newState);
    1154                 :           0 :                 }
    1155      [ #  #  # ]:           0 :         }
    1156                 :             : 
    1157         [ +  - ]:        6527 :         assert(oldState->nouts == 0);
    1158         [ +  - ]:        6527 :         assert(oldState->outs == NULL);
    1159                 :        6527 : }
    1160                 :             : 
    1161                 :             : /*
    1162                 :             :  * copyouts - copy out arcs of a state to another state
    1163                 :             :  *
    1164                 :             :  * See comments for copyins()
    1165                 :             :  */
    1166                 :             : static void
    1167                 :         787 : copyouts(struct nfa *nfa,
    1168                 :             :                  struct state *oldState,
    1169                 :             :                  struct state *newState)
    1170                 :             : {
    1171         [ +  - ]:         787 :         assert(oldState != newState);
    1172         [ +  - ]:         787 :         assert(newState->nouts == 0);        /* see comment above */
    1173                 :             : 
    1174         [ +  - ]:         787 :         if (newState->nouts == 0)
    1175                 :             :         {
    1176                 :             :                 /* No need for de-duplication */
    1177                 :         787 :                 struct arc *a;
    1178                 :             : 
    1179         [ +  + ]:        8653 :                 for (a = oldState->outs; a != NULL; a = a->outchain)
    1180                 :        7866 :                         createarc(nfa, a->type, a->co, newState, a->to);
    1181                 :         787 :         }
    1182                 :             : #ifdef NOT_USED                                 /* see comment above */
    1183                 :             :         else if (!BULK_ARC_OP_USE_SORT(oldState->nouts, newState->nouts))
    1184                 :             :         {
    1185                 :             :                 /* With not too many arcs, just do them one at a time */
    1186                 :             :                 struct arc *a;
    1187                 :             : 
    1188                 :             :                 for (a = oldState->outs; a != NULL; a = a->outchain)
    1189                 :             :                         cparc(nfa, a, newState, a->to);
    1190                 :             :         }
    1191                 :             :         else
    1192                 :             :         {
    1193                 :             :                 /*
    1194                 :             :                  * With many arcs, use a sort-merge approach.  Note that createarc()
    1195                 :             :                  * will put new arcs onto the front of newState's chain, so it does
    1196                 :             :                  * not break our walk through the sorted part of the chain.
    1197                 :             :                  */
    1198                 :             :                 struct arc *oa;
    1199                 :             :                 struct arc *na;
    1200                 :             : 
    1201                 :             :                 /*
    1202                 :             :                  * Because we bypass newarc() in this code path, we'd better include a
    1203                 :             :                  * cancel check.
    1204                 :             :                  */
    1205                 :             :                 INTERRUPT(nfa->v->re);
    1206                 :             : 
    1207                 :             :                 sortouts(nfa, oldState);
    1208                 :             :                 sortouts(nfa, newState);
    1209                 :             :                 if (NISERR())
    1210                 :             :                         return;                         /* might have failed to sort */
    1211                 :             :                 oa = oldState->outs;
    1212                 :             :                 na = newState->outs;
    1213                 :             :                 while (oa != NULL && na != NULL)
    1214                 :             :                 {
    1215                 :             :                         struct arc *a = oa;
    1216                 :             : 
    1217                 :             :                         switch (sortouts_cmp(&oa, &na))
    1218                 :             :                         {
    1219                 :             :                                 case -1:
    1220                 :             :                                         /* newState does not have anything matching oa */
    1221                 :             :                                         oa = oa->outchain;
    1222                 :             :                                         createarc(nfa, a->type, a->co, newState, a->to);
    1223                 :             :                                         break;
    1224                 :             :                                 case 0:
    1225                 :             :                                         /* match, advance in both lists */
    1226                 :             :                                         oa = oa->outchain;
    1227                 :             :                                         na = na->outchain;
    1228                 :             :                                         break;
    1229                 :             :                                 case +1:
    1230                 :             :                                         /* advance only na; oa might have a match later */
    1231                 :             :                                         na = na->outchain;
    1232                 :             :                                         break;
    1233                 :             :                                 default:
    1234                 :             :                                         assert(NOTREACHED);
    1235                 :             :                         }
    1236                 :             :                 }
    1237                 :             :                 while (oa != NULL)
    1238                 :             :                 {
    1239                 :             :                         /* newState does not have anything matching oa */
    1240                 :             :                         struct arc *a = oa;
    1241                 :             : 
    1242                 :             :                         oa = oa->outchain;
    1243                 :             :                         createarc(nfa, a->type, a->co, newState, a->to);
    1244                 :             :                 }
    1245                 :             :         }
    1246                 :             : #endif                                                  /* NOT_USED */
    1247                 :         787 : }
    1248                 :             : 
    1249                 :             : /*
    1250                 :             :  * cloneouts - copy out arcs of a state to another state pair, modifying type
    1251                 :             :  *
    1252                 :             :  * This is only used to convert PLAIN arcs to AHEAD/BEHIND arcs, which share
    1253                 :             :  * the same interpretation of "co".  It wouldn't be sensible with LACONs.
    1254                 :             :  */
    1255                 :             : static void
    1256                 :          23 : cloneouts(struct nfa *nfa,
    1257                 :             :                   struct state *old,
    1258                 :             :                   struct state *from,
    1259                 :             :                   struct state *to,
    1260                 :             :                   int type)
    1261                 :             : {
    1262                 :          23 :         struct arc *a;
    1263                 :             : 
    1264         [ +  - ]:          23 :         assert(old != from);
    1265   [ +  +  +  - ]:          23 :         assert(type == AHEAD || type == BEHIND);
    1266                 :             : 
    1267         [ +  + ]:          66 :         for (a = old->outs; a != NULL; a = a->outchain)
    1268                 :             :         {
    1269         [ +  - ]:          43 :                 assert(a->type == PLAIN);
    1270                 :          43 :                 newarc(nfa, type, a->co, from, to);
    1271                 :          43 :         }
    1272                 :          23 : }
    1273                 :             : 
    1274                 :             : /*
    1275                 :             :  * delsub - delete a sub-NFA, updating subre pointers if necessary
    1276                 :             :  *
    1277                 :             :  * This uses a recursive traversal of the sub-NFA, marking already-seen
    1278                 :             :  * states using their tmp pointer.
    1279                 :             :  */
    1280                 :             : static void
    1281                 :        1321 : delsub(struct nfa *nfa,
    1282                 :             :            struct state *lp,            /* the sub-NFA goes from here... */
    1283                 :             :            struct state *rp)            /* ...to here, *not* inclusive */
    1284                 :             : {
    1285         [ +  - ]:        1321 :         assert(lp != rp);
    1286                 :             : 
    1287                 :        1321 :         rp->tmp = rp;                                /* mark end */
    1288                 :             : 
    1289                 :        1321 :         deltraverse(nfa, lp, lp);
    1290         [ -  + ]:        1321 :         if (NISERR())
    1291                 :           0 :                 return;                                 /* asserts might not hold after failure */
    1292         [ +  - ]:        1321 :         assert(lp->nouts == 0 && rp->nins == 0);  /* did the job */
    1293         [ +  - ]:        1321 :         assert(lp->no != FREESTATE && rp->no != FREESTATE); /* no more */
    1294                 :             : 
    1295                 :        1321 :         rp->tmp = NULL;                              /* unmark end */
    1296                 :        1321 :         lp->tmp = NULL;                              /* and begin, marked by deltraverse */
    1297                 :        1321 : }
    1298                 :             : 
    1299                 :             : /*
    1300                 :             :  * deltraverse - the recursive heart of delsub
    1301                 :             :  * This routine's basic job is to destroy all out-arcs of the state.
    1302                 :             :  */
    1303                 :             : static void
    1304                 :        3911 : deltraverse(struct nfa *nfa,
    1305                 :             :                         struct state *leftend,
    1306                 :             :                         struct state *s)
    1307                 :             : {
    1308                 :        3911 :         struct arc *a;
    1309                 :        3911 :         struct state *to;
    1310                 :             : 
    1311                 :             :         /* Since this is recursive, it could be driven to stack overflow */
    1312         [ -  + ]:        3911 :         if (STACK_TOO_DEEP(nfa->v->re))
    1313                 :             :         {
    1314         [ #  # ]:           0 :                 NERR(REG_ETOOBIG);
    1315                 :           0 :                 return;
    1316                 :             :         }
    1317                 :             : 
    1318         [ +  + ]:        3911 :         if (s->nouts == 0)
    1319                 :          18 :                 return;                                 /* nothing to do */
    1320         [ +  + ]:        3893 :         if (s->tmp != NULL)
    1321                 :        1308 :                 return;                                 /* already in progress */
    1322                 :             : 
    1323                 :        2585 :         s->tmp = s;                                  /* mark as in progress */
    1324                 :             : 
    1325         [ +  + ]:        5175 :         while ((a = s->outs) != NULL)
    1326                 :             :         {
    1327                 :        2590 :                 to = a->to;
    1328                 :        2590 :                 deltraverse(nfa, leftend, to);
    1329         [ -  + ]:        2590 :                 if (NISERR())
    1330                 :           0 :                         return;                         /* asserts might not hold after failure */
    1331   [ +  +  +  - ]:        2590 :                 assert(to->nouts == 0 || to->tmp != NULL);
    1332                 :        2590 :                 freearc(nfa, a);
    1333   [ +  +  +  + ]:        2590 :                 if (to->nins == 0 && to->tmp == NULL)
    1334                 :             :                 {
    1335         [ +  - ]:        1264 :                         assert(to->nouts == 0);
    1336                 :        1264 :                         freestate(nfa, to);
    1337                 :        1264 :                 }
    1338                 :             :         }
    1339                 :             : 
    1340         [ +  - ]:        2585 :         assert(s->no != FREESTATE); /* we're still here */
    1341   [ +  +  +  - ]:        2585 :         assert(s == leftend || s->nins != 0);        /* and still reachable */
    1342         [ +  - ]:        2585 :         assert(s->nouts == 0);               /* but have no outarcs */
    1343                 :             : 
    1344                 :        2585 :         s->tmp = NULL;                               /* we're done here */
    1345         [ -  + ]:        3911 : }
    1346                 :             : 
    1347                 :             : /*
    1348                 :             :  * dupnfa - duplicate sub-NFA
    1349                 :             :  *
    1350                 :             :  * Another recursive traversal, this time using tmp to point to duplicates
    1351                 :             :  * as well as mark already-seen states.  (You knew there was a reason why
    1352                 :             :  * it's a state pointer, didn't you? :-))
    1353                 :             :  */
    1354                 :             : static void
    1355                 :        1325 : dupnfa(struct nfa *nfa,
    1356                 :             :            struct state *start,         /* duplicate of subNFA starting here */
    1357                 :             :            struct state *stop,          /* and stopping here */
    1358                 :             :            struct state *from,          /* stringing duplicate from here */
    1359                 :             :            struct state *to)            /* to here */
    1360                 :             : {
    1361         [ +  - ]:        1325 :         if (start == stop)
    1362                 :             :         {
    1363                 :           0 :                 newarc(nfa, EMPTY, 0, from, to);
    1364                 :           0 :                 return;
    1365                 :             :         }
    1366                 :             : 
    1367                 :        1325 :         stop->tmp = to;
    1368                 :        1325 :         duptraverse(nfa, start, from);
    1369                 :             :         /* done, except for clearing out the tmp pointers */
    1370                 :             : 
    1371                 :        1325 :         stop->tmp = NULL;
    1372                 :        1325 :         cleartraverse(nfa, start);
    1373                 :        1325 : }
    1374                 :             : 
    1375                 :             : /*
    1376                 :             :  * duptraverse - recursive heart of dupnfa
    1377                 :             :  */
    1378                 :             : static void
    1379                 :       29840 : duptraverse(struct nfa *nfa,
    1380                 :             :                         struct state *s,
    1381                 :             :                         struct state *stmp) /* s's duplicate, or NULL */
    1382                 :             : {
    1383                 :       29840 :         struct arc *a;
    1384                 :             : 
    1385                 :             :         /* Since this is recursive, it could be driven to stack overflow */
    1386         [ -  + ]:       29840 :         if (STACK_TOO_DEEP(nfa->v->re))
    1387                 :             :         {
    1388         [ #  # ]:           0 :                 NERR(REG_ETOOBIG);
    1389                 :           0 :                 return;
    1390                 :             :         }
    1391                 :             : 
    1392         [ +  + ]:       29840 :         if (s->tmp != NULL)
    1393                 :        6499 :                 return;                                 /* already done */
    1394                 :             : 
    1395         [ +  + ]:       23341 :         s->tmp = (stmp == NULL) ? newstate(nfa) : stmp;
    1396         [ +  - ]:       23341 :         if (s->tmp == NULL)
    1397                 :             :         {
    1398         [ #  # ]:           0 :                 assert(NISERR());
    1399                 :           0 :                 return;
    1400                 :             :         }
    1401                 :             : 
    1402   [ +  +  +  + ]:       51856 :         for (a = s->outs; a != NULL && !NISERR(); a = a->outchain)
    1403                 :             :         {
    1404                 :       28515 :                 duptraverse(nfa, a->to, (struct state *) NULL);
    1405         [ +  - ]:       28515 :                 if (NISERR())
    1406                 :           0 :                         break;
    1407         [ -  + ]:       28515 :                 assert(a->to->tmp != NULL);
    1408                 :       28515 :                 cparc(nfa, a, s->tmp, a->to->tmp);
    1409                 :       28515 :         }
    1410         [ -  + ]:       29840 : }
    1411                 :             : 
    1412                 :             : /*
    1413                 :             :  * removeconstraints - remove any constraints in an NFA
    1414                 :             :  *
    1415                 :             :  * Constraint arcs are replaced by empty arcs, essentially treating all
    1416                 :             :  * constraints as automatically satisfied.
    1417                 :             :  */
    1418                 :             : static void
    1419                 :          18 : removeconstraints(struct nfa *nfa,
    1420                 :             :                                   struct state *start,  /* process subNFA starting here */
    1421                 :             :                                   struct state *stop)   /* and stopping here */
    1422                 :             : {
    1423         [ +  - ]:          18 :         if (start == stop)
    1424                 :           0 :                 return;
    1425                 :             : 
    1426                 :          18 :         stop->tmp = stop;
    1427                 :          18 :         removetraverse(nfa, start);
    1428                 :             :         /* done, except for clearing out the tmp pointers */
    1429                 :             : 
    1430                 :          18 :         stop->tmp = NULL;
    1431                 :          18 :         cleartraverse(nfa, start);
    1432                 :          18 : }
    1433                 :             : 
    1434                 :             : /*
    1435                 :             :  * removetraverse - recursive heart of removeconstraints
    1436                 :             :  */
    1437                 :             : static void
    1438                 :          42 : removetraverse(struct nfa *nfa,
    1439                 :             :                            struct state *s)
    1440                 :             : {
    1441                 :          42 :         struct arc *a;
    1442                 :          42 :         struct arc *oa;
    1443                 :             : 
    1444                 :             :         /* Since this is recursive, it could be driven to stack overflow */
    1445         [ -  + ]:          42 :         if (STACK_TOO_DEEP(nfa->v->re))
    1446                 :             :         {
    1447         [ #  # ]:           0 :                 NERR(REG_ETOOBIG);
    1448                 :           0 :                 return;
    1449                 :             :         }
    1450                 :             : 
    1451         [ +  + ]:          42 :         if (s->tmp != NULL)
    1452                 :          20 :                 return;                                 /* already done */
    1453                 :             : 
    1454                 :          22 :         s->tmp = s;
    1455   [ +  +  +  + ]:          46 :         for (a = s->outs; a != NULL && !NISERR(); a = oa)
    1456                 :             :         {
    1457                 :          24 :                 removetraverse(nfa, a->to);
    1458         [ -  + ]:          24 :                 if (NISERR())
    1459                 :           0 :                         break;
    1460                 :          24 :                 oa = a->outchain;
    1461      [ -  +  - ]:          24 :                 switch (a->type)
    1462                 :             :                 {
    1463                 :             :                         case PLAIN:
    1464                 :             :                         case EMPTY:
    1465                 :             :                         case CANTMATCH:
    1466                 :             :                                 /* nothing to do */
    1467                 :          24 :                                 break;
    1468                 :             :                         case AHEAD:
    1469                 :             :                         case BEHIND:
    1470                 :             :                         case '^':
    1471                 :             :                         case '$':
    1472                 :             :                         case LACON:
    1473                 :             :                                 /* replace it */
    1474                 :           0 :                                 newarc(nfa, EMPTY, 0, s, a->to);
    1475                 :           0 :                                 freearc(nfa, a);
    1476                 :           0 :                                 break;
    1477                 :             :                         default:
    1478         [ #  # ]:           0 :                                 NERR(REG_ASSERT);
    1479                 :           0 :                                 break;
    1480                 :             :                 }
    1481                 :          24 :         }
    1482         [ -  + ]:          42 : }
    1483                 :             : 
    1484                 :             : /*
    1485                 :             :  * cleartraverse - recursive cleanup for algorithms that leave tmp ptrs set
    1486                 :             :  */
    1487                 :             : static void
    1488                 :      123859 : cleartraverse(struct nfa *nfa,
    1489                 :             :                           struct state *s)
    1490                 :             : {
    1491                 :      123859 :         struct arc *a;
    1492                 :             : 
    1493                 :             :         /* Since this is recursive, it could be driven to stack overflow */
    1494         [ -  + ]:      123859 :         if (STACK_TOO_DEEP(nfa->v->re))
    1495                 :             :         {
    1496         [ #  # ]:           0 :                 NERR(REG_ETOOBIG);
    1497                 :           0 :                 return;
    1498                 :             :         }
    1499                 :             : 
    1500         [ +  + ]:      123859 :         if (s->tmp == NULL)
    1501                 :       36741 :                 return;
    1502                 :       87118 :         s->tmp = NULL;
    1503                 :             : 
    1504         [ +  + ]:      205787 :         for (a = s->outs; a != NULL; a = a->outchain)
    1505                 :      118669 :                 cleartraverse(nfa, a->to);
    1506         [ -  + ]:      123859 : }
    1507                 :             : 
    1508                 :             : /*
    1509                 :             :  * single_color_transition - does getting from s1 to s2 cross one PLAIN arc?
    1510                 :             :  *
    1511                 :             :  * If traversing from s1 to s2 requires a single PLAIN match (possibly of any
    1512                 :             :  * of a set of colors), return a state whose outarc list contains only PLAIN
    1513                 :             :  * arcs of those color(s).  Otherwise return NULL.
    1514                 :             :  *
    1515                 :             :  * This is used before optimizing the NFA, so there may be EMPTY arcs, which
    1516                 :             :  * we should ignore; the possibility of an EMPTY is why the result state could
    1517                 :             :  * be different from s1.
    1518                 :             :  *
    1519                 :             :  * It's worth troubling to handle multiple parallel PLAIN arcs here because a
    1520                 :             :  * bracket construct such as [abc] might yield either one or several parallel
    1521                 :             :  * PLAIN arcs depending on earlier atoms in the expression.  We'd rather that
    1522                 :             :  * that implementation detail not create user-visible performance differences.
    1523                 :             :  */
    1524                 :             : static struct state *
    1525                 :          25 : single_color_transition(struct state *s1, struct state *s2)
    1526                 :             : {
    1527                 :          25 :         struct arc *a;
    1528                 :             : 
    1529                 :             :         /* Ignore leading EMPTY arc, if any */
    1530   [ +  -  -  + ]:          25 :         if (s1->nouts == 1 && s1->outs->type == EMPTY)
    1531                 :          25 :                 s1 = s1->outs->to;
    1532                 :             :         /* Likewise for any trailing EMPTY arc */
    1533   [ +  -  -  + ]:          25 :         if (s2->nins == 1 && s2->ins->type == EMPTY)
    1534                 :          25 :                 s2 = s2->ins->from;
    1535                 :             :         /* Perhaps we could have a single-state loop in between, if so reject */
    1536         [ +  - ]:          25 :         if (s1 == s2)
    1537                 :           0 :                 return NULL;
    1538                 :             :         /* s1 must have at least one outarc... */
    1539         [ +  - ]:          25 :         if (s1->outs == NULL)
    1540                 :           0 :                 return NULL;
    1541                 :             :         /* ... and they must all be PLAIN arcs to s2 */
    1542         [ +  + ]:          45 :         for (a = s1->outs; a != NULL; a = a->outchain)
    1543                 :             :         {
    1544   [ +  -  +  + ]:          27 :                 if (a->type != PLAIN || a->to != s2)
    1545                 :           7 :                         return NULL;
    1546                 :          20 :         }
    1547                 :             :         /* OK, return s1 as the possessor of the relevant outarcs */
    1548                 :          18 :         return s1;
    1549                 :          25 : }
    1550                 :             : 
    1551                 :             : /*
    1552                 :             :  * specialcolors - fill in special colors for an NFA
    1553                 :             :  */
    1554                 :             : static void
    1555                 :        1925 : specialcolors(struct nfa *nfa)
    1556                 :             : {
    1557                 :             :         /* false colors for BOS, BOL, EOS, EOL */
    1558         [ +  + ]:        1925 :         if (nfa->parent == NULL)
    1559                 :             :         {
    1560                 :         806 :                 nfa->bos[0] = pseudocolor(nfa->cm);
    1561                 :         806 :                 nfa->bos[1] = pseudocolor(nfa->cm);
    1562                 :         806 :                 nfa->eos[0] = pseudocolor(nfa->cm);
    1563                 :         806 :                 nfa->eos[1] = pseudocolor(nfa->cm);
    1564                 :         806 :         }
    1565                 :             :         else
    1566                 :             :         {
    1567         [ +  - ]:        1119 :                 assert(nfa->parent->bos[0] != COLORLESS);
    1568                 :        1119 :                 nfa->bos[0] = nfa->parent->bos[0];
    1569         [ +  - ]:        1119 :                 assert(nfa->parent->bos[1] != COLORLESS);
    1570                 :        1119 :                 nfa->bos[1] = nfa->parent->bos[1];
    1571         [ +  - ]:        1119 :                 assert(nfa->parent->eos[0] != COLORLESS);
    1572                 :        1119 :                 nfa->eos[0] = nfa->parent->eos[0];
    1573         [ +  - ]:        1119 :                 assert(nfa->parent->eos[1] != COLORLESS);
    1574                 :        1119 :                 nfa->eos[1] = nfa->parent->eos[1];
    1575                 :             :         }
    1576                 :        1925 : }
    1577                 :             : 
    1578                 :             : /*
    1579                 :             :  * optimize - optimize an NFA
    1580                 :             :  *
    1581                 :             :  * The main goal of this function is not so much "optimization" (though it
    1582                 :             :  * does try to get rid of useless NFA states) as reducing the NFA to a form
    1583                 :             :  * the regex executor can handle.  The executor, and indeed the cNFA format
    1584                 :             :  * that is its input, can only handle PLAIN and LACON arcs.  The output of
    1585                 :             :  * the regex parser also includes EMPTY (do-nothing) arcs, as well as
    1586                 :             :  * ^, $, AHEAD, and BEHIND constraint arcs, which we must get rid of here.
    1587                 :             :  * We first get rid of EMPTY arcs and then deal with the constraint arcs.
    1588                 :             :  * The hardest part of either job is to get rid of circular loops of the
    1589                 :             :  * target arc type.  We would have to do that in any case, though, as such a
    1590                 :             :  * loop would otherwise allow the executor to cycle through the loop endlessly
    1591                 :             :  * without making any progress in the input string.
    1592                 :             :  */
    1593                 :             : static long                                             /* re_info bits */
    1594                 :        1924 : optimize(struct nfa *nfa,
    1595                 :             :                  FILE *f)                               /* for debug output; NULL none */
    1596                 :             : {
    1597                 :             : #ifdef REG_DEBUG
    1598                 :             :         int                     verbose = (f != NULL) ? 1 : 0;
    1599                 :             : 
    1600                 :             :         if (verbose)
    1601                 :             :                 fprintf(f, "\ninitial cleanup:\n");
    1602                 :             : #endif
    1603                 :             :         /* If we have any CANTMATCH arcs, drop them; but this is uncommon */
    1604         [ +  - ]:        1924 :         if (nfa->flags & HASCANTMATCH)
    1605                 :             :         {
    1606                 :           0 :                 removecantmatch(nfa);
    1607                 :           0 :                 nfa->flags &= ~HASCANTMATCH;
    1608                 :           0 :         }
    1609                 :        1924 :         cleanup(nfa);                           /* may simplify situation */
    1610                 :             : #ifdef REG_DEBUG
    1611                 :             :         if (verbose)
    1612                 :             :                 dumpnfa(nfa, f);
    1613                 :             :         if (verbose)
    1614                 :             :                 fprintf(f, "\nempties:\n");
    1615                 :             : #endif
    1616                 :        1924 :         fixempties(nfa, f);                     /* get rid of EMPTY arcs */
    1617                 :             : #ifdef REG_DEBUG
    1618                 :             :         if (verbose)
    1619                 :             :                 fprintf(f, "\nconstraints:\n");
    1620                 :             : #endif
    1621                 :        1924 :         fixconstraintloops(nfa, f); /* get rid of constraint loops */
    1622                 :        1924 :         pullback(nfa, f);                       /* pull back constraints backward */
    1623                 :        1924 :         pushfwd(nfa, f);                        /* push fwd constraints forward */
    1624                 :             : #ifdef REG_DEBUG
    1625                 :             :         if (verbose)
    1626                 :             :                 fprintf(f, "\nfinal cleanup:\n");
    1627                 :             : #endif
    1628                 :        1924 :         cleanup(nfa);                           /* final tidying */
    1629                 :             : #ifdef REG_DEBUG
    1630                 :             :         if (verbose)
    1631                 :             :                 dumpnfa(nfa, f);
    1632                 :             : #endif
    1633                 :        1924 :         return analyze(nfa);            /* and analysis */
    1634                 :             : }
    1635                 :             : 
    1636                 :             : /*
    1637                 :             :  * pullback - pull back constraints backward to eliminate them
    1638                 :             :  */
    1639                 :             : static void
    1640                 :        1924 : pullback(struct nfa *nfa,
    1641                 :             :                  FILE *f)                               /* for debug output; NULL none */
    1642                 :             : {
    1643                 :        1924 :         struct state *s;
    1644                 :        1924 :         struct state *nexts;
    1645                 :        1924 :         struct arc *a;
    1646                 :        1924 :         struct arc *nexta;
    1647                 :        1924 :         struct state *intermediates;
    1648                 :        1924 :         int                     progress;
    1649                 :             : 
    1650                 :             :         /* find and pull until there are no more */
    1651                 :        1924 :         do
    1652                 :             :         {
    1653                 :        3161 :                 progress = 0;
    1654   [ +  +  +  + ]:       48433 :                 for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
    1655                 :             :                 {
    1656                 :       45272 :                         nexts = s->next;
    1657                 :       45272 :                         intermediates = NULL;
    1658   [ +  +  +  + ]:      115653 :                         for (a = s->outs; a != NULL && !NISERR(); a = nexta)
    1659                 :             :                         {
    1660                 :       70381 :                                 nexta = a->outchain;
    1661   [ +  +  +  + ]:       70381 :                                 if (a->type == '^' || a->type == BEHIND)
    1662         [ +  + ]:       11305 :                                         if (pull(nfa, a, &intermediates))
    1663                 :        2748 :                                                 progress = 1;
    1664                 :       70381 :                         }
    1665                 :             :                         /* clear tmp fields of intermediate states created here */
    1666         [ +  + ]:       45504 :                         while (intermediates != NULL)
    1667                 :             :                         {
    1668                 :         232 :                                 struct state *ns = intermediates->tmp;
    1669                 :             : 
    1670                 :         232 :                                 intermediates->tmp = NULL;
    1671                 :         232 :                                 intermediates = ns;
    1672                 :         232 :                         }
    1673                 :             :                         /* if s is now useless, get rid of it */
    1674   [ +  +  +  + ]:       45272 :                         if ((s->nins == 0 || s->nouts == 0) && !s->flag)
    1675                 :        2483 :                                 dropstate(nfa, s);
    1676                 :       45272 :                 }
    1677   [ +  +  +  - ]:        3161 :                 if (progress && f != NULL)
    1678                 :           0 :                         dumpnfa(nfa, f);
    1679   [ +  +  +  + ]:        3161 :         } while (progress && !NISERR());
    1680         [ +  + ]:        1924 :         if (NISERR())
    1681                 :           1 :                 return;
    1682                 :             : 
    1683                 :             :         /*
    1684                 :             :          * Any ^ constraints we were able to pull to the start state can now be
    1685                 :             :          * replaced by PLAIN arcs referencing the BOS or BOL colors.  There should
    1686                 :             :          * be no other ^ or BEHIND arcs left in the NFA, though we do not check
    1687                 :             :          * that here (compact() will fail if so).
    1688                 :             :          */
    1689         [ +  + ]:        6328 :         for (a = nfa->pre->outs; a != NULL; a = nexta)
    1690                 :             :         {
    1691                 :        4405 :                 nexta = a->outchain;
    1692         [ +  + ]:        4405 :                 if (a->type == '^')
    1693                 :             :                 {
    1694   [ +  +  -  + ]:        3321 :                         assert(a->co == 0 || a->co == 1);
    1695                 :        3321 :                         newarc(nfa, PLAIN, nfa->bos[a->co], a->from, a->to);
    1696                 :        3321 :                         freearc(nfa, a);
    1697                 :        3321 :                 }
    1698                 :        4405 :         }
    1699         [ -  + ]:        1924 : }
    1700                 :             : 
    1701                 :             : /*
    1702                 :             :  * pull - pull a back constraint backward past its source state
    1703                 :             :  *
    1704                 :             :  * Returns 1 if successful (which it always is unless the source is the
    1705                 :             :  * start state or we have an internal error), 0 if nothing happened.
    1706                 :             :  *
    1707                 :             :  * A significant property of this function is that it deletes no pre-existing
    1708                 :             :  * states, and no outarcs of the constraint's from state other than the given
    1709                 :             :  * constraint arc.  This makes the loops in pullback() safe, at the cost that
    1710                 :             :  * we may leave useless states behind.  Therefore, we leave it to pullback()
    1711                 :             :  * to delete such states.
    1712                 :             :  *
    1713                 :             :  * If the from state has multiple back-constraint outarcs, and/or multiple
    1714                 :             :  * compatible constraint inarcs, we only need to create one new intermediate
    1715                 :             :  * state per combination of predecessor and successor states.  *intermediates
    1716                 :             :  * points to a list of such intermediate states for this from state (chained
    1717                 :             :  * through their tmp fields).
    1718                 :             :  */
    1719                 :             : static int
    1720                 :        8557 : pull(struct nfa *nfa,
    1721                 :             :          struct arc *con,
    1722                 :             :          struct state **intermediates)
    1723                 :             : {
    1724                 :        8557 :         struct state *from = con->from;
    1725                 :        8557 :         struct state *to = con->to;
    1726                 :        8557 :         struct arc *a;
    1727                 :        8557 :         struct arc *nexta;
    1728                 :        8557 :         struct state *s;
    1729                 :             : 
    1730         [ +  - ]:        8557 :         assert(from != to);                     /* should have gotten rid of this earlier */
    1731         [ +  + ]:        8557 :         if (from->flag)                              /* can't pull back beyond start */
    1732                 :        5809 :                 return 0;
    1733         [ +  + ]:        2748 :         if (from->nins == 0)
    1734                 :             :         {                                                       /* unreachable */
    1735                 :         454 :                 freearc(nfa, con);
    1736                 :         454 :                 return 1;
    1737                 :             :         }
    1738                 :             : 
    1739                 :             :         /*
    1740                 :             :          * First, clone from state if necessary to avoid other outarcs.  This may
    1741                 :             :          * seem wasteful, but it simplifies the logic, and we'll get rid of the
    1742                 :             :          * clone state again at the bottom.
    1743                 :             :          */
    1744         [ +  + ]:        2294 :         if (from->nouts > 1)
    1745                 :             :         {
    1746                 :         951 :                 s = newstate(nfa);
    1747         [ -  + ]:         951 :                 if (NISERR())
    1748                 :           0 :                         return 0;
    1749                 :         951 :                 copyins(nfa, from, s);  /* duplicate inarcs */
    1750                 :         951 :                 cparc(nfa, con, s, to); /* move constraint arc */
    1751                 :         951 :                 freearc(nfa, con);
    1752         [ -  + ]:         951 :                 if (NISERR())
    1753                 :           0 :                         return 0;
    1754                 :         951 :                 from = s;
    1755                 :         951 :                 con = from->outs;
    1756                 :         951 :         }
    1757         [ +  - ]:        2294 :         assert(from->nouts == 1);
    1758                 :             : 
    1759                 :             :         /* propagate the constraint into the from state's inarcs */
    1760   [ +  +  +  + ]:       13963 :         for (a = from->ins; a != NULL && !NISERR(); a = nexta)
    1761                 :             :         {
    1762                 :       11669 :                 nexta = a->inchain;
    1763   [ +  +  +  +  :       11669 :                 switch (combine(nfa, con, a))
                      - ]
    1764                 :             :                 {
    1765                 :             :                         case INCOMPATIBLE:      /* destroy the arc */
    1766                 :        7518 :                                 freearc(nfa, a);
    1767                 :        7518 :                                 break;
    1768                 :             :                         case SATISFIED:         /* no action needed */
    1769                 :             :                                 break;
    1770                 :             :                         case COMPATIBLE:        /* swap the two arcs, more or less */
    1771                 :             :                                 /* need an intermediate state, but might have one already */
    1772         [ +  + ]:        2680 :                                 for (s = *intermediates; s != NULL; s = s->tmp)
    1773                 :             :                                 {
    1774         [ +  - ]:        2448 :                                         assert(s->nins > 0 && s->nouts > 0);
    1775   [ +  +  +  + ]:        2448 :                                         if (s->ins->from == a->from && s->outs->to == to)
    1776                 :        2178 :                                                 break;
    1777                 :         270 :                                 }
    1778         [ +  + ]:        2410 :                                 if (s == NULL)
    1779                 :             :                                 {
    1780                 :         232 :                                         s = newstate(nfa);
    1781         [ +  - ]:         232 :                                         if (NISERR())
    1782                 :           0 :                                                 return 0;
    1783                 :         232 :                                         s->tmp = *intermediates;
    1784                 :         232 :                                         *intermediates = s;
    1785                 :         232 :                                 }
    1786                 :        2410 :                                 cparc(nfa, con, a->from, s);
    1787                 :        2410 :                                 cparc(nfa, a, s, to);
    1788                 :        2410 :                                 freearc(nfa, a);
    1789                 :        2410 :                                 break;
    1790                 :             :                         case REPLACEARC:        /* replace arc's color */
    1791                 :          54 :                                 newarc(nfa, a->type, con->co, a->from, to);
    1792                 :          54 :                                 freearc(nfa, a);
    1793                 :          54 :                                 break;
    1794                 :             :                         default:
    1795                 :           0 :                                 assert(NOTREACHED);
    1796                 :           0 :                                 break;
    1797                 :             :                 }
    1798                 :       11669 :         }
    1799                 :             : 
    1800                 :             :         /* remaining inarcs, if any, incorporate the constraint */
    1801                 :        2294 :         moveins(nfa, from, to);
    1802                 :        2294 :         freearc(nfa, con);
    1803                 :             :         /* from state is now useless, but we leave it to pullback() to clean up */
    1804                 :        2294 :         return 1;
    1805                 :        8557 : }
    1806                 :             : 
    1807                 :             : /*
    1808                 :             :  * pushfwd - push forward constraints forward to eliminate them
    1809                 :             :  */
    1810                 :             : static void
    1811                 :        1924 : pushfwd(struct nfa *nfa,
    1812                 :             :                 FILE *f)                                /* for debug output; NULL none */
    1813                 :             : {
    1814                 :        1924 :         struct state *s;
    1815                 :        1924 :         struct state *nexts;
    1816                 :        1924 :         struct arc *a;
    1817                 :        1924 :         struct arc *nexta;
    1818                 :        1924 :         struct state *intermediates;
    1819                 :        1924 :         int                     progress;
    1820                 :             : 
    1821                 :             :         /* find and push until there are no more */
    1822                 :        1924 :         do
    1823                 :             :         {
    1824                 :        3147 :                 progress = 0;
    1825   [ +  +  +  + ]:       44068 :                 for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
    1826                 :             :                 {
    1827                 :       40921 :                         nexts = s->next;
    1828                 :       40921 :                         intermediates = NULL;
    1829   [ +  +  +  + ]:       99577 :                         for (a = s->ins; a != NULL && !NISERR(); a = nexta)
    1830                 :             :                         {
    1831                 :       58656 :                                 nexta = a->inchain;
    1832   [ +  +  +  + ]:       58656 :                                 if (a->type == '$' || a->type == AHEAD)
    1833         [ +  + ]:        8979 :                                         if (push(nfa, a, &intermediates))
    1834                 :        1906 :                                                 progress = 1;
    1835                 :       58656 :                         }
    1836                 :             :                         /* clear tmp fields of intermediate states created here */
    1837         [ -  + ]:       40921 :                         while (intermediates != NULL)
    1838                 :             :                         {
    1839                 :           0 :                                 struct state *ns = intermediates->tmp;
    1840                 :             : 
    1841                 :           0 :                                 intermediates->tmp = NULL;
    1842                 :           0 :                                 intermediates = ns;
    1843                 :           0 :                         }
    1844                 :             :                         /* if s is now useless, get rid of it */
    1845   [ +  +  +  + ]:       40921 :                         if ((s->nins == 0 || s->nouts == 0) && !s->flag)
    1846                 :        1963 :                                 dropstate(nfa, s);
    1847                 :       40921 :                 }
    1848   [ +  +  +  - ]:        3147 :                 if (progress && f != NULL)
    1849                 :           0 :                         dumpnfa(nfa, f);
    1850   [ +  +  +  + ]:        3147 :         } while (progress && !NISERR());
    1851         [ +  + ]:        1924 :         if (NISERR())
    1852                 :           1 :                 return;
    1853                 :             : 
    1854                 :             :         /*
    1855                 :             :          * Any $ constraints we were able to push to the post state can now be
    1856                 :             :          * replaced by PLAIN arcs referencing the EOS or EOL colors.  There should
    1857                 :             :          * be no other $ or AHEAD arcs left in the NFA, though we do not check
    1858                 :             :          * that here (compact() will fail if so).
    1859                 :             :          */
    1860         [ +  + ]:        5335 :         for (a = nfa->post->ins; a != NULL; a = nexta)
    1861                 :             :         {
    1862                 :        3412 :                 nexta = a->inchain;
    1863         [ +  + ]:        3412 :                 if (a->type == '$')
    1864                 :             :                 {
    1865   [ +  +  -  + ]:        2649 :                         assert(a->co == 0 || a->co == 1);
    1866                 :        2649 :                         newarc(nfa, PLAIN, nfa->eos[a->co], a->from, a->to);
    1867                 :        2649 :                         freearc(nfa, a);
    1868                 :        2649 :                 }
    1869                 :        3412 :         }
    1870         [ -  + ]:        1924 : }
    1871                 :             : 
    1872                 :             : /*
    1873                 :             :  * push - push a forward constraint forward past its destination state
    1874                 :             :  *
    1875                 :             :  * Returns 1 if successful (which it always is unless the destination is the
    1876                 :             :  * post state or we have an internal error), 0 if nothing happened.
    1877                 :             :  *
    1878                 :             :  * A significant property of this function is that it deletes no pre-existing
    1879                 :             :  * states, and no inarcs of the constraint's to state other than the given
    1880                 :             :  * constraint arc.  This makes the loops in pushfwd() safe, at the cost that
    1881                 :             :  * we may leave useless states behind.  Therefore, we leave it to pushfwd()
    1882                 :             :  * to delete such states.
    1883                 :             :  *
    1884                 :             :  * If the to state has multiple forward-constraint inarcs, and/or multiple
    1885                 :             :  * compatible constraint outarcs, we only need to create one new intermediate
    1886                 :             :  * state per combination of predecessor and successor states.  *intermediates
    1887                 :             :  * points to a list of such intermediate states for this to state (chained
    1888                 :             :  * through their tmp fields).
    1889                 :             :  */
    1890                 :             : static int
    1891                 :        7073 : push(struct nfa *nfa,
    1892                 :             :          struct arc *con,
    1893                 :             :          struct state **intermediates)
    1894                 :             : {
    1895                 :        7073 :         struct state *from = con->from;
    1896                 :        7073 :         struct state *to = con->to;
    1897                 :        7073 :         struct arc *a;
    1898                 :        7073 :         struct arc *nexta;
    1899                 :        7073 :         struct state *s;
    1900                 :             : 
    1901         [ +  - ]:        7073 :         assert(to != from);                     /* should have gotten rid of this earlier */
    1902         [ +  + ]:        7073 :         if (to->flag)                                /* can't push forward beyond end */
    1903                 :        5167 :                 return 0;
    1904         [ +  + ]:        1906 :         if (to->nouts == 0)
    1905                 :             :         {                                                       /* dead end */
    1906                 :          55 :                 freearc(nfa, con);
    1907                 :          55 :                 return 1;
    1908                 :             :         }
    1909                 :             : 
    1910                 :             :         /*
    1911                 :             :          * First, clone to state if necessary to avoid other inarcs.  This may
    1912                 :             :          * seem wasteful, but it simplifies the logic, and we'll get rid of the
    1913                 :             :          * clone state again at the bottom.
    1914                 :             :          */
    1915         [ +  + ]:        1851 :         if (to->nins > 1)
    1916                 :             :         {
    1917                 :         578 :                 s = newstate(nfa);
    1918         [ -  + ]:         578 :                 if (NISERR())
    1919                 :           0 :                         return 0;
    1920                 :         578 :                 copyouts(nfa, to, s);   /* duplicate outarcs */
    1921                 :         578 :                 cparc(nfa, con, from, s);       /* move constraint arc */
    1922                 :         578 :                 freearc(nfa, con);
    1923         [ -  + ]:         578 :                 if (NISERR())
    1924                 :           0 :                         return 0;
    1925                 :         578 :                 to = s;
    1926                 :         578 :                 con = to->ins;
    1927                 :         578 :         }
    1928         [ +  - ]:        1851 :         assert(to->nins == 1);
    1929                 :             : 
    1930                 :             :         /* propagate the constraint into the to state's outarcs */
    1931   [ +  +  +  + ]:        7792 :         for (a = to->outs; a != NULL && !NISERR(); a = nexta)
    1932                 :             :         {
    1933                 :        5941 :                 nexta = a->outchain;
    1934   [ +  +  -  +  :        5941 :                 switch (combine(nfa, con, a))
                      - ]
    1935                 :             :                 {
    1936                 :             :                         case INCOMPATIBLE:      /* destroy the arc */
    1937                 :        4339 :                                 freearc(nfa, a);
    1938                 :        4339 :                                 break;
    1939                 :             :                         case SATISFIED:         /* no action needed */
    1940                 :             :                                 break;
    1941                 :             :                         case COMPATIBLE:        /* swap the two arcs, more or less */
    1942                 :             :                                 /* need an intermediate state, but might have one already */
    1943         [ #  # ]:           0 :                                 for (s = *intermediates; s != NULL; s = s->tmp)
    1944                 :             :                                 {
    1945         [ #  # ]:           0 :                                         assert(s->nins > 0 && s->nouts > 0);
    1946   [ #  #  #  # ]:           0 :                                         if (s->ins->from == from && s->outs->to == a->to)
    1947                 :           0 :                                                 break;
    1948                 :           0 :                                 }
    1949         [ #  # ]:           0 :                                 if (s == NULL)
    1950                 :             :                                 {
    1951                 :           0 :                                         s = newstate(nfa);
    1952         [ #  # ]:           0 :                                         if (NISERR())
    1953                 :           0 :                                                 return 0;
    1954                 :           0 :                                         s->tmp = *intermediates;
    1955                 :           0 :                                         *intermediates = s;
    1956                 :           0 :                                 }
    1957                 :           0 :                                 cparc(nfa, con, s, a->to);
    1958                 :           0 :                                 cparc(nfa, a, from, s);
    1959                 :           0 :                                 freearc(nfa, a);
    1960                 :           0 :                                 break;
    1961                 :             :                         case REPLACEARC:        /* replace arc's color */
    1962                 :          79 :                                 newarc(nfa, a->type, con->co, from, a->to);
    1963                 :          79 :                                 freearc(nfa, a);
    1964                 :          79 :                                 break;
    1965                 :             :                         default:
    1966                 :           0 :                                 assert(NOTREACHED);
    1967                 :           0 :                                 break;
    1968                 :             :                 }
    1969                 :        5941 :         }
    1970                 :             : 
    1971                 :             :         /* remaining outarcs, if any, incorporate the constraint */
    1972                 :        1851 :         moveouts(nfa, to, from);
    1973                 :        1851 :         freearc(nfa, con);
    1974                 :             :         /* to state is now useless, but we leave it to pushfwd() to clean up */
    1975                 :        1851 :         return 1;
    1976                 :        7073 : }
    1977                 :             : 
    1978                 :             : /*
    1979                 :             :  * combine - constraint lands on an arc, what happens?
    1980                 :             :  *
    1981                 :             :  * #def INCOMPATIBLE    1       // destroys arc
    1982                 :             :  * #def SATISFIED               2       // constraint satisfied
    1983                 :             :  * #def COMPATIBLE              3       // compatible but not satisfied yet
    1984                 :             :  * #def REPLACEARC              4       // replace arc's color with constraint color
    1985                 :             :  */
    1986                 :             : static int
    1987                 :       17610 : combine(struct nfa *nfa,
    1988                 :             :                 struct arc *con,
    1989                 :             :                 struct arc *a)
    1990                 :             : {
    1991                 :             : #define  CA(ct,at)       (((ct)<<CHAR_BIT) | (at))
    1992                 :             : 
    1993   [ -  +  +  +  :       17610 :         switch (CA(con->type, a->type))
                +  +  + ]
    1994                 :             :         {
    1995                 :             :                 case CA('^', PLAIN):    /* newlines are handled separately */
    1996                 :             :                 case CA('$', PLAIN):
    1997                 :        8539 :                         return INCOMPATIBLE;
    1998                 :             :                         break;
    1999                 :             :                 case CA(AHEAD, PLAIN):  /* color constraints meet colors */
    2000                 :             :                 case CA(BEHIND, PLAIN):
    2001         [ +  + ]:         406 :                         if (con->co == a->co)
    2002                 :          67 :                                 return SATISFIED;
    2003         [ -  + ]:         339 :                         if (con->co == RAINBOW)
    2004                 :             :                         {
    2005                 :             :                                 /* con is satisfied unless arc's color is a pseudocolor */
    2006         [ #  # ]:           0 :                                 if (!(nfa->cm->cd[a->co].flags & PSEUDO))
    2007                 :           0 :                                         return SATISFIED;
    2008                 :           0 :                         }
    2009         [ +  + ]:         339 :                         else if (a->co == RAINBOW)
    2010                 :             :                         {
    2011                 :             :                                 /* con is incompatible if it's for a pseudocolor */
    2012                 :             :                                 /* (this is hypothetical; we make no such constraints today) */
    2013         [ -  + ]:         133 :                                 if (nfa->cm->cd[con->co].flags & PSEUDO)
    2014                 :           0 :                                         return INCOMPATIBLE;
    2015                 :             :                                 /* otherwise, constraint constrains arc to be only its color */
    2016                 :         133 :                                 return REPLACEARC;
    2017                 :             :                         }
    2018                 :         206 :                         return INCOMPATIBLE;
    2019                 :             :                         break;
    2020                 :             :                 case CA('^', '^'):              /* collision, similar constraints */
    2021                 :             :                 case CA('$', '$'):
    2022         [ +  + ]:        5622 :                         if (con->co == a->co)     /* true duplication */
    2023                 :        3104 :                                 return SATISFIED;
    2024                 :        2518 :                         return INCOMPATIBLE;
    2025                 :             :                         break;
    2026                 :             :                 case CA(AHEAD, AHEAD):  /* collision, similar constraints */
    2027                 :             :                 case CA(BEHIND, BEHIND):
    2028         [ +  + ]:         197 :                         if (con->co == a->co)     /* true duplication */
    2029                 :          39 :                                 return SATISFIED;
    2030         [ -  + ]:         158 :                         if (con->co == RAINBOW)
    2031                 :             :                         {
    2032                 :             :                                 /* con is satisfied unless arc's color is a pseudocolor */
    2033         [ #  # ]:           0 :                                 if (!(nfa->cm->cd[a->co].flags & PSEUDO))
    2034                 :           0 :                                         return SATISFIED;
    2035                 :           0 :                         }
    2036         [ -  + ]:         158 :                         else if (a->co == RAINBOW)
    2037                 :             :                         {
    2038                 :             :                                 /* con is incompatible if it's for a pseudocolor */
    2039                 :             :                                 /* (this is hypothetical; we make no such constraints today) */
    2040         [ #  # ]:           0 :                                 if (nfa->cm->cd[con->co].flags & PSEUDO)
    2041                 :           0 :                                         return INCOMPATIBLE;
    2042                 :             :                                 /* otherwise, constraint constrains arc to be only its color */
    2043                 :           0 :                                 return REPLACEARC;
    2044                 :             :                         }
    2045                 :         158 :                         return INCOMPATIBLE;
    2046                 :             :                         break;
    2047                 :             :                 case CA('^', BEHIND):   /* collision, dissimilar constraints */
    2048                 :             :                 case CA(BEHIND, '^'):
    2049                 :             :                 case CA('$', AHEAD):
    2050                 :             :                 case CA(AHEAD, '$'):
    2051                 :         436 :                         return INCOMPATIBLE;
    2052                 :             :                         break;
    2053                 :             :                 case CA('^', '$'):              /* constraints passing each other */
    2054                 :             :                 case CA('^', AHEAD):
    2055                 :             :                 case CA(BEHIND, '$'):
    2056                 :             :                 case CA(BEHIND, AHEAD):
    2057                 :             :                 case CA('$', '^'):
    2058                 :             :                 case CA('$', BEHIND):
    2059                 :             :                 case CA(AHEAD, '^'):
    2060                 :             :                 case CA(AHEAD, BEHIND):
    2061                 :             :                 case CA('^', LACON):
    2062                 :             :                 case CA(BEHIND, LACON):
    2063                 :             :                 case CA('$', LACON):
    2064                 :             :                 case CA(AHEAD, LACON):
    2065                 :        2410 :                         return COMPATIBLE;
    2066                 :             :                         break;
    2067                 :             :         }
    2068                 :           0 :         assert(NOTREACHED);
    2069                 :           0 :         return INCOMPATIBLE;            /* for benefit of blind compilers */
    2070                 :       17610 : }
    2071                 :             : 
    2072                 :             : /*
    2073                 :             :  * fixempties - get rid of EMPTY arcs
    2074                 :             :  */
    2075                 :             : static void
    2076                 :        1924 : fixempties(struct nfa *nfa,
    2077                 :             :                    FILE *f)                             /* for debug output; NULL none */
    2078                 :             : {
    2079                 :        1924 :         struct state *s;
    2080                 :        1924 :         struct state *s2;
    2081                 :        1924 :         struct state *nexts;
    2082                 :        1924 :         struct arc *a;
    2083                 :        1924 :         struct arc *nexta;
    2084                 :        1924 :         int                     totalinarcs;
    2085                 :        1924 :         struct arc **inarcsorig;
    2086                 :        1924 :         struct arc **arcarray;
    2087                 :        1924 :         int                     arccount;
    2088                 :        1924 :         int                     prevnins;
    2089                 :        1924 :         int                     nskip;
    2090                 :             : 
    2091                 :             :         /*
    2092                 :             :          * First, get rid of any states whose sole out-arc is an EMPTY, since
    2093                 :             :          * they're basically just aliases for their successor.  The parsing
    2094                 :             :          * algorithm creates enough of these that it's worth special-casing this.
    2095                 :             :          */
    2096   [ +  +  +  + ]:       43534 :         for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
    2097                 :             :         {
    2098                 :       41610 :                 nexts = s->next;
    2099   [ +  +  +  + ]:       41610 :                 if (s->flag || s->nouts != 1)
    2100                 :       10568 :                         continue;
    2101                 :       31042 :                 a = s->outs;
    2102         [ +  - ]:       31042 :                 assert(a != NULL && a->outchain == NULL);
    2103         [ +  + ]:       31042 :                 if (a->type != EMPTY)
    2104                 :       19201 :                         continue;
    2105         [ +  - ]:       11841 :                 if (s != a->to)
    2106                 :       11841 :                         moveins(nfa, s, a->to);
    2107                 :       11841 :                 dropstate(nfa, s);
    2108                 :       11841 :         }
    2109                 :             : 
    2110                 :             :         /*
    2111                 :             :          * Similarly, get rid of any state with a single EMPTY in-arc, by folding
    2112                 :             :          * it into its predecessor.
    2113                 :             :          */
    2114   [ +  +  +  + ]:       31693 :         for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
    2115                 :             :         {
    2116                 :       29769 :                 nexts = s->next;
    2117                 :             :                 /* while we're at it, ensure tmp fields are clear for next step */
    2118         [ +  - ]:       29769 :                 assert(s->tmp == NULL);
    2119   [ +  +  +  + ]:       29769 :                 if (s->flag || s->nins != 1)
    2120                 :       10178 :                         continue;
    2121                 :       19591 :                 a = s->ins;
    2122         [ +  - ]:       19591 :                 assert(a != NULL && a->inchain == NULL);
    2123         [ +  + ]:       19591 :                 if (a->type != EMPTY)
    2124                 :       18328 :                         continue;
    2125         [ -  + ]:        1263 :                 if (s != a->from)
    2126                 :        1263 :                         moveouts(nfa, s, a->from);
    2127                 :        1263 :                 dropstate(nfa, s);
    2128                 :        1263 :         }
    2129                 :             : 
    2130         [ -  + ]:        1924 :         if (NISERR())
    2131                 :           0 :                 return;
    2132                 :             : 
    2133                 :             :         /*
    2134                 :             :          * For each remaining NFA state, find all other states from which it is
    2135                 :             :          * reachable by a chain of one or more EMPTY arcs.  Then generate new arcs
    2136                 :             :          * that eliminate the need for each such chain.
    2137                 :             :          *
    2138                 :             :          * We could replace a chain of EMPTY arcs that leads from a "from" state
    2139                 :             :          * to a "to" state either by pushing non-EMPTY arcs forward (linking
    2140                 :             :          * directly from "from"'s predecessors to "to") or by pulling them back
    2141                 :             :          * (linking directly from "from" to "to"'s successors).  We choose to
    2142                 :             :          * always do the former; this choice is somewhat arbitrary, but the
    2143                 :             :          * approach below requires that we uniformly do one or the other.
    2144                 :             :          *
    2145                 :             :          * Suppose we have a chain of N successive EMPTY arcs (where N can easily
    2146                 :             :          * approach the size of the NFA).  All of the intermediate states must
    2147                 :             :          * have additional inarcs and outarcs, else they'd have been removed by
    2148                 :             :          * the steps above.  Assuming their inarcs are mostly not empties, we will
    2149                 :             :          * add O(N^2) arcs to the NFA, since a non-EMPTY inarc leading to any one
    2150                 :             :          * state in the chain must be duplicated to lead to all its successor
    2151                 :             :          * states as well.  So there is no hope of doing less than O(N^2) work;
    2152                 :             :          * however, we should endeavor to keep the big-O cost from being even
    2153                 :             :          * worse than that, which it can easily become without care.  In
    2154                 :             :          * particular, suppose we were to copy all S1's inarcs forward to S2, and
    2155                 :             :          * then also to S3, and then later we consider pushing S2's inarcs forward
    2156                 :             :          * to S3.  If we include the arcs already copied from S1 in that, we'd be
    2157                 :             :          * doing O(N^3) work.  (The duplicate-arc elimination built into newarc()
    2158                 :             :          * and its cohorts would get rid of the extra arcs, but not without cost.)
    2159                 :             :          *
    2160                 :             :          * We can avoid this cost by treating only arcs that existed at the start
    2161                 :             :          * of this phase as candidates to be pushed forward.  To identify those,
    2162                 :             :          * we remember the first inarc each state had to start with.  We rely on
    2163                 :             :          * the fact that newarc() and friends put new arcs on the front of their
    2164                 :             :          * to-states' inchains, and that this phase never deletes arcs, so that
    2165                 :             :          * the original arcs must be the last arcs in their to-states' inchains.
    2166                 :             :          *
    2167                 :             :          * So the process here is that, for each state in the NFA, we gather up
    2168                 :             :          * all non-EMPTY inarcs of states that can reach the target state via
    2169                 :             :          * EMPTY arcs.  We then sort, de-duplicate, and merge these arcs into the
    2170                 :             :          * target state's inchain.  (We can safely use sort-merge for this as long
    2171                 :             :          * as we update each state's original-arcs pointer after we add arcs to
    2172                 :             :          * it; the sort step of mergeins probably changed the order of the old
    2173                 :             :          * arcs.)
    2174                 :             :          *
    2175                 :             :          * Another refinement worth making is that, because we only add non-EMPTY
    2176                 :             :          * arcs during this phase, and all added arcs have the same from-state as
    2177                 :             :          * the non-EMPTY arc they were cloned from, we know ahead of time that any
    2178                 :             :          * states having only EMPTY outarcs will be useless for lack of outarcs
    2179                 :             :          * after we drop the EMPTY arcs.  (They cannot gain non-EMPTY outarcs if
    2180                 :             :          * they had none to start with.)  So we need not bother to update the
    2181                 :             :          * inchains of such states at all.
    2182                 :             :          */
    2183                 :             : 
    2184                 :             :         /* Remember the states' first original inarcs */
    2185                 :             :         /* ... and while at it, count how many old inarcs there are altogether */
    2186                 :        1924 :         inarcsorig = (struct arc **) MALLOC(nfa->nstates * sizeof(struct arc *));
    2187         [ +  - ]:        1924 :         if (inarcsorig == NULL)
    2188                 :             :         {
    2189         [ #  # ]:           0 :                 NERR(REG_ESPACE);
    2190                 :           0 :                 return;
    2191                 :             :         }
    2192                 :        1924 :         totalinarcs = 0;
    2193         [ +  + ]:       30430 :         for (s = nfa->states; s != NULL; s = s->next)
    2194                 :             :         {
    2195                 :       28506 :                 inarcsorig[s->no] = s->ins;
    2196                 :       28506 :                 totalinarcs += s->nins;
    2197                 :       28506 :         }
    2198                 :             : 
    2199                 :             :         /*
    2200                 :             :          * Create a workspace for accumulating the inarcs to be added to the
    2201                 :             :          * current target state.  totalinarcs is probably a considerable
    2202                 :             :          * overestimate of the space needed, but the NFA is unlikely to be large
    2203                 :             :          * enough at this point to make it worth being smarter.
    2204                 :             :          */
    2205                 :        1924 :         arcarray = (struct arc **) MALLOC(totalinarcs * sizeof(struct arc *));
    2206         [ +  - ]:        1924 :         if (arcarray == NULL)
    2207                 :             :         {
    2208         [ #  # ]:           0 :                 NERR(REG_ESPACE);
    2209                 :           0 :                 FREE(inarcsorig);
    2210                 :           0 :                 return;
    2211                 :             :         }
    2212                 :             : 
    2213                 :             :         /* And iterate over the target states */
    2214   [ +  +  +  + ]:       29603 :         for (s = nfa->states; s != NULL && !NISERR(); s = s->next)
    2215                 :             :         {
    2216                 :             :                 /* Ignore target states without non-EMPTY outarcs, per note above */
    2217   [ +  +  +  + ]:       27679 :                 if (!s->flag && !hasnonemptyout(s))
    2218                 :         364 :                         continue;
    2219                 :             : 
    2220                 :             :                 /* Find predecessor states and accumulate their original inarcs */
    2221                 :       27315 :                 arccount = 0;
    2222         [ +  + ]:     2402178 :                 for (s2 = emptyreachable(nfa, s, s, inarcsorig); s2 != s; s2 = nexts)
    2223                 :             :                 {
    2224                 :             :                         /* Add s2's original inarcs to arcarray[], but ignore empties */
    2225         [ +  + ]:     7131145 :                         for (a = inarcsorig[s2->no]; a != NULL; a = a->inchain)
    2226                 :             :                         {
    2227         [ +  + ]:     4756282 :                                 if (a->type != EMPTY)
    2228                 :     2380853 :                                         arcarray[arccount++] = a;
    2229                 :     4756282 :                         }
    2230                 :             : 
    2231                 :             :                         /* Reset the tmp fields as we walk back */
    2232                 :     2374863 :                         nexts = s2->tmp;
    2233                 :     2374863 :                         s2->tmp = NULL;
    2234                 :     2374863 :                 }
    2235                 :       27315 :                 s->tmp = NULL;
    2236         [ -  + ]:       27315 :                 assert(arccount <= totalinarcs);
    2237                 :             : 
    2238                 :             :                 /* Remember how many original inarcs this state has */
    2239                 :       27315 :                 prevnins = s->nins;
    2240                 :             : 
    2241                 :             :                 /* Add non-duplicate inarcs to target state */
    2242                 :       27315 :                 mergeins(nfa, s, arcarray, arccount);
    2243                 :             : 
    2244                 :             :                 /* Now we must update the state's inarcsorig pointer */
    2245                 :       27315 :                 nskip = s->nins - prevnins;
    2246                 :       27315 :                 a = s->ins;
    2247         [ +  + ]:     2406155 :                 while (nskip-- > 0)
    2248                 :     2378840 :                         a = a->inchain;
    2249                 :       27315 :                 inarcsorig[s->no] = a;
    2250                 :       27315 :         }
    2251                 :             : 
    2252                 :        1924 :         FREE(arcarray);
    2253                 :        1924 :         FREE(inarcsorig);
    2254                 :             : 
    2255         [ +  + ]:        1924 :         if (NISERR())
    2256                 :           1 :                 return;
    2257                 :             : 
    2258                 :             :         /*
    2259                 :             :          * Now remove all the EMPTY arcs, since we don't need them anymore.
    2260                 :             :          */
    2261         [ +  + ]:       27427 :         for (s = nfa->states; s != NULL; s = s->next)
    2262                 :             :         {
    2263         [ +  + ]:       73802 :                 for (a = s->outs; a != NULL; a = nexta)
    2264                 :             :                 {
    2265                 :       48298 :                         nexta = a->outchain;
    2266         [ +  + ]:       48298 :                         if (a->type == EMPTY)
    2267                 :        1695 :                                 freearc(nfa, a);
    2268                 :       48298 :                 }
    2269                 :       25504 :         }
    2270                 :             : 
    2271                 :             :         /*
    2272                 :             :          * And remove any states that have become useless.  (This cleanup is not
    2273                 :             :          * very thorough, and would be even less so if we tried to combine it with
    2274                 :             :          * the previous step; but cleanup() will take care of anything we miss.)
    2275                 :             :          */
    2276         [ +  + ]:       27427 :         for (s = nfa->states; s != NULL; s = nexts)
    2277                 :             :         {
    2278                 :       25504 :                 nexts = s->next;
    2279   [ +  +  +  + ]:       25504 :                 if ((s->nins == 0 || s->nouts == 0) && !s->flag)
    2280                 :         364 :                         dropstate(nfa, s);
    2281                 :       25504 :         }
    2282                 :             : 
    2283         [ +  - ]:        1923 :         if (f != NULL)
    2284                 :           0 :                 dumpnfa(nfa, f);
    2285         [ -  + ]:        1924 : }
    2286                 :             : 
    2287                 :             : /*
    2288                 :             :  * emptyreachable - recursively find all states that can reach s by EMPTY arcs
    2289                 :             :  *
    2290                 :             :  * The return value is the last such state found.  Its tmp field links back
    2291                 :             :  * to the next-to-last such state, and so on back to s, so that all these
    2292                 :             :  * states can be located without searching the whole NFA.
    2293                 :             :  *
    2294                 :             :  * Since this is only used in fixempties(), we pass in the inarcsorig[] array
    2295                 :             :  * maintained by that function.  This lets us skip over all new inarcs, which
    2296                 :             :  * are certainly not EMPTY arcs.
    2297                 :             :  *
    2298                 :             :  * The maximum recursion depth here is equal to the length of the longest
    2299                 :             :  * loop-free chain of EMPTY arcs, which is surely no more than the size of
    2300                 :             :  * the NFA ... but that could still be enough to cause trouble.
    2301                 :             :  */
    2302                 :             : static struct state *
    2303                 :     2402178 : emptyreachable(struct nfa *nfa,
    2304                 :             :                            struct state *s,
    2305                 :             :                            struct state *lastfound,
    2306                 :             :                            struct arc **inarcsorig)
    2307                 :             : {
    2308                 :     2402178 :         struct arc *a;
    2309                 :             : 
    2310                 :             :         /* Since this is recursive, it could be driven to stack overflow */
    2311         [ +  - ]:     2402178 :         if (STACK_TOO_DEEP(nfa->v->re))
    2312                 :             :         {
    2313         [ #  # ]:           0 :                 NERR(REG_ETOOBIG);
    2314                 :           0 :                 return lastfound;
    2315                 :             :         }
    2316                 :             : 
    2317                 :     2402178 :         s->tmp = lastfound;
    2318                 :     2402178 :         lastfound = s;
    2319         [ +  + ]:     7195958 :         for (a = inarcsorig[s->no]; a != NULL; a = a->inchain)
    2320                 :             :         {
    2321   [ +  +  +  + ]:     4793780 :                 if (a->type == EMPTY && a->from->tmp == NULL)
    2322                 :     2374863 :                         lastfound = emptyreachable(nfa, a->from, lastfound, inarcsorig);
    2323                 :     4793780 :         }
    2324                 :     2402178 :         return lastfound;
    2325                 :     2402178 : }
    2326                 :             : 
    2327                 :             : /*
    2328                 :             :  * isconstraintarc - detect whether an arc is of a constraint type
    2329                 :             :  */
    2330                 :             : static inline int
    2331                 :      103943 : isconstraintarc(struct arc *a)
    2332                 :             : {
    2333         [ +  + ]:      103943 :         switch (a->type)
    2334                 :             :         {
    2335                 :             :                 case '^':
    2336                 :             :                 case '$':
    2337                 :             :                 case BEHIND:
    2338                 :             :                 case AHEAD:
    2339                 :             :                 case LACON:
    2340                 :       28719 :                         return 1;
    2341                 :             :         }
    2342                 :       75224 :         return 0;
    2343                 :      103943 : }
    2344                 :             : 
    2345                 :             : /*
    2346                 :             :  * hasconstraintout - does state have a constraint out arc?
    2347                 :             :  */
    2348                 :             : static int
    2349                 :        1852 : hasconstraintout(struct state *s)
    2350                 :             : {
    2351                 :        1852 :         struct arc *a;
    2352                 :             : 
    2353         [ +  + ]:        3882 :         for (a = s->outs; a != NULL; a = a->outchain)
    2354                 :             :         {
    2355         [ +  + ]:        3008 :                 if (isconstraintarc(a))
    2356                 :         978 :                         return 1;
    2357                 :        2030 :         }
    2358                 :         874 :         return 0;
    2359                 :        1852 : }
    2360                 :             : 
    2361                 :             : /*
    2362                 :             :  * fixconstraintloops - get rid of loops containing only constraint arcs
    2363                 :             :  *
    2364                 :             :  * A loop of states that contains only constraint arcs is useless, since
    2365                 :             :  * passing around the loop represents no forward progress.  Moreover, it
    2366                 :             :  * would cause infinite looping in pullback/pushfwd, so we need to get rid
    2367                 :             :  * of such loops before doing that.
    2368                 :             :  */
    2369                 :             : static void
    2370                 :        1924 : fixconstraintloops(struct nfa *nfa,
    2371                 :             :                                    FILE *f)             /* for debug output; NULL none */
    2372                 :             : {
    2373                 :        1924 :         struct state *s;
    2374                 :        1924 :         struct state *nexts;
    2375                 :        1924 :         struct arc *a;
    2376                 :        1924 :         struct arc *nexta;
    2377                 :        1924 :         int                     hasconstraints;
    2378                 :             : 
    2379                 :             :         /*
    2380                 :             :          * In the trivial case of a state that loops to itself, we can just drop
    2381                 :             :          * the constraint arc altogether.  This is worth special-casing because
    2382                 :             :          * such loops are far more common than loops containing multiple states.
    2383                 :             :          * While we're at it, note whether any constraint arcs survive.
    2384                 :             :          */
    2385                 :        1924 :         hasconstraints = 0;
    2386   [ +  +  +  + ]:       27064 :         for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
    2387                 :             :         {
    2388                 :       25140 :                 nexts = s->next;
    2389                 :             :                 /* while we're at it, ensure tmp fields are clear for next step */
    2390         [ +  - ]:       25140 :                 assert(s->tmp == NULL);
    2391   [ +  +  +  + ]:       71377 :                 for (a = s->outs; a != NULL && !NISERR(); a = nexta)
    2392                 :             :                 {
    2393                 :       46237 :                         nexta = a->outchain;
    2394         [ +  + ]:       46237 :                         if (isconstraintarc(a))
    2395                 :             :                         {
    2396         [ +  + ]:       11478 :                                 if (a->to == s)
    2397                 :          28 :                                         freearc(nfa, a);
    2398                 :             :                                 else
    2399                 :       11450 :                                         hasconstraints = 1;
    2400                 :       11478 :                         }
    2401                 :       46237 :                 }
    2402                 :             :                 /* If we removed all the outarcs, the state is useless. */
    2403   [ +  +  +  - ]:       25140 :                 if (s->nouts == 0 && !s->flag)
    2404                 :           0 :                         dropstate(nfa, s);
    2405                 :       25140 :         }
    2406                 :             : 
    2407                 :             :         /* Nothing to do if no remaining constraint arcs */
    2408   [ +  +  -  + ]:        1924 :         if (NISERR() || !hasconstraints)
    2409                 :           1 :                 return;
    2410                 :             : 
    2411                 :             :         /*
    2412                 :             :          * Starting from each remaining NFA state, search outwards for a
    2413                 :             :          * constraint loop.  If we find a loop, break the loop, then start the
    2414                 :             :          * search over.  (We could possibly retain some state from the first scan,
    2415                 :             :          * but it would complicate things greatly, and multi-state constraint
    2416                 :             :          * loops are rare enough that it's not worth optimizing the case.)
    2417                 :             :          */
    2418                 :             : restart:
    2419   [ +  +  +  + ]:       28768 :         for (s = nfa->states; s != NULL && !NISERR(); s = s->next)
    2420                 :             :         {
    2421         [ +  + ]:       26845 :                 if (findconstraintloop(nfa, s))
    2422                 :          35 :                         goto restart;
    2423                 :       26810 :         }
    2424                 :             : 
    2425         [ -  + ]:        1923 :         if (NISERR())
    2426                 :           0 :                 return;
    2427                 :             : 
    2428                 :             :         /*
    2429                 :             :          * Now remove any states that have become useless.  (This cleanup is not
    2430                 :             :          * very thorough, and would be even less so if we tried to combine it with
    2431                 :             :          * the previous step; but cleanup() will take care of anything we miss.)
    2432                 :             :          *
    2433                 :             :          * Because findconstraintloop intentionally doesn't reset all tmp fields,
    2434                 :             :          * we have to clear them after it's done.  This is a convenient place to
    2435                 :             :          * do that, too.
    2436                 :             :          */
    2437         [ +  + ]:       27222 :         for (s = nfa->states; s != NULL; s = nexts)
    2438                 :             :         {
    2439                 :       25299 :                 nexts = s->next;
    2440                 :       25299 :                 s->tmp = NULL;
    2441   [ +  +  +  + ]:       25299 :                 if ((s->nins == 0 || s->nouts == 0) && !s->flag)
    2442                 :          31 :                         dropstate(nfa, s);
    2443                 :       25299 :         }
    2444                 :             : 
    2445         [ -  + ]:        1923 :         if (f != NULL)
    2446                 :           0 :                 dumpnfa(nfa, f);
    2447         [ -  + ]:        1924 : }
    2448                 :             : 
    2449                 :             : /*
    2450                 :             :  * findconstraintloop - recursively find a loop of constraint arcs
    2451                 :             :  *
    2452                 :             :  * If we find a loop, break it by calling breakconstraintloop(), then
    2453                 :             :  * return 1; otherwise return 0.
    2454                 :             :  *
    2455                 :             :  * State tmp fields are guaranteed all NULL on a success return, because
    2456                 :             :  * breakconstraintloop does that.  After a failure return, any state that
    2457                 :             :  * is known not to be part of a loop is marked with s->tmp == s; this allows
    2458                 :             :  * us not to have to re-prove that fact on later calls.  (This convention is
    2459                 :             :  * workable because we already eliminated single-state loops.)
    2460                 :             :  *
    2461                 :             :  * Note that the found loop doesn't necessarily include the first state we
    2462                 :             :  * are called on.  Any loop reachable from that state will do.
    2463                 :             :  *
    2464                 :             :  * The maximum recursion depth here is one more than the length of the longest
    2465                 :             :  * loop-free chain of constraint arcs, which is surely no more than the size
    2466                 :             :  * of the NFA ... but that could still be enough to cause trouble.
    2467                 :             :  */
    2468                 :             : static int
    2469                 :       41070 : findconstraintloop(struct nfa *nfa, struct state *s)
    2470                 :             : {
    2471                 :       41070 :         struct arc *a;
    2472                 :             : 
    2473                 :             :         /* Since this is recursive, it could be driven to stack overflow */
    2474         [ -  + ]:       41070 :         if (STACK_TOO_DEEP(nfa->v->re))
    2475                 :             :         {
    2476         [ #  # ]:           0 :                 NERR(REG_ETOOBIG);
    2477                 :           0 :                 return 1;                               /* to exit as quickly as possible */
    2478                 :             :         }
    2479                 :             : 
    2480         [ +  + ]:       41070 :         if (s->tmp != NULL)
    2481                 :             :         {
    2482                 :             :                 /* Already proven uninteresting? */
    2483         [ +  + ]:       13844 :                 if (s->tmp == s)
    2484                 :       13809 :                         return 0;
    2485                 :             :                 /* Found a loop involving s */
    2486                 :          35 :                 breakconstraintloop(nfa, s);
    2487                 :             :                 /* The tmp fields have been cleaned up by breakconstraintloop */
    2488                 :          35 :                 return 1;
    2489                 :             :         }
    2490         [ +  + ]:       79696 :         for (a = s->outs; a != NULL; a = a->outchain)
    2491                 :             :         {
    2492         [ +  + ]:       52576 :                 if (isconstraintarc(a))
    2493                 :             :                 {
    2494                 :       14225 :                         struct state *sto = a->to;
    2495                 :             : 
    2496         [ +  - ]:       14225 :                         assert(sto != s);
    2497                 :       14225 :                         s->tmp = sto;
    2498         [ +  + ]:       14225 :                         if (findconstraintloop(nfa, sto))
    2499                 :         106 :                                 return 1;
    2500         [ +  + ]:       14225 :                 }
    2501                 :       52470 :         }
    2502                 :             : 
    2503                 :             :         /*
    2504                 :             :          * If we get here, no constraint loop exists leading out from s.  Mark it
    2505                 :             :          * with s->tmp == s so we need not rediscover that fact again later.
    2506                 :             :          */
    2507                 :       27120 :         s->tmp = s;
    2508                 :       27120 :         return 0;
    2509                 :       41070 : }
    2510                 :             : 
    2511                 :             : /*
    2512                 :             :  * breakconstraintloop - break a loop of constraint arcs
    2513                 :             :  *
    2514                 :             :  * sinitial is any one member state of the loop.  Each loop member's tmp
    2515                 :             :  * field links to its successor within the loop.  (Note that this function
    2516                 :             :  * will reset all the tmp fields to NULL.)
    2517                 :             :  *
    2518                 :             :  * We can break the loop by, for any one state S1 in the loop, cloning its
    2519                 :             :  * loop successor state S2 (and possibly following states), and then moving
    2520                 :             :  * all S1->S2 constraint arcs to point to the cloned S2.  The cloned S2 should
    2521                 :             :  * copy any non-constraint outarcs of S2.  Constraint outarcs should be
    2522                 :             :  * dropped if they point back to S1, else they need to be copied as arcs to
    2523                 :             :  * similarly cloned states S3, S4, etc.  In general, each cloned state copies
    2524                 :             :  * non-constraint outarcs, drops constraint outarcs that would lead to itself
    2525                 :             :  * or any earlier cloned state, and sends other constraint outarcs to newly
    2526                 :             :  * cloned states.  No cloned state will have any inarcs that aren't constraint
    2527                 :             :  * arcs or do not lead from S1 or earlier-cloned states.  It's okay to drop
    2528                 :             :  * constraint back-arcs since they would not take us to any state we've not
    2529                 :             :  * already been in; therefore, no new constraint loop is created.  In this way
    2530                 :             :  * we generate a modified NFA that can still represent every useful state
    2531                 :             :  * sequence, but not sequences that represent state loops with no consumption
    2532                 :             :  * of input data.  Note that the set of cloned states will certainly include
    2533                 :             :  * all of the loop member states other than S1, and it may also include
    2534                 :             :  * non-loop states that are reachable from S2 via constraint arcs.  This is
    2535                 :             :  * important because there is no guarantee that findconstraintloop found a
    2536                 :             :  * maximal loop (and searching for one would be NP-hard, so don't try).
    2537                 :             :  * Frequently the "non-loop states" are actually part of a larger loop that
    2538                 :             :  * we didn't notice, and indeed there may be several overlapping loops.
    2539                 :             :  * This technique ensures convergence in such cases, while considering only
    2540                 :             :  * the originally-found loop does not.
    2541                 :             :  *
    2542                 :             :  * If there is only one S1->S2 constraint arc, then that constraint is
    2543                 :             :  * certainly satisfied when we enter any of the clone states.  This means that
    2544                 :             :  * in the common case where many of the constraint arcs are identically
    2545                 :             :  * labeled, we can merge together clone states linked by a similarly-labeled
    2546                 :             :  * constraint: if we can get to the first one we can certainly get to the
    2547                 :             :  * second, so there's no need to distinguish.  This greatly reduces the number
    2548                 :             :  * of new states needed, so we preferentially break the given loop at a state
    2549                 :             :  * pair where this is true.
    2550                 :             :  *
    2551                 :             :  * Furthermore, it's fairly common to find that a cloned successor state has
    2552                 :             :  * no outarcs, especially if we're a bit aggressive about removing unnecessary
    2553                 :             :  * outarcs.  If that happens, then there is simply not any interesting state
    2554                 :             :  * that can be reached through the predecessor's loop arcs, which means we can
    2555                 :             :  * break the loop just by removing those loop arcs, with no new states added.
    2556                 :             :  */
    2557                 :             : static void
    2558                 :          35 : breakconstraintloop(struct nfa *nfa, struct state *sinitial)
    2559                 :             : {
    2560                 :          35 :         struct state *s;
    2561                 :          35 :         struct state *shead;
    2562                 :          35 :         struct state *stail;
    2563                 :          35 :         struct state *sclone;
    2564                 :          35 :         struct state *nexts;
    2565                 :          35 :         struct arc *refarc;
    2566                 :          35 :         struct arc *a;
    2567                 :          35 :         struct arc *nexta;
    2568                 :             : 
    2569                 :             :         /*
    2570                 :             :          * Start by identifying which loop step we want to break at.
    2571                 :             :          * Preferentially this is one with only one constraint arc.  (XXX are
    2572                 :             :          * there any other secondary heuristics we want to use here?)  Set refarc
    2573                 :             :          * to point to the selected lone constraint arc, if there is one.
    2574                 :             :          */
    2575                 :          35 :         refarc = NULL;
    2576                 :          35 :         s = sinitial;
    2577                 :          35 :         do
    2578                 :             :         {
    2579                 :          88 :                 nexts = s->tmp;
    2580         [ +  - ]:          88 :                 assert(nexts != s);             /* should not see any one-element loops */
    2581         [ +  + ]:          88 :                 if (refarc == NULL)
    2582                 :             :                 {
    2583                 :          54 :                         int                     narcs = 0;
    2584                 :             : 
    2585         [ +  + ]:         446 :                         for (a = s->outs; a != NULL; a = a->outchain)
    2586                 :             :                         {
    2587   [ +  +  -  + ]:         392 :                                 if (a->to == nexts && isconstraintarc(a))
    2588                 :             :                                 {
    2589                 :         128 :                                         refarc = a;
    2590                 :         128 :                                         narcs++;
    2591                 :         128 :                                 }
    2592                 :         392 :                         }
    2593         [ -  + ]:          54 :                         assert(narcs > 0);
    2594         [ +  + ]:          54 :                         if (narcs > 1)
    2595                 :          28 :                                 refarc = NULL;  /* multiple constraint arcs here, no good */
    2596                 :          54 :                 }
    2597                 :          88 :                 s = nexts;
    2598         [ +  + ]:          88 :         } while (s != sinitial);
    2599                 :             : 
    2600         [ +  + ]:          35 :         if (refarc)
    2601                 :             :         {
    2602                 :             :                 /* break at the refarc */
    2603                 :          26 :                 shead = refarc->from;
    2604                 :          26 :                 stail = refarc->to;
    2605         [ +  - ]:          26 :                 assert(stail == shead->tmp);
    2606                 :          26 :         }
    2607                 :             :         else
    2608                 :             :         {
    2609                 :             :                 /* for lack of a better idea, break after sinitial */
    2610                 :           9 :                 shead = sinitial;
    2611                 :           9 :                 stail = sinitial->tmp;
    2612                 :             :         }
    2613                 :             : 
    2614                 :             :         /*
    2615                 :             :          * Reset the tmp fields so that we can use them for local storage in
    2616                 :             :          * clonesuccessorstates.  (findconstraintloop won't mind, since it's just
    2617                 :             :          * going to abandon its search anyway.)
    2618                 :             :          */
    2619         [ +  + ]:        3207 :         for (s = nfa->states; s != NULL; s = s->next)
    2620                 :        3172 :                 s->tmp = NULL;
    2621                 :             : 
    2622                 :             :         /*
    2623                 :             :          * Recursively build clone state(s) as needed.
    2624                 :             :          */
    2625                 :          35 :         sclone = newstate(nfa);
    2626         [ +  - ]:          35 :         if (sclone == NULL)
    2627                 :             :         {
    2628         [ #  # ]:           0 :                 assert(NISERR());
    2629                 :           0 :                 return;
    2630                 :             :         }
    2631                 :             : 
    2632                 :          70 :         clonesuccessorstates(nfa, stail, sclone, shead, refarc,
    2633                 :          35 :                                                  NULL, NULL, nfa->nstates);
    2634                 :             : 
    2635         [ -  + ]:          35 :         if (NISERR())
    2636                 :           0 :                 return;
    2637                 :             : 
    2638                 :             :         /*
    2639                 :             :          * It's possible that sclone has no outarcs at all, in which case it's
    2640                 :             :          * useless.  (We don't try extremely hard to get rid of useless states
    2641                 :             :          * here, but this is an easy and fairly common case.)
    2642                 :             :          */
    2643         [ +  + ]:          35 :         if (sclone->nouts == 0)
    2644                 :             :         {
    2645                 :           9 :                 freestate(nfa, sclone);
    2646                 :           9 :                 sclone = NULL;
    2647                 :           9 :         }
    2648                 :             : 
    2649                 :             :         /*
    2650                 :             :          * Move shead's constraint-loop arcs to point to sclone, or just drop them
    2651                 :             :          * if we discovered we don't need sclone.
    2652                 :             :          */
    2653         [ +  + ]:         306 :         for (a = shead->outs; a != NULL; a = nexta)
    2654                 :             :         {
    2655                 :         271 :                 nexta = a->outchain;
    2656   [ +  +  -  + ]:         271 :                 if (a->to == stail && isconstraintarc(a))
    2657                 :             :                 {
    2658         [ +  + ]:          58 :                         if (sclone)
    2659                 :          47 :                                 cparc(nfa, a, shead, sclone);
    2660                 :          58 :                         freearc(nfa, a);
    2661         [ +  - ]:          58 :                         if (NISERR())
    2662                 :           0 :                                 break;
    2663                 :          58 :                 }
    2664                 :         271 :         }
    2665         [ -  + ]:          35 : }
    2666                 :             : 
    2667                 :             : /*
    2668                 :             :  * clonesuccessorstates - create a tree of constraint-arc successor states
    2669                 :             :  *
    2670                 :             :  * ssource is the state to be cloned, and sclone is the state to copy its
    2671                 :             :  * outarcs into.  sclone's inarcs, if any, should already be set up.
    2672                 :             :  *
    2673                 :             :  * spredecessor is the original predecessor state that we are trying to build
    2674                 :             :  * successors for (it may not be the immediate predecessor of ssource).
    2675                 :             :  * refarc, if not NULL, is the original constraint arc that is known to have
    2676                 :             :  * been traversed out of spredecessor to reach the successor(s).
    2677                 :             :  *
    2678                 :             :  * For each cloned successor state, we transiently create a "donemap" that is
    2679                 :             :  * a boolean array showing which source states we've already visited for this
    2680                 :             :  * clone state.  This prevents infinite recursion as well as useless repeat
    2681                 :             :  * visits to the same state subtree (which can add up fast, since typical NFAs
    2682                 :             :  * have multiple redundant arc pathways).  Each donemap is a char array
    2683                 :             :  * indexed by state number.  The donemaps are all of the same size "nstates",
    2684                 :             :  * which is nfa->nstates as of the start of the recursion.  This is enough to
    2685                 :             :  * have entries for all pre-existing states, but *not* entries for clone
    2686                 :             :  * states created during the recursion.  That's okay since we have no need to
    2687                 :             :  * mark those.
    2688                 :             :  *
    2689                 :             :  * curdonemap is NULL when recursing to a new sclone state, or sclone's
    2690                 :             :  * donemap when we are recursing without having created a new state (which we
    2691                 :             :  * do when we decide we can merge a successor state into the current clone
    2692                 :             :  * state).  outerdonemap is NULL at the top level and otherwise the parent
    2693                 :             :  * clone state's donemap.
    2694                 :             :  *
    2695                 :             :  * The successor states we create and fill here form a strict tree structure,
    2696                 :             :  * with each state having exactly one predecessor, except that the toplevel
    2697                 :             :  * state has no inarcs as yet (breakconstraintloop will add its inarcs from
    2698                 :             :  * spredecessor after we're done).  Thus, we can examine sclone's inarcs back
    2699                 :             :  * to the root, plus refarc if any, to identify the set of constraints already
    2700                 :             :  * known valid at the current point.  This allows us to avoid generating extra
    2701                 :             :  * successor states.
    2702                 :             :  */
    2703                 :             : static void
    2704                 :         328 : clonesuccessorstates(struct nfa *nfa,
    2705                 :             :                                          struct state *ssource,
    2706                 :             :                                          struct state *sclone,
    2707                 :             :                                          struct state *spredecessor,
    2708                 :             :                                          struct arc *refarc,
    2709                 :             :                                          char *curdonemap,
    2710                 :             :                                          char *outerdonemap,
    2711                 :             :                                          int nstates)
    2712                 :             : {
    2713                 :         328 :         char       *donemap;
    2714                 :         328 :         struct arc *a;
    2715                 :             : 
    2716                 :             :         /* Since this is recursive, it could be driven to stack overflow */
    2717         [ -  + ]:         328 :         if (STACK_TOO_DEEP(nfa->v->re))
    2718                 :             :         {
    2719         [ #  # ]:           0 :                 NERR(REG_ETOOBIG);
    2720                 :           0 :                 return;
    2721                 :             :         }
    2722                 :             : 
    2723                 :             :         /* If this state hasn't already got a donemap, create one */
    2724                 :         328 :         donemap = curdonemap;
    2725         [ +  + ]:         328 :         if (donemap == NULL)
    2726                 :             :         {
    2727                 :         168 :                 donemap = (char *) MALLOC(nstates * sizeof(char));
    2728         [ +  - ]:         168 :                 if (donemap == NULL)
    2729                 :             :                 {
    2730         [ #  # ]:           0 :                         NERR(REG_ESPACE);
    2731                 :           0 :                         return;
    2732                 :             :                 }
    2733                 :             : 
    2734         [ +  + ]:         168 :                 if (outerdonemap != NULL)
    2735                 :             :                 {
    2736                 :             :                         /*
    2737                 :             :                          * Not at outermost recursion level, so copy the outer level's
    2738                 :             :                          * donemap; this ensures that we see states in process of being
    2739                 :             :                          * visited at outer levels, or already merged into predecessor
    2740                 :             :                          * states, as ones we shouldn't traverse back to.
    2741                 :             :                          */
    2742                 :         133 :                         memcpy(donemap, outerdonemap, nstates * sizeof(char));
    2743                 :         133 :                 }
    2744                 :             :                 else
    2745                 :             :                 {
    2746                 :             :                         /* At outermost level, only spredecessor is off-limits */
    2747                 :          35 :                         memset(donemap, 0, nstates * sizeof(char));
    2748         [ +  - ]:          35 :                         assert(spredecessor->no < nstates);
    2749                 :          35 :                         donemap[spredecessor->no] = 1;
    2750                 :             :                 }
    2751                 :         168 :         }
    2752                 :             : 
    2753                 :             :         /* Mark ssource as visited in the donemap */
    2754         [ +  - ]:         328 :         assert(ssource->no < nstates);
    2755         [ +  - ]:         328 :         assert(donemap[ssource->no] == 0);
    2756                 :         328 :         donemap[ssource->no] = 1;
    2757                 :             : 
    2758                 :             :         /*
    2759                 :             :          * We proceed by first cloning all of ssource's outarcs, creating new
    2760                 :             :          * clone states as needed but not doing more with them than that.  Then in
    2761                 :             :          * a second pass, recurse to process the child clone states.  This allows
    2762                 :             :          * us to have only one child clone state per reachable source state, even
    2763                 :             :          * when there are multiple outarcs leading to the same state.  Also, when
    2764                 :             :          * we do visit a child state, its set of inarcs is known exactly, which
    2765                 :             :          * makes it safe to apply the constraint-is-already-checked optimization.
    2766                 :             :          * Also, this ensures that we've merged all the states we can into the
    2767                 :             :          * current clone before we recurse to any children, thus possibly saving
    2768                 :             :          * them from making extra images of those states.
    2769                 :             :          *
    2770                 :             :          * While this function runs, child clone states of the current state are
    2771                 :             :          * marked by setting their tmp fields to point to the original state they
    2772                 :             :          * were cloned from.  This makes it possible to detect multiple outarcs
    2773                 :             :          * leading to the same state, and also makes it easy to distinguish clone
    2774                 :             :          * states from original states (which will have tmp == NULL).
    2775                 :             :          */
    2776   [ +  +  +  + ]:        2264 :         for (a = ssource->outs; a != NULL && !NISERR(); a = a->outchain)
    2777                 :             :         {
    2778                 :        1936 :                 struct state *sto = a->to;
    2779                 :             : 
    2780                 :             :                 /*
    2781                 :             :                  * We do not consider cloning successor states that have no constraint
    2782                 :             :                  * outarcs; just link to them as-is.  They cannot be part of a
    2783                 :             :                  * constraint loop so there is no need to make copies.  In particular,
    2784                 :             :                  * this rule keeps us from trying to clone the post state, which would
    2785                 :             :                  * be a bad idea.
    2786                 :             :                  */
    2787   [ +  +  +  + ]:        1936 :                 if (isconstraintarc(a) && hasconstraintout(sto))
    2788                 :             :                 {
    2789                 :         978 :                         struct state *prevclone;
    2790                 :         978 :                         int                     canmerge;
    2791                 :         978 :                         struct arc *a2;
    2792                 :             : 
    2793                 :             :                         /*
    2794                 :             :                          * Back-link constraint arcs must not be followed.  Nor is there a
    2795                 :             :                          * need to revisit states previously merged into this clone.
    2796                 :             :                          */
    2797         [ +  - ]:         978 :                         assert(sto->no < nstates);
    2798         [ +  + ]:         978 :                         if (donemap[sto->no] != 0)
    2799                 :         369 :                                 continue;
    2800                 :             : 
    2801                 :             :                         /*
    2802                 :             :                          * Check whether we already have a child clone state for this
    2803                 :             :                          * source state.
    2804                 :             :                          */
    2805                 :         609 :                         prevclone = NULL;
    2806         [ +  + ]:        2895 :                         for (a2 = sclone->outs; a2 != NULL; a2 = a2->outchain)
    2807                 :             :                         {
    2808         [ +  + ]:        2602 :                                 if (a2->to->tmp == sto)
    2809                 :             :                                 {
    2810                 :         316 :                                         prevclone = a2->to;
    2811                 :         316 :                                         break;
    2812                 :             :                                 }
    2813                 :        2286 :                         }
    2814                 :             : 
    2815                 :             :                         /*
    2816                 :             :                          * If this arc is labeled the same as refarc, or the same as any
    2817                 :             :                          * arc we must have traversed to get to sclone, then no additional
    2818                 :             :                          * constraints need to be met to get to sto, so we should just
    2819                 :             :                          * merge its outarcs into sclone.
    2820                 :             :                          */
    2821   [ +  +  +  +  :         609 :                         if (refarc && a->type == refarc->type && a->co == refarc->co)
                   -  + ]
    2822                 :         160 :                                 canmerge = 1;
    2823                 :             :                         else
    2824                 :             :                         {
    2825                 :         449 :                                 struct state *s;
    2826                 :             : 
    2827                 :         449 :                                 canmerge = 0;
    2828         [ +  + ]:        2320 :                                 for (s = sclone; s->ins; s = s->ins->from)
    2829                 :             :                                 {
    2830         [ +  + ]:        1871 :                                         if (s->nins == 1 &&
    2831   [ +  -  +  - ]:           2 :                                                 a->type == s->ins->type && a->co == s->ins->co)
    2832                 :             :                                         {
    2833                 :           0 :                                                 canmerge = 1;
    2834                 :           0 :                                                 break;
    2835                 :             :                                         }
    2836                 :        1871 :                                 }
    2837                 :         449 :                         }
    2838                 :             : 
    2839         [ +  + ]:         609 :                         if (canmerge)
    2840                 :             :                         {
    2841                 :             :                                 /*
    2842                 :             :                                  * We can merge into sclone.  If we previously made a child
    2843                 :             :                                  * clone state, drop it; there's no need to visit it.  (This
    2844                 :             :                                  * can happen if ssource has multiple pathways to sto, and we
    2845                 :             :                                  * only just now found one that is provably a no-op.)
    2846                 :             :                                  */
    2847         [ -  + ]:         160 :                                 if (prevclone)
    2848                 :           0 :                                         dropstate(nfa, prevclone);      /* kills our outarc, too */
    2849                 :             : 
    2850                 :             :                                 /* Recurse to merge sto's outarcs into sclone */
    2851                 :         320 :                                 clonesuccessorstates(nfa,
    2852                 :         160 :                                                                          sto,
    2853                 :         160 :                                                                          sclone,
    2854                 :         160 :                                                                          spredecessor,
    2855                 :         160 :                                                                          refarc,
    2856                 :         160 :                                                                          donemap,
    2857                 :         160 :                                                                          outerdonemap,
    2858                 :         160 :                                                                          nstates);
    2859                 :             :                                 /* sto should now be marked as previously visited */
    2860   [ +  -  -  + ]:         160 :                                 assert(NISERR() || donemap[sto->no] == 1);
    2861                 :         160 :                         }
    2862         [ +  + ]:         449 :                         else if (prevclone)
    2863                 :             :                         {
    2864                 :             :                                 /*
    2865                 :             :                                  * We already have a clone state for this successor, so just
    2866                 :             :                                  * make another arc to it.
    2867                 :             :                                  */
    2868                 :         316 :                                 cparc(nfa, a, sclone, prevclone);
    2869                 :         316 :                         }
    2870                 :             :                         else
    2871                 :             :                         {
    2872                 :             :                                 /*
    2873                 :             :                                  * We need to create a new successor clone state.
    2874                 :             :                                  */
    2875                 :         133 :                                 struct state *stoclone;
    2876                 :             : 
    2877                 :         133 :                                 stoclone = newstate(nfa);
    2878         [ +  - ]:         133 :                                 if (stoclone == NULL)
    2879                 :             :                                 {
    2880         [ #  # ]:           0 :                                         assert(NISERR());
    2881                 :           0 :                                         break;
    2882                 :             :                                 }
    2883                 :             :                                 /* Mark it as to what it's a clone of */
    2884                 :         133 :                                 stoclone->tmp = sto;
    2885                 :             :                                 /* ... and add the outarc leading to it */
    2886                 :         133 :                                 cparc(nfa, a, sclone, stoclone);
    2887         [ -  + ]:         133 :                         }
    2888         [ +  + ]:         978 :                 }
    2889                 :             :                 else
    2890                 :             :                 {
    2891                 :             :                         /*
    2892                 :             :                          * Non-constraint outarcs just get copied to sclone, as do outarcs
    2893                 :             :                          * leading to states with no constraint outarc.
    2894                 :             :                          */
    2895                 :         958 :                         cparc(nfa, a, sclone, sto);
    2896                 :             :                 }
    2897      [ +  -  + ]:        1936 :         }
    2898                 :             : 
    2899                 :             :         /*
    2900                 :             :          * If we are at outer level for this clone state, recurse to all its child
    2901                 :             :          * clone states, clearing their tmp fields as we go.  (If we're not
    2902                 :             :          * outermost for sclone, leave this to be done by the outer call level.)
    2903                 :             :          * Note that if we have multiple outarcs leading to the same clone state,
    2904                 :             :          * it will only be recursed-to once.
    2905                 :             :          */
    2906         [ +  + ]:         328 :         if (curdonemap == NULL)
    2907                 :             :         {
    2908   [ +  +  +  + ]:        1063 :                 for (a = sclone->outs; a != NULL && !NISERR(); a = a->outchain)
    2909                 :             :                 {
    2910                 :         895 :                         struct state *stoclone = a->to;
    2911                 :         895 :                         struct state *sto = stoclone->tmp;
    2912                 :             : 
    2913         [ +  + ]:         895 :                         if (sto != NULL)
    2914                 :             :                         {
    2915                 :         133 :                                 stoclone->tmp = NULL;
    2916                 :         266 :                                 clonesuccessorstates(nfa,
    2917                 :         133 :                                                                          sto,
    2918                 :         133 :                                                                          stoclone,
    2919                 :         133 :                                                                          spredecessor,
    2920                 :         133 :                                                                          refarc,
    2921                 :             :                                                                          NULL,
    2922                 :         133 :                                                                          donemap,
    2923                 :         133 :                                                                          nstates);
    2924                 :         133 :                         }
    2925                 :         895 :                 }
    2926                 :             : 
    2927                 :             :                 /* Don't forget to free sclone's donemap when done with it */
    2928                 :         168 :                 FREE(donemap);
    2929                 :         168 :         }
    2930                 :         328 : }
    2931                 :             : 
    2932                 :             : /*
    2933                 :             :  * removecantmatch - remove CANTMATCH arcs, which are no longer useful
    2934                 :             :  * once we are done with the parsing phase.  (We need them only to
    2935                 :             :  * preserve connectedness of NFA subgraphs during parsing.)
    2936                 :             :  */
    2937                 :             : static void
    2938                 :           0 : removecantmatch(struct nfa *nfa)
    2939                 :             : {
    2940                 :           0 :         struct state *s;
    2941                 :             : 
    2942         [ #  # ]:           0 :         for (s = nfa->states; s != NULL; s = s->next)
    2943                 :             :         {
    2944                 :           0 :                 struct arc *a;
    2945                 :           0 :                 struct arc *nexta;
    2946                 :             : 
    2947         [ #  # ]:           0 :                 for (a = s->outs; a != NULL; a = nexta)
    2948                 :             :                 {
    2949                 :           0 :                         nexta = a->outchain;
    2950         [ #  # ]:           0 :                         if (a->type == CANTMATCH)
    2951                 :             :                         {
    2952                 :           0 :                                 freearc(nfa, a);
    2953         [ #  # ]:           0 :                                 if (NISERR())
    2954                 :           0 :                                         return;
    2955                 :           0 :                         }
    2956                 :           0 :                 }
    2957         [ #  # ]:           0 :         }
    2958         [ #  # ]:           0 : }
    2959                 :             : 
    2960                 :             : /*
    2961                 :             :  * cleanup - clean up NFA after optimizations
    2962                 :             :  */
    2963                 :             : static void
    2964                 :        3848 : cleanup(struct nfa *nfa)
    2965                 :             : {
    2966                 :        3848 :         struct state *s;
    2967                 :        3848 :         struct state *nexts;
    2968                 :        3848 :         int                     n;
    2969                 :             : 
    2970         [ +  + ]:        3848 :         if (NISERR())
    2971                 :           1 :                 return;
    2972                 :             : 
    2973                 :             :         /* clear out unreachable or dead-end states */
    2974                 :             :         /* use pre to mark reachable, then post to mark can-reach-post */
    2975                 :        3847 :         markreachable(nfa, nfa->pre, (struct state *) NULL, nfa->pre);
    2976                 :        3847 :         markcanreach(nfa, nfa->post, nfa->pre, nfa->post);
    2977   [ +  +  +  + ]:       68257 :         for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
    2978                 :             :         {
    2979                 :       64410 :                 nexts = s->next;
    2980   [ +  +  +  + ]:       64410 :                 if (s->tmp != nfa->post && !s->flag)
    2981                 :         649 :                         dropstate(nfa, s);
    2982                 :       64410 :         }
    2983   [ +  -  +  +  :        3847 :         assert(NISERR() || nfa->post->nins == 0 || nfa->post->tmp == nfa->post);
                   +  - ]
    2984                 :        3847 :         cleartraverse(nfa, nfa->pre);
    2985   [ +  -  +  +  :        3847 :         assert(NISERR() || nfa->post->nins == 0 || nfa->post->tmp == NULL);
                   +  - ]
    2986                 :             :         /* the nins==0 (final unreachable) case will be caught later */
    2987                 :             : 
    2988                 :             :         /* renumber surviving states */
    2989                 :        3847 :         n = 0;
    2990         [ +  + ]:       67608 :         for (s = nfa->states; s != NULL; s = s->next)
    2991                 :       63761 :                 s->no = n++;
    2992                 :        3847 :         nfa->nstates = n;
    2993         [ -  + ]:        3848 : }
    2994                 :             : 
    2995                 :             : /*
    2996                 :             :  * markreachable - recursive marking of reachable states
    2997                 :             :  */
    2998                 :             : static void
    2999                 :       94000 : markreachable(struct nfa *nfa,
    3000                 :             :                           struct state *s,
    3001                 :             :                           struct state *okay,   /* consider only states with this mark */
    3002                 :             :                           struct state *mark)   /* the value to mark with */
    3003                 :             : {
    3004                 :       94000 :         struct arc *a;
    3005                 :             : 
    3006                 :             :         /* Since this is recursive, it could be driven to stack overflow */
    3007         [ -  + ]:       94000 :         if (STACK_TOO_DEEP(nfa->v->re))
    3008                 :             :         {
    3009         [ #  # ]:           0 :                 NERR(REG_ETOOBIG);
    3010                 :           0 :                 return;
    3011                 :             :         }
    3012                 :             : 
    3013         [ +  + ]:       94000 :         if (s->tmp != okay)
    3014                 :       30238 :                 return;
    3015                 :       63762 :         s->tmp = mark;
    3016                 :             : 
    3017         [ +  + ]:      153915 :         for (a = s->outs; a != NULL; a = a->outchain)
    3018                 :       90153 :                 markreachable(nfa, a->to, okay, mark);
    3019         [ -  + ]:       94000 : }
    3020                 :             : 
    3021                 :             : /*
    3022                 :             :  * markcanreach - recursive marking of states which can reach here
    3023                 :             :  */
    3024                 :             : static void
    3025                 :       94079 : markcanreach(struct nfa *nfa,
    3026                 :             :                          struct state *s,
    3027                 :             :                          struct state *okay,    /* consider only states with this mark */
    3028                 :             :                          struct state *mark)    /* the value to mark with */
    3029                 :             : {
    3030                 :       94079 :         struct arc *a;
    3031                 :             : 
    3032                 :             :         /* Since this is recursive, it could be driven to stack overflow */
    3033         [ -  + ]:       94079 :         if (STACK_TOO_DEEP(nfa->v->re))
    3034                 :             :         {
    3035         [ #  # ]:           0 :                 NERR(REG_ETOOBIG);
    3036                 :           0 :                 return;
    3037                 :             :         }
    3038                 :             : 
    3039         [ +  + ]:       94079 :         if (s->tmp != okay)
    3040                 :       30330 :                 return;
    3041                 :       63749 :         s->tmp = mark;
    3042                 :             : 
    3043         [ +  + ]:      153981 :         for (a = s->ins; a != NULL; a = a->inchain)
    3044                 :       90232 :                 markcanreach(nfa, a->from, okay, mark);
    3045         [ -  + ]:       94079 : }
    3046                 :             : 
    3047                 :             : /*
    3048                 :             :  * analyze - ascertain potentially-useful facts about an optimized NFA
    3049                 :             :  */
    3050                 :             : static long                                             /* re_info bits to be ORed in */
    3051                 :        1924 : analyze(struct nfa *nfa)
    3052                 :             : {
    3053                 :        1924 :         struct arc *a;
    3054                 :        1924 :         struct arc *aa;
    3055                 :             : 
    3056         [ +  + ]:        1924 :         if (NISERR())
    3057                 :           1 :                 return 0;
    3058                 :             : 
    3059                 :             :         /* Detect whether NFA can't match anything */
    3060         [ +  + ]:        1923 :         if (nfa->pre->outs == NULL)
    3061                 :           6 :                 return REG_UIMPOSSIBLE;
    3062                 :             : 
    3063                 :             :         /* Detect whether NFA matches all strings (possibly with length bounds) */
    3064                 :        1917 :         checkmatchall(nfa);
    3065                 :             : 
    3066                 :             :         /* Detect whether NFA can possibly match a zero-length string */
    3067         [ +  + ]:        4849 :         for (a = nfa->pre->outs; a != NULL; a = a->outchain)
    3068         [ +  + ]:        7558 :                 for (aa = a->to->outs; aa != NULL; aa = aa->outchain)
    3069         [ +  + ]:        4626 :                         if (aa->to == nfa->post)
    3070                 :        3154 :                                 return REG_UEMPTYMATCH;
    3071                 :        1695 :         return 0;
    3072                 :        1924 : }
    3073                 :             : 
    3074                 :             : /*
    3075                 :             :  * checkmatchall - does the NFA represent no more than a string length test?
    3076                 :             :  *
    3077                 :             :  * If so, set nfa->minmatchall and nfa->maxmatchall correctly (they are -1
    3078                 :             :  * to begin with) and set the MATCHALL bit in nfa->flags.
    3079                 :             :  *
    3080                 :             :  * To succeed, we require all arcs to be PLAIN RAINBOW arcs, except for those
    3081                 :             :  * for pseudocolors (i.e., BOS/BOL/EOS/EOL).  We must be able to reach the
    3082                 :             :  * post state via RAINBOW arcs, and if there are any loops in the graph, they
    3083                 :             :  * must be loop-to-self arcs, ensuring that each loop iteration consumes
    3084                 :             :  * exactly one character.  (Longer loops are problematic because they create
    3085                 :             :  * non-consecutive possible match lengths; we have no good way to represent
    3086                 :             :  * that situation for lengths beyond the DUPINF limit.)
    3087                 :             :  *
    3088                 :             :  * Pseudocolor arcs complicate things a little.  We know that they can only
    3089                 :             :  * appear as pre-state outarcs (for BOS/BOL) or post-state inarcs (for
    3090                 :             :  * EOS/EOL).  There, they must exactly replicate the parallel RAINBOW arcs,
    3091                 :             :  * e.g. if the pre state has one RAINBOW outarc to state 2, it must have BOS
    3092                 :             :  * and BOL outarcs to state 2, and no others.  Missing or extra pseudocolor
    3093                 :             :  * arcs can occur, meaning that the NFA involves some constraint on the
    3094                 :             :  * adjacent characters, which makes it not a matchall NFA.
    3095                 :             :  */
    3096                 :             : static void
    3097                 :        1917 : checkmatchall(struct nfa *nfa)
    3098                 :             : {
    3099                 :        1917 :         bool      **haspaths;
    3100                 :        1917 :         struct state *s;
    3101                 :        1917 :         int                     i;
    3102                 :             : 
    3103                 :             :         /*
    3104                 :             :          * If there are too many states, don't bother trying to detect matchall.
    3105                 :             :          * This limit serves to bound the time and memory we could consume below.
    3106                 :             :          * Note that even if the graph is all-RAINBOW, if there are significantly
    3107                 :             :          * more than DUPINF states then it's likely that there are paths of length
    3108                 :             :          * more than DUPINF, which would force us to fail anyhow.  In practice,
    3109                 :             :          * plausible ways of writing a matchall regex with maximum finite path
    3110                 :             :          * length K tend not to have very many more than K states.
    3111                 :             :          */
    3112         [ -  + ]:        1917 :         if (nfa->nstates > DUPINF * 2)
    3113                 :           0 :                 return;
    3114                 :             : 
    3115                 :             :         /*
    3116                 :             :          * First, scan all the states to verify that only RAINBOW arcs appear,
    3117                 :             :          * plus pseudocolor arcs adjacent to the pre and post states.  This lets
    3118                 :             :          * us quickly eliminate most cases that aren't matchall NFAs.
    3119                 :             :          */
    3120         [ +  + ]:        7040 :         for (s = nfa->states; s != NULL; s = s->next)
    3121                 :             :         {
    3122                 :        6890 :                 struct arc *a;
    3123                 :             : 
    3124         [ +  + ]:       23166 :                 for (a = s->outs; a != NULL; a = a->outchain)
    3125                 :             :                 {
    3126         [ +  + ]:       18043 :                         if (a->type != PLAIN)
    3127                 :           8 :                                 return;                 /* any LACONs make it non-matchall */
    3128         [ +  + ]:       18035 :                         if (a->co != RAINBOW)
    3129                 :             :                         {
    3130         [ +  + ]:        5930 :                                 if (nfa->cm->cd[a->co].flags & PSEUDO)
    3131                 :             :                                 {
    3132                 :             :                                         /*
    3133                 :             :                                          * Pseudocolor arc: verify it's in a valid place (this
    3134                 :             :                                          * seems quite unlikely to fail, but let's be sure).
    3135                 :             :                                          */
    3136   [ +  +  +  - ]:        7480 :                                         if (s == nfa->pre &&
    3137         [ +  + ]:        3309 :                                                 (a->co == nfa->bos[0] || a->co == nfa->bos[1]))
    3138                 :             :                                                  /* okay BOS/BOL arc */ ;
    3139   [ +  -  +  - ]:        1724 :                                         else if (a->to == nfa->post &&
    3140         [ +  + ]:         862 :                                                          (a->co == nfa->eos[0] || a->co == nfa->eos[1]))
    3141                 :             :                                                  /* okay EOS/EOL arc */ ;
    3142                 :             :                                         else
    3143                 :           0 :                                                 return; /* unexpected pseudocolor arc */
    3144                 :             :                                         /* We'll check these arcs some more below. */
    3145                 :        4171 :                                 }
    3146                 :             :                                 else
    3147                 :        1759 :                                         return;         /* any other color makes it non-matchall */
    3148                 :        4171 :                         }
    3149                 :       16276 :                 }
    3150                 :             :                 /* Also, assert that the tmp fields are available for use. */
    3151         [ +  - ]:        5123 :                 assert(s->tmp == NULL);
    3152         [ +  + ]:        6890 :         }
    3153                 :             : 
    3154                 :             :         /*
    3155                 :             :          * The next cheapest check we can make is to verify that the BOS/BOL
    3156                 :             :          * outarcs of the pre state reach the same states as its RAINBOW outarcs.
    3157                 :             :          * If they don't, the NFA expresses some constraints on the character
    3158                 :             :          * before the matched string, making it non-matchall.  Likewise, the
    3159                 :             :          * EOS/EOL inarcs of the post state must match its RAINBOW inarcs.
    3160                 :             :          */
    3161         [ +  - ]:         150 :         if (!check_out_colors_match(nfa->pre, RAINBOW, nfa->bos[0]) ||
    3162         [ +  + ]:         150 :                 !check_out_colors_match(nfa->pre, RAINBOW, nfa->bos[1]) ||
    3163   [ +  -  +  + ]:         118 :                 !check_in_colors_match(nfa->post, RAINBOW, nfa->eos[0]) ||
    3164                 :         118 :                 !check_in_colors_match(nfa->post, RAINBOW, nfa->eos[1]))
    3165                 :          51 :                 return;
    3166                 :             : 
    3167                 :             :         /*
    3168                 :             :          * Initialize an array of path-length arrays, in which
    3169                 :             :          * checkmatchall_recurse will return per-state results.  This lets us
    3170                 :             :          * memo-ize the recursive search and avoid exponential time consumption.
    3171                 :             :          */
    3172                 :          99 :         haspaths = (bool **) MALLOC(nfa->nstates * sizeof(bool *));
    3173         [ -  + ]:          99 :         if (haspaths == NULL)
    3174                 :           0 :                 return;                                 /* fail quietly */
    3175                 :          99 :         memset(haspaths, 0, nfa->nstates * sizeof(bool *));
    3176                 :             : 
    3177                 :             :         /*
    3178                 :             :          * Recursively search the graph for all-RAINBOW paths to the "post" state,
    3179                 :             :          * starting at the "pre" state, and computing the lengths of the paths.
    3180                 :             :          * (Given the preceding checks, there should be at least one such path.
    3181                 :             :          * However we could get back a false result anyway, in case there are
    3182                 :             :          * multi-state loops, paths exceeding DUPINF+1 length, or non-algorithmic
    3183                 :             :          * failures such as ENOMEM.)
    3184                 :             :          */
    3185         [ +  + ]:          99 :         if (checkmatchall_recurse(nfa, nfa->pre, haspaths))
    3186                 :             :         {
    3187                 :             :                 /* The useful result is the path length array for the pre state */
    3188                 :          95 :                 bool       *haspath = haspaths[nfa->pre->no];
    3189                 :          95 :                 int                     minmatch,
    3190                 :             :                                         maxmatch,
    3191                 :             :                                         morematch;
    3192                 :             : 
    3193         [ +  - ]:          95 :                 assert(haspath != NULL);
    3194                 :             : 
    3195                 :             :                 /*
    3196                 :             :                  * haspath[] now represents the set of possible path lengths; but we
    3197                 :             :                  * want to reduce that to a min and max value, because it doesn't seem
    3198                 :             :                  * worth complicating regexec.c to deal with nonconsecutive possible
    3199                 :             :                  * match lengths.  Find min and max of first run of lengths, then
    3200                 :             :                  * verify there are no nonconsecutive lengths.
    3201                 :             :                  */
    3202         [ -  + ]:         237 :                 for (minmatch = 0; minmatch <= DUPINF + 1; minmatch++)
    3203                 :             :                 {
    3204         [ +  + ]:         237 :                         if (haspath[minmatch])
    3205                 :          95 :                                 break;
    3206                 :         142 :                 }
    3207         [ +  - ]:          95 :                 assert(minmatch <= DUPINF + 1); /* else checkmatchall_recurse lied */
    3208         [ +  + ]:        7456 :                 for (maxmatch = minmatch; maxmatch < DUPINF + 1; maxmatch++)
    3209                 :             :                 {
    3210         [ +  + ]:        7428 :                         if (!haspath[maxmatch + 1])
    3211                 :          67 :                                 break;
    3212                 :        7361 :                 }
    3213         [ +  + ]:       16499 :                 for (morematch = maxmatch + 1; morematch <= DUPINF + 1; morematch++)
    3214                 :             :                 {
    3215         [ +  + ]:       16406 :                         if (haspath[morematch])
    3216                 :             :                         {
    3217                 :           2 :                                 haspath = NULL; /* fail, there are nonconsecutive lengths */
    3218                 :           2 :                                 break;
    3219                 :             :                         }
    3220                 :       16404 :                 }
    3221                 :             : 
    3222         [ +  + ]:          95 :                 if (haspath != NULL)
    3223                 :             :                 {
    3224                 :             :                         /*
    3225                 :             :                          * Success, so record the info.  Here we have a fine point: the
    3226                 :             :                          * path length from the pre state includes the pre-to-initial
    3227                 :             :                          * transition, so it's one more than the actually matched string
    3228                 :             :                          * length.  (We avoided counting the final-to-post transition
    3229                 :             :                          * within checkmatchall_recurse, but not this one.)  This is why
    3230                 :             :                          * checkmatchall_recurse allows one more level of path length than
    3231                 :             :                          * might seem necessary.  This decrement also takes care of
    3232                 :             :                          * converting checkmatchall_recurse's definition of "infinity" as
    3233                 :             :                          * "DUPINF+1" to our normal representation as "DUPINF".
    3234                 :             :                          */
    3235         [ +  - ]:          93 :                         assert(minmatch > 0);        /* else pre and post states were adjacent */
    3236                 :          93 :                         nfa->minmatchall = minmatch - 1;
    3237                 :          93 :                         nfa->maxmatchall = maxmatch - 1;
    3238                 :          93 :                         nfa->flags |= MATCHALL;
    3239                 :          93 :                 }
    3240                 :          95 :         }
    3241                 :             : 
    3242                 :             :         /* Clean up */
    3243         [ +  + ]:        1170 :         for (i = 0; i < nfa->nstates; i++)
    3244                 :             :         {
    3245         [ +  + ]:        1071 :                 if (haspaths[i] != NULL)
    3246                 :         972 :                         FREE(haspaths[i]);
    3247                 :        1071 :         }
    3248                 :          99 :         FREE(haspaths);
    3249         [ -  + ]:        1917 : }
    3250                 :             : 
    3251                 :             : /*
    3252                 :             :  * checkmatchall_recurse - recursive search for checkmatchall
    3253                 :             :  *
    3254                 :             :  * s is the state to be examined in this recursion level.
    3255                 :             :  * haspaths[] is an array of per-state exit path length arrays.
    3256                 :             :  *
    3257                 :             :  * We return true if the search was performed successfully, false if
    3258                 :             :  * we had to fail because of multi-state loops or other internal reasons.
    3259                 :             :  * (Because "dead" states that can't reach the post state have been
    3260                 :             :  * eliminated, and we already verified that only RAINBOW and matching
    3261                 :             :  * pseudocolor arcs exist, every state should have RAINBOW path(s) to
    3262                 :             :  * the post state.  Hence we take a false result from recursive calls
    3263                 :             :  * as meaning that we'd better fail altogether, not just that that
    3264                 :             :  * particular state can't reach the post state.)
    3265                 :             :  *
    3266                 :             :  * On success, we store a malloc'd result array in haspaths[s->no],
    3267                 :             :  * showing the possible path lengths from s to the post state.
    3268                 :             :  * Each state's haspath[] array is of length DUPINF+2.  The entries from
    3269                 :             :  * k = 0 to DUPINF are true if there is an all-RAINBOW path of length k
    3270                 :             :  * from this state to the string end.  haspath[DUPINF+1] is true if all
    3271                 :             :  * path lengths >= DUPINF+1 are possible.  (Situations that cannot be
    3272                 :             :  * represented under these rules cause failure.)
    3273                 :             :  *
    3274                 :             :  * checkmatchall is responsible for eventually freeing the haspath[] arrays.
    3275                 :             :  */
    3276                 :             : static bool
    3277                 :         972 : checkmatchall_recurse(struct nfa *nfa, struct state *s, bool **haspaths)
    3278                 :             : {
    3279                 :         972 :         bool            result = false;
    3280                 :         972 :         bool            foundloop = false;
    3281                 :         972 :         bool       *haspath;
    3282                 :         972 :         struct arc *a;
    3283                 :             : 
    3284                 :             :         /*
    3285                 :             :          * Since this is recursive, it could be driven to stack overflow.  But we
    3286                 :             :          * need not treat that as a hard failure; just deem the NFA non-matchall.
    3287                 :             :          */
    3288         [ -  + ]:         972 :         if (STACK_TOO_DEEP(nfa->v->re))
    3289                 :           0 :                 return false;
    3290                 :             : 
    3291                 :             :         /* In case the search takes a long time, check for cancel */
    3292         [ +  - ]:         972 :         INTERRUPT(nfa->v->re);
    3293                 :             : 
    3294                 :             :         /* Create a haspath array for this state */
    3295                 :         972 :         haspath = (bool *) MALLOC((DUPINF + 2) * sizeof(bool));
    3296         [ +  - ]:         972 :         if (haspath == NULL)
    3297                 :           0 :                 return false;                   /* again, treat as non-matchall */
    3298                 :         972 :         memset(haspath, 0, (DUPINF + 2) * sizeof(bool));
    3299                 :             : 
    3300                 :             :         /* Mark this state as being visited */
    3301         [ +  - ]:         972 :         assert(s->tmp == NULL);
    3302                 :         972 :         s->tmp = s;
    3303                 :             : 
    3304         [ +  + ]:       12655 :         for (a = s->outs; a != NULL; a = a->outchain)
    3305                 :             :         {
    3306         [ +  + ]:       11699 :                 if (a->co != RAINBOW)
    3307                 :         794 :                         continue;                       /* ignore pseudocolor arcs */
    3308         [ +  + ]:       10905 :                 if (a->to == nfa->post)
    3309                 :             :                 {
    3310                 :             :                         /* We found an all-RAINBOW path to the post state */
    3311                 :          97 :                         result = true;
    3312                 :             : 
    3313                 :             :                         /*
    3314                 :             :                          * Mark this state as being zero steps away from the string end
    3315                 :             :                          * (the transition to the post state isn't counted).
    3316                 :             :                          */
    3317                 :          97 :                         haspath[0] = true;
    3318                 :          97 :                 }
    3319         [ +  + ]:       10808 :                 else if (a->to == s)
    3320                 :             :                 {
    3321                 :             :                         /* We found a cycle of length 1, which we'll deal with below. */
    3322                 :          28 :                         foundloop = true;
    3323                 :          28 :                 }
    3324         [ +  + ]:       10780 :                 else if (a->to->tmp != NULL)
    3325                 :             :                 {
    3326                 :             :                         /* It's busy, so we found a cycle of length > 1, so fail. */
    3327                 :           2 :                         result = false;
    3328                 :           2 :                         break;
    3329                 :             :                 }
    3330                 :             :                 else
    3331                 :             :                 {
    3332                 :             :                         /* Consider paths forward through this to-state. */
    3333                 :       10778 :                         bool       *nexthaspath;
    3334                 :       10778 :                         int                     i;
    3335                 :             : 
    3336                 :             :                         /* If to-state was not already visited, recurse */
    3337         [ +  + ]:       10778 :                         if (haspaths[a->to->no] == NULL)
    3338                 :             :                         {
    3339                 :         873 :                                 result = checkmatchall_recurse(nfa, a->to, haspaths);
    3340                 :             :                                 /* Fail if any recursive path fails */
    3341         [ +  + ]:         873 :                                 if (!result)
    3342                 :          12 :                                         break;
    3343                 :         861 :                         }
    3344                 :             :                         else
    3345                 :             :                         {
    3346                 :             :                                 /* The previous visit must have found path(s) to the end */
    3347                 :        9905 :                                 result = true;
    3348                 :             :                         }
    3349         [ +  - ]:       10766 :                         assert(a->to->tmp == NULL);
    3350                 :       10766 :                         nexthaspath = haspaths[a->to->no];
    3351         [ +  - ]:       10766 :                         assert(nexthaspath != NULL);
    3352                 :             : 
    3353                 :             :                         /*
    3354                 :             :                          * Now, for every path of length i from a->to to the string end,
    3355                 :             :                          * there is a path of length i + 1 from s to the string end.
    3356                 :             :                          */
    3357         [ +  + ]:       10766 :                         if (nexthaspath[DUPINF] != nexthaspath[DUPINF + 1])
    3358                 :             :                         {
    3359                 :             :                                 /*
    3360                 :             :                                  * a->to has a path of length exactly DUPINF, but not longer;
    3361                 :             :                                  * or it has paths of all lengths > DUPINF but not one of
    3362                 :             :                                  * exactly that length.  In either case, we cannot represent
    3363                 :             :                                  * the possible path lengths from s correctly, so fail.
    3364                 :             :                                  */
    3365                 :           2 :                                 result = false;
    3366                 :           2 :                                 break;
    3367                 :             :                         }
    3368                 :             :                         /* Merge knowledge of these path lengths into what we have */
    3369         [ +  + ]:     2766348 :                         for (i = 0; i < DUPINF; i++)
    3370                 :     2755584 :                                 haspath[i + 1] |= nexthaspath[i];
    3371                 :             :                         /* Infinity + 1 is still infinity */
    3372                 :       10764 :                         haspath[DUPINF + 1] |= nexthaspath[DUPINF + 1];
    3373      [ -  +  + ]:       10778 :                 }
    3374                 :       10889 :         }
    3375                 :             : 
    3376   [ +  +  +  + ]:         972 :         if (result && foundloop)
    3377                 :             :         {
    3378                 :             :                 /*
    3379                 :             :                  * If there is a length-1 loop at this state, then find the shortest
    3380                 :             :                  * known path length to the end.  The loop means that every larger
    3381                 :             :                  * path length is possible, too.  (It doesn't matter whether any of
    3382                 :             :                  * the longer lengths were already known possible.)
    3383                 :             :                  */
    3384                 :          28 :                 int                     i;
    3385                 :             : 
    3386         [ -  + ]:          34 :                 for (i = 0; i <= DUPINF; i++)
    3387                 :             :                 {
    3388         [ +  + ]:          34 :                         if (haspath[i])
    3389                 :          28 :                                 break;
    3390                 :           6 :                 }
    3391         [ +  + ]:        7218 :                 for (i++; i <= DUPINF + 1; i++)
    3392                 :        7190 :                         haspath[i] = true;
    3393                 :          28 :         }
    3394                 :             : 
    3395                 :             :         /* Report out the completed path length map */
    3396         [ +  - ]:         972 :         assert(s->no < nfa->nstates);
    3397         [ +  - ]:         972 :         assert(haspaths[s->no] == NULL);
    3398                 :         972 :         haspaths[s->no] = haspath;
    3399                 :             : 
    3400                 :             :         /* Mark state no longer busy */
    3401                 :         972 :         s->tmp = NULL;
    3402                 :             : 
    3403                 :         972 :         return result;
    3404                 :         972 : }
    3405                 :             : 
    3406                 :             : /*
    3407                 :             :  * check_out_colors_match - subroutine for checkmatchall
    3408                 :             :  *
    3409                 :             :  * Check whether the set of states reachable from s by arcs of color co1
    3410                 :             :  * is equivalent to the set reachable by arcs of color co2.
    3411                 :             :  * checkmatchall already verified that all of the NFA's arcs are PLAIN,
    3412                 :             :  * so we need not examine arc types here.
    3413                 :             :  */
    3414                 :             : static bool
    3415                 :         300 : check_out_colors_match(struct state *s, color co1, color co2)
    3416                 :             : {
    3417                 :         300 :         bool            result = true;
    3418                 :         300 :         struct arc *a;
    3419                 :             : 
    3420                 :             :         /*
    3421                 :             :          * To do this in linear time, we assume that the NFA contains no duplicate
    3422                 :             :          * arcs.  Run through the out-arcs, marking states reachable by arcs of
    3423                 :             :          * color co1.  Run through again, un-marking states reachable by arcs of
    3424                 :             :          * color co2; if we see a not-marked state, we know this co2 arc is
    3425                 :             :          * unmatched.  Then run through again, checking for still-marked states,
    3426                 :             :          * and in any case leaving all the tmp fields reset to NULL.
    3427                 :             :          */
    3428         [ +  + ]:        2302 :         for (a = s->outs; a != NULL; a = a->outchain)
    3429                 :             :         {
    3430         [ +  + ]:        2002 :                 if (a->co == co1)
    3431                 :             :                 {
    3432         [ -  + ]:         646 :                         assert(a->to->tmp == NULL);
    3433                 :         646 :                         a->to->tmp = a->to;
    3434                 :         646 :                 }
    3435                 :        2002 :         }
    3436         [ +  + ]:        2302 :         for (a = s->outs; a != NULL; a = a->outchain)
    3437                 :             :         {
    3438         [ +  + ]:        2002 :                 if (a->co == co2)
    3439                 :             :                 {
    3440         [ +  + ]:         678 :                         if (a->to->tmp != NULL)
    3441                 :         646 :                                 a->to->tmp = NULL;
    3442                 :             :                         else
    3443                 :          32 :                                 result = false; /* unmatched co2 arc */
    3444                 :         678 :                 }
    3445                 :        2002 :         }
    3446         [ +  + ]:        2302 :         for (a = s->outs; a != NULL; a = a->outchain)
    3447                 :             :         {
    3448         [ +  + ]:        2002 :                 if (a->co == co1)
    3449                 :             :                 {
    3450         [ +  - ]:         646 :                         if (a->to->tmp != NULL)
    3451                 :             :                         {
    3452                 :           0 :                                 result = false; /* unmatched co1 arc */
    3453                 :           0 :                                 a->to->tmp = NULL;
    3454                 :           0 :                         }
    3455                 :         646 :                 }
    3456                 :        2002 :         }
    3457                 :         600 :         return result;
    3458                 :         300 : }
    3459                 :             : 
    3460                 :             : /*
    3461                 :             :  * check_in_colors_match - subroutine for checkmatchall
    3462                 :             :  *
    3463                 :             :  * Check whether the set of states that can reach s by arcs of color co1
    3464                 :             :  * is equivalent to the set that can reach s by arcs of color co2.
    3465                 :             :  * checkmatchall already verified that all of the NFA's arcs are PLAIN,
    3466                 :             :  * so we need not examine arc types here.
    3467                 :             :  */
    3468                 :             : static bool
    3469                 :         236 : check_in_colors_match(struct state *s, color co1, color co2)
    3470                 :             : {
    3471                 :         236 :         bool            result = true;
    3472                 :         236 :         struct arc *a;
    3473                 :             : 
    3474                 :             :         /*
    3475                 :             :          * Identical algorithm to check_out_colors_match, except examine the
    3476                 :             :          * from-states of s' inarcs.
    3477                 :             :          */
    3478         [ +  + ]:         868 :         for (a = s->ins; a != NULL; a = a->inchain)
    3479                 :             :         {
    3480         [ +  + ]:         632 :                 if (a->co == co1)
    3481                 :             :                 {
    3482         [ -  + ]:         198 :                         assert(a->from->tmp == NULL);
    3483                 :         198 :                         a->from->tmp = a->from;
    3484                 :         198 :                 }
    3485                 :         632 :         }
    3486         [ +  + ]:         868 :         for (a = s->ins; a != NULL; a = a->inchain)
    3487                 :             :         {
    3488         [ +  + ]:         632 :                 if (a->co == co2)
    3489                 :             :                 {
    3490         [ +  + ]:         217 :                         if (a->from->tmp != NULL)
    3491                 :         198 :                                 a->from->tmp = NULL;
    3492                 :             :                         else
    3493                 :          19 :                                 result = false; /* unmatched co2 arc */
    3494                 :         217 :                 }
    3495                 :         632 :         }
    3496         [ +  + ]:         868 :         for (a = s->ins; a != NULL; a = a->inchain)
    3497                 :             :         {
    3498         [ +  + ]:         632 :                 if (a->co == co1)
    3499                 :             :                 {
    3500         [ +  - ]:         198 :                         if (a->from->tmp != NULL)
    3501                 :             :                         {
    3502                 :           0 :                                 result = false; /* unmatched co1 arc */
    3503                 :           0 :                                 a->from->tmp = NULL;
    3504                 :           0 :                         }
    3505                 :         198 :                 }
    3506                 :         632 :         }
    3507                 :         472 :         return result;
    3508                 :         236 : }
    3509                 :             : 
    3510                 :             : /*
    3511                 :             :  * compact - construct the compact representation of an NFA
    3512                 :             :  */
    3513                 :             : static void
    3514                 :        1923 : compact(struct nfa *nfa,
    3515                 :             :                 struct cnfa *cnfa)
    3516                 :             : {
    3517                 :        1923 :         struct state *s;
    3518                 :        1923 :         struct arc *a;
    3519                 :        1923 :         size_t          nstates;
    3520                 :        1923 :         size_t          narcs;
    3521                 :        1923 :         struct carc *ca;
    3522                 :        1923 :         struct carc *first;
    3523                 :             : 
    3524         [ +  - ]:        1923 :         assert(!NISERR());
    3525                 :             : 
    3526                 :        1923 :         nstates = 0;
    3527                 :        1923 :         narcs = 0;
    3528         [ +  + ]:       24283 :         for (s = nfa->states; s != NULL; s = s->next)
    3529                 :             :         {
    3530                 :       22360 :                 nstates++;
    3531                 :       22360 :                 narcs += s->nouts + 1;       /* need one extra for endmarker */
    3532                 :       22360 :         }
    3533                 :             : 
    3534                 :        1923 :         cnfa->stflags = (char *) MALLOC(nstates * sizeof(char));
    3535                 :        1923 :         cnfa->states = (struct carc **) MALLOC(nstates * sizeof(struct carc *));
    3536                 :        1923 :         cnfa->arcs = (struct carc *) MALLOC(narcs * sizeof(struct carc));
    3537   [ +  -  +  -  :        1923 :         if (cnfa->stflags == NULL || cnfa->states == NULL || cnfa->arcs == NULL)
                   -  + ]
    3538                 :             :         {
    3539         [ #  # ]:           0 :                 if (cnfa->stflags != NULL)
    3540                 :           0 :                         FREE(cnfa->stflags);
    3541         [ #  # ]:           0 :                 if (cnfa->states != NULL)
    3542                 :           0 :                         FREE(cnfa->states);
    3543         [ #  # ]:           0 :                 if (cnfa->arcs != NULL)
    3544                 :           0 :                         FREE(cnfa->arcs);
    3545         [ #  # ]:           0 :                 NERR(REG_ESPACE);
    3546                 :           0 :                 return;
    3547                 :             :         }
    3548                 :        1923 :         cnfa->nstates = nstates;
    3549                 :        1923 :         cnfa->pre = nfa->pre->no;
    3550                 :        1923 :         cnfa->post = nfa->post->no;
    3551                 :        1923 :         cnfa->bos[0] = nfa->bos[0];
    3552                 :        1923 :         cnfa->bos[1] = nfa->bos[1];
    3553                 :        1923 :         cnfa->eos[0] = nfa->eos[0];
    3554                 :        1923 :         cnfa->eos[1] = nfa->eos[1];
    3555                 :        1923 :         cnfa->ncolors = maxcolor(nfa->cm) + 1;
    3556                 :        1923 :         cnfa->flags = nfa->flags;
    3557                 :        1923 :         cnfa->minmatchall = nfa->minmatchall;
    3558                 :        1923 :         cnfa->maxmatchall = nfa->maxmatchall;
    3559                 :             : 
    3560                 :        1923 :         ca = cnfa->arcs;
    3561         [ +  + ]:       24283 :         for (s = nfa->states; s != NULL; s = s->next)
    3562                 :             :         {
    3563         [ -  + ]:       22360 :                 assert((size_t) s->no < nstates);
    3564                 :       22360 :                 cnfa->stflags[s->no] = 0;
    3565                 :       22360 :                 cnfa->states[s->no] = ca;
    3566                 :       22360 :                 first = ca;
    3567         [ +  + ]:       65867 :                 for (a = s->outs; a != NULL; a = a->outchain)
    3568      [ +  +  - ]:       43507 :                         switch (a->type)
    3569                 :             :                         {
    3570                 :             :                                 case PLAIN:
    3571                 :       43493 :                                         ca->co = a->co;
    3572                 :       43493 :                                         ca->to = a->to->no;
    3573                 :       43493 :                                         ca++;
    3574                 :       43493 :                                         break;
    3575                 :             :                                 case LACON:
    3576         [ +  - ]:          14 :                                         assert(s->no != cnfa->pre);
    3577         [ -  + ]:          14 :                                         assert(a->co >= 0);
    3578                 :          14 :                                         ca->co = (color) (cnfa->ncolors + a->co);
    3579                 :          14 :                                         ca->to = a->to->no;
    3580                 :          14 :                                         ca++;
    3581                 :          14 :                                         cnfa->flags |= HASLACONS;
    3582                 :          14 :                                         break;
    3583                 :             :                                 default:
    3584         [ #  # ]:           0 :                                         NERR(REG_ASSERT);
    3585                 :           0 :                                         return;
    3586                 :       43507 :                         }
    3587                 :       22360 :                 carcsort(first, ca - first);
    3588                 :       22360 :                 ca->co = COLORLESS;
    3589                 :       22360 :                 ca->to = 0;
    3590                 :       22360 :                 ca++;
    3591                 :       22360 :         }
    3592         [ +  - ]:        1923 :         assert(ca == &cnfa->arcs[narcs]);
    3593         [ +  - ]:        1923 :         assert(cnfa->nstates != 0);
    3594                 :             : 
    3595                 :             :         /* mark no-progress states */
    3596         [ +  + ]:        6994 :         for (a = nfa->pre->outs; a != NULL; a = a->outchain)
    3597                 :        5071 :                 cnfa->stflags[a->to->no] = CNFA_NOPROGRESS;
    3598                 :        1923 :         cnfa->stflags[nfa->pre->no] = CNFA_NOPROGRESS;
    3599         [ -  + ]:        1923 : }
    3600                 :             : 
    3601                 :             : /*
    3602                 :             :  * carcsort - sort compacted-NFA arcs by color
    3603                 :             :  */
    3604                 :             : static void
    3605                 :       22360 : carcsort(struct carc *first, size_t n)
    3606                 :             : {
    3607         [ +  + ]:       22360 :         if (n > 1)
    3608                 :        2594 :                 qsort(first, n, sizeof(struct carc), carc_cmp);
    3609                 :       22360 : }
    3610                 :             : 
    3611                 :             : static int
    3612                 :      170399 : carc_cmp(const void *a, const void *b)
    3613                 :             : {
    3614                 :      170399 :         const struct carc *aa = (const struct carc *) a;
    3615                 :      170399 :         const struct carc *bb = (const struct carc *) b;
    3616                 :             : 
    3617         [ +  + ]:      170399 :         if (aa->co < bb->co)
    3618                 :        3336 :                 return -1;
    3619         [ +  + ]:      167063 :         if (aa->co > bb->co)
    3620                 :        8180 :                 return +1;
    3621         [ +  + ]:      158883 :         if (aa->to < bb->to)
    3622                 :      108372 :                 return -1;
    3623         [ +  - ]:       50511 :         if (aa->to > bb->to)
    3624                 :       50511 :                 return +1;
    3625                 :             :         /* This is unreached, since there should be no duplicate arcs now: */
    3626                 :           0 :         return 0;
    3627                 :      170399 : }
    3628                 :             : 
    3629                 :             : /*
    3630                 :             :  * freecnfa - free a compacted NFA
    3631                 :             :  */
    3632                 :             : static void
    3633                 :          40 : freecnfa(struct cnfa *cnfa)
    3634                 :             : {
    3635         [ +  - ]:          40 :         assert(!NULLCNFA(*cnfa));       /* not empty already */
    3636                 :          40 :         FREE(cnfa->stflags);
    3637                 :          40 :         FREE(cnfa->states);
    3638                 :          40 :         FREE(cnfa->arcs);
    3639                 :          40 :         ZAPCNFA(*cnfa);
    3640                 :          40 : }
    3641                 :             : 
    3642                 :             : /*
    3643                 :             :  * dumpnfa - dump an NFA in human-readable form
    3644                 :             :  */
    3645                 :             : static void
    3646                 :           0 : dumpnfa(struct nfa *nfa,
    3647                 :             :                 FILE *f)
    3648                 :             : {
    3649                 :             : #ifdef REG_DEBUG
    3650                 :             :         struct state *s;
    3651                 :             :         int                     nstates = 0;
    3652                 :             :         int                     narcs = 0;
    3653                 :             : 
    3654                 :             :         fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no);
    3655                 :             :         if (nfa->bos[0] != COLORLESS)
    3656                 :             :                 fprintf(f, ", bos [%ld]", (long) nfa->bos[0]);
    3657                 :             :         if (nfa->bos[1] != COLORLESS)
    3658                 :             :                 fprintf(f, ", bol [%ld]", (long) nfa->bos[1]);
    3659                 :             :         if (nfa->eos[0] != COLORLESS)
    3660                 :             :                 fprintf(f, ", eos [%ld]", (long) nfa->eos[0]);
    3661                 :             :         if (nfa->eos[1] != COLORLESS)
    3662                 :             :                 fprintf(f, ", eol [%ld]", (long) nfa->eos[1]);
    3663                 :             :         if (nfa->flags & HASLACONS)
    3664                 :             :                 fprintf(f, ", haslacons");
    3665                 :             :         if (nfa->flags & HASCANTMATCH)
    3666                 :             :                 fprintf(f, ", hascantmatch");
    3667                 :             :         if (nfa->flags & MATCHALL)
    3668                 :             :         {
    3669                 :             :                 fprintf(f, ", minmatchall %d", nfa->minmatchall);
    3670                 :             :                 if (nfa->maxmatchall == DUPINF)
    3671                 :             :                         fprintf(f, ", maxmatchall inf");
    3672                 :             :                 else
    3673                 :             :                         fprintf(f, ", maxmatchall %d", nfa->maxmatchall);
    3674                 :             :         }
    3675                 :             :         fprintf(f, "\n");
    3676                 :             :         for (s = nfa->states; s != NULL; s = s->next)
    3677                 :             :         {
    3678                 :             :                 dumpstate(s, f);
    3679                 :             :                 nstates++;
    3680                 :             :                 narcs += s->nouts;
    3681                 :             :         }
    3682                 :             :         fprintf(f, "total of %d states, %d arcs\n", nstates, narcs);
    3683                 :             :         if (nfa->parent == NULL)
    3684                 :             :                 dumpcolors(nfa->cm, f);
    3685                 :             :         fflush(f);
    3686                 :             : #endif
    3687                 :           0 : }
    3688                 :             : 
    3689                 :             : #ifdef REG_DEBUG                                /* subordinates of dumpnfa */
    3690                 :             : 
    3691                 :             : /*
    3692                 :             :  * dumpstate - dump an NFA state in human-readable form
    3693                 :             :  */
    3694                 :             : static void
    3695                 :             : dumpstate(struct state *s,
    3696                 :             :                   FILE *f)
    3697                 :             : {
    3698                 :             :         struct arc *a;
    3699                 :             : 
    3700                 :             :         fprintf(f, "%d%s%c", s->no, (s->tmp != NULL) ? "T" : "",
    3701                 :             :                         (s->flag) ? s->flag : '.');
    3702                 :             :         if (s->prev != NULL && s->prev->next != s)
    3703                 :             :                 fprintf(f, "\tstate chain bad\n");
    3704                 :             :         if (s->nouts == 0)
    3705                 :             :                 fprintf(f, "\tno out arcs\n");
    3706                 :             :         else
    3707                 :             :                 dumparcs(s, f);
    3708                 :             :         for (a = s->ins; a != NULL; a = a->inchain)
    3709                 :             :         {
    3710                 :             :                 if (a->to != s)
    3711                 :             :                         fprintf(f, "\tlink from %d to %d on %d's in-chain\n",
    3712                 :             :                                         a->from->no, a->to->no, s->no);
    3713                 :             :         }
    3714                 :             :         fflush(f);
    3715                 :             : }
    3716                 :             : 
    3717                 :             : /*
    3718                 :             :  * dumparcs - dump out-arcs in human-readable form
    3719                 :             :  */
    3720                 :             : static void
    3721                 :             : dumparcs(struct state *s,
    3722                 :             :                  FILE *f)
    3723                 :             : {
    3724                 :             :         int                     pos;
    3725                 :             :         struct arc *a;
    3726                 :             : 
    3727                 :             :         /* printing oldest arcs first is usually clearer */
    3728                 :             :         a = s->outs;
    3729                 :             :         assert(a != NULL);
    3730                 :             :         while (a->outchain != NULL)
    3731                 :             :                 a = a->outchain;
    3732                 :             :         pos = 1;
    3733                 :             :         do
    3734                 :             :         {
    3735                 :             :                 dumparc(a, s, f);
    3736                 :             :                 if (pos == 5)
    3737                 :             :                 {
    3738                 :             :                         fprintf(f, "\n");
    3739                 :             :                         pos = 1;
    3740                 :             :                 }
    3741                 :             :                 else
    3742                 :             :                         pos++;
    3743                 :             :                 a = a->outchainRev;
    3744                 :             :         } while (a != NULL);
    3745                 :             :         if (pos != 1)
    3746                 :             :                 fprintf(f, "\n");
    3747                 :             : }
    3748                 :             : 
    3749                 :             : /*
    3750                 :             :  * dumparc - dump one outarc in readable form, including prefixing tab
    3751                 :             :  */
    3752                 :             : static void
    3753                 :             : dumparc(struct arc *a,
    3754                 :             :                 struct state *s,
    3755                 :             :                 FILE *f)
    3756                 :             : {
    3757                 :             :         struct arc *aa;
    3758                 :             : 
    3759                 :             :         fprintf(f, "\t");
    3760                 :             :         switch (a->type)
    3761                 :             :         {
    3762                 :             :                 case PLAIN:
    3763                 :             :                         if (a->co == RAINBOW)
    3764                 :             :                                 fprintf(f, "[*]");
    3765                 :             :                         else
    3766                 :             :                                 fprintf(f, "[%ld]", (long) a->co);
    3767                 :             :                         break;
    3768                 :             :                 case AHEAD:
    3769                 :             :                         if (a->co == RAINBOW)
    3770                 :             :                                 fprintf(f, ">*>");
    3771                 :             :                         else
    3772                 :             :                                 fprintf(f, ">%ld>", (long) a->co);
    3773                 :             :                         break;
    3774                 :             :                 case BEHIND:
    3775                 :             :                         if (a->co == RAINBOW)
    3776                 :             :                                 fprintf(f, "<*<");
    3777                 :             :                         else
    3778                 :             :                                 fprintf(f, "<%ld<", (long) a->co);
    3779                 :             :                         break;
    3780                 :             :                 case LACON:
    3781                 :             :                         fprintf(f, ":%ld:", (long) a->co);
    3782                 :             :                         break;
    3783                 :             :                 case '^':
    3784                 :             :                 case '$':
    3785                 :             :                         fprintf(f, "%c%d", a->type, (int) a->co);
    3786                 :             :                         break;
    3787                 :             :                 case EMPTY:
    3788                 :             :                         break;
    3789                 :             :                 case CANTMATCH:
    3790                 :             :                         fprintf(f, "X");
    3791                 :             :                         break;
    3792                 :             :                 default:
    3793                 :             :                         fprintf(f, "0x%x/0%lo", a->type, (long) a->co);
    3794                 :             :                         break;
    3795                 :             :         }
    3796                 :             :         if (a->from != s)
    3797                 :             :                 fprintf(f, "?%d?", a->from->no);
    3798                 :             :         for (aa = a->from->outs; aa != NULL; aa = aa->outchain)
    3799                 :             :                 if (aa == a)
    3800                 :             :                         break;                          /* NOTE BREAK OUT */
    3801                 :             :         if (aa == NULL)
    3802                 :             :                 fprintf(f, "?!?");            /* missing from out-chain */
    3803                 :             :         fprintf(f, "->");
    3804                 :             :         if (a->to == NULL)
    3805                 :             :         {
    3806                 :             :                 fprintf(f, "NULL");
    3807                 :             :                 return;
    3808                 :             :         }
    3809                 :             :         fprintf(f, "%d", a->to->no);
    3810                 :             :         for (aa = a->to->ins; aa != NULL; aa = aa->inchain)
    3811                 :             :                 if (aa == a)
    3812                 :             :                         break;                          /* NOTE BREAK OUT */
    3813                 :             :         if (aa == NULL)
    3814                 :             :                 fprintf(f, "?!?");            /* missing from in-chain */
    3815                 :             : }
    3816                 :             : #endif                                                  /* REG_DEBUG */
    3817                 :             : 
    3818                 :             : /*
    3819                 :             :  * dumpcnfa - dump a compacted NFA in human-readable form
    3820                 :             :  */
    3821                 :             : #ifdef REG_DEBUG
    3822                 :             : static void
    3823                 :             : dumpcnfa(struct cnfa *cnfa,
    3824                 :             :                  FILE *f)
    3825                 :             : {
    3826                 :             :         int                     st;
    3827                 :             : 
    3828                 :             :         fprintf(f, "pre %d, post %d", cnfa->pre, cnfa->post);
    3829                 :             :         if (cnfa->bos[0] != COLORLESS)
    3830                 :             :                 fprintf(f, ", bos [%ld]", (long) cnfa->bos[0]);
    3831                 :             :         if (cnfa->bos[1] != COLORLESS)
    3832                 :             :                 fprintf(f, ", bol [%ld]", (long) cnfa->bos[1]);
    3833                 :             :         if (cnfa->eos[0] != COLORLESS)
    3834                 :             :                 fprintf(f, ", eos [%ld]", (long) cnfa->eos[0]);
    3835                 :             :         if (cnfa->eos[1] != COLORLESS)
    3836                 :             :                 fprintf(f, ", eol [%ld]", (long) cnfa->eos[1]);
    3837                 :             :         if (cnfa->flags & HASLACONS)
    3838                 :             :                 fprintf(f, ", haslacons");
    3839                 :             :         if (cnfa->flags & MATCHALL)
    3840                 :             :         {
    3841                 :             :                 fprintf(f, ", minmatchall %d", cnfa->minmatchall);
    3842                 :             :                 if (cnfa->maxmatchall == DUPINF)
    3843                 :             :                         fprintf(f, ", maxmatchall inf");
    3844                 :             :                 else
    3845                 :             :                         fprintf(f, ", maxmatchall %d", cnfa->maxmatchall);
    3846                 :             :         }
    3847                 :             :         fprintf(f, "\n");
    3848                 :             :         for (st = 0; st < cnfa->nstates; st++)
    3849                 :             :                 dumpcstate(st, cnfa, f);
    3850                 :             :         fflush(f);
    3851                 :             : }
    3852                 :             : #endif
    3853                 :             : 
    3854                 :             : #ifdef REG_DEBUG                                /* subordinates of dumpcnfa */
    3855                 :             : 
    3856                 :             : /*
    3857                 :             :  * dumpcstate - dump a compacted-NFA state in human-readable form
    3858                 :             :  */
    3859                 :             : static void
    3860                 :             : dumpcstate(int st,
    3861                 :             :                    struct cnfa *cnfa,
    3862                 :             :                    FILE *f)
    3863                 :             : {
    3864                 :             :         struct carc *ca;
    3865                 :             :         int                     pos;
    3866                 :             : 
    3867                 :             :         fprintf(f, "%d%s", st, (cnfa->stflags[st] & CNFA_NOPROGRESS) ? ":" : ".");
    3868                 :             :         pos = 1;
    3869                 :             :         for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
    3870                 :             :         {
    3871                 :             :                 if (ca->co == RAINBOW)
    3872                 :             :                         fprintf(f, "\t[*]->%d", ca->to);
    3873                 :             :                 else if (ca->co < cnfa->ncolors)
    3874                 :             :                         fprintf(f, "\t[%ld]->%d", (long) ca->co, ca->to);
    3875                 :             :                 else
    3876                 :             :                         fprintf(f, "\t:%ld:->%d", (long) (ca->co - cnfa->ncolors), ca->to);
    3877                 :             :                 if (pos == 5)
    3878                 :             :                 {
    3879                 :             :                         fprintf(f, "\n");
    3880                 :             :                         pos = 1;
    3881                 :             :                 }
    3882                 :             :                 else
    3883                 :             :                         pos++;
    3884                 :             :         }
    3885                 :             :         if (ca == cnfa->states[st] || pos != 1)
    3886                 :             :                 fprintf(f, "\n");
    3887                 :             :         fflush(f);
    3888                 :             : }
    3889                 :             : 
    3890                 :             : #endif                                                  /* REG_DEBUG */
        

Generated by: LCOV version 2.3.2-1