LCOV - code coverage report
Current view: top level - contrib/fuzzystrmatch - dmetaphone.c (source / functions) Coverage Total Hit
Test: Code coverage Lines: 0.0 % 690 0
Test Date: 2026-01-26 10:56:24 Functions: 0.0 % 15 0
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*
       2              :  * This is a port of the Double Metaphone algorithm for use in PostgreSQL.
       3              :  *
       4              :  * contrib/fuzzystrmatch/dmetaphone.c
       5              :  *
       6              :  * Double Metaphone computes 2 "sounds like" strings - a primary and an
       7              :  * alternate. In most cases they are the same, but for foreign names
       8              :  * especially they can be a bit different, depending on pronunciation.
       9              :  *
      10              :  * Information on using Double Metaphone can be found at
      11              :  *       http://www.codeproject.com/string/dmetaphone1.asp
      12              :  * and the original article describing it can be found at
      13              :  *       http://drdobbs.com/184401251
      14              :  *
      15              :  * For PostgreSQL we provide 2 functions - one for the primary and one for
      16              :  * the alternate. That way the functions are pure text->text mappings that
      17              :  * are useful in functional indexes. These are 'dmetaphone' for the
      18              :  * primary and 'dmetaphone_alt' for the alternate.
      19              :  *
      20              :  * Assuming that dmetaphone.so is in $libdir, the SQL to set up the
      21              :  * functions looks like this:
      22              :  *
      23              :  * CREATE FUNCTION dmetaphone (text) RETURNS text
      24              :  *        LANGUAGE C IMMUTABLE STRICT
      25              :  *        AS '$libdir/dmetaphone', 'dmetaphone';
      26              :  *
      27              :  * CREATE FUNCTION dmetaphone_alt (text) RETURNS text
      28              :  *        LANGUAGE C IMMUTABLE STRICT
      29              :  *        AS '$libdir/dmetaphone', 'dmetaphone_alt';
      30              :  *
      31              :  * Note that you have to declare the functions IMMUTABLE if you want to
      32              :  * use them in functional indexes, and you have to declare them as STRICT
      33              :  * as they do not check for NULL input, and will segfault if given NULL input.
      34              :  * (See below for an alternative.) Declaring them as STRICT means PostgreSQL
      35              :  * will never call them with NULL, but instead assume the result is NULL,
      36              :  * which is what we (I) want.
      37              :  *
      38              :  * Alternatively, compile with -DDMETAPHONE_NOSTRICT and the functions
      39              :  * will detect NULL input and return NULL. Then you don't have to declare them
      40              :  * as STRICT.
      41              :  *
      42              :  * There is a small inefficiency here - each function call actually computes
      43              :  * both the primary and the alternate and then throws away the one it doesn't
      44              :  * need. That's the way the perl module was written, because perl can handle
      45              :  * a list return more easily than we can in PostgreSQL. The result has been
      46              :  * fast enough for my needs, but it could maybe be optimized a bit to remove
      47              :  * that behaviour.
      48              :  *
      49              :  */
      50              : 
      51              : 
      52              : /***************************** COPYRIGHT NOTICES ***********************
      53              : 
      54              : Most of this code is directly from the Text::DoubleMetaphone perl module
      55              : version 0.05 available from https://www.cpan.org/.
      56              : It bears this copyright notice:
      57              : 
      58              : 
      59              :   Copyright 2000, Maurice Aubrey <maurice@hevanet.com>.
      60              :   All rights reserved.
      61              : 
      62              :   This code is based heavily on the C++ implementation by
      63              :   Lawrence Philips and incorporates several bug fixes courtesy
      64              :   of Kevin Atkinson <kevina@users.sourceforge.net>.
      65              : 
      66              :   This module is free software; you may redistribute it and/or
      67              :   modify it under the same terms as Perl itself.
      68              : 
      69              : The remaining code is authored by Andrew Dunstan <amdunstan@ncshp.org> and
      70              : <andrew@dunslane.net> and is covered this copyright:
      71              : 
      72              :   Copyright 2003, North Carolina State Highway Patrol.
      73              :   All rights reserved.
      74              : 
      75              :   Permission to use, copy, modify, and distribute this software and its
      76              :   documentation for any purpose, without fee, and without a written agreement
      77              :   is hereby granted, provided that the above copyright notice and this
      78              :   paragraph and the following two paragraphs appear in all copies.
      79              : 
      80              :   IN NO EVENT SHALL THE NORTH CAROLINA STATE HIGHWAY PATROL BE LIABLE TO ANY
      81              :   PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
      82              :   INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
      83              :   DOCUMENTATION, EVEN IF THE NORTH CAROLINA STATE HIGHWAY PATROL HAS BEEN
      84              :   ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
      85              : 
      86              :   THE NORTH CAROLINA STATE HIGHWAY PATROL SPECIFICALLY DISCLAIMS ANY
      87              :   WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
      88              :   MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED
      89              :   HEREUNDER IS ON AN "AS IS" BASIS, AND THE NORTH CAROLINA STATE HIGHWAY PATROL
      90              :   HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
      91              :   MODIFICATIONS.
      92              : 
      93              : ***********************************************************************/
      94              : 
      95              : 
      96              : /* include these first, according to the docs */
      97              : #ifndef DMETAPHONE_MAIN
      98              : 
      99              : #include "postgres.h"
     100              : 
     101              : #include "utils/builtins.h"
     102              : #include "utils/formatting.h"
     103              : 
     104              : /* turn off assertions for embedded function */
     105              : #define NDEBUG
     106              : 
     107              : #else                                                   /* DMETAPHONE_MAIN */
     108              : 
     109              : /* we need these if we didn't get them from postgres.h */
     110              : #include <stdio.h>
     111              : #include <stdlib.h>
     112              : #include <string.h>
     113              : #include <stdarg.h>
     114              : 
     115              : #endif                                                  /* DMETAPHONE_MAIN */
     116              : 
     117              : #include <assert.h>
     118              : #include <ctype.h>
     119              : 
     120              : /* prototype for the main function we got from the perl module */
     121              : static void DoubleMetaphone(const char *str, Oid collid, char **codes);
     122              : 
     123              : #ifndef DMETAPHONE_MAIN
     124              : 
     125              : /*
     126              :  * The PostgreSQL visible dmetaphone function.
     127              :  */
     128              : 
     129            0 : PG_FUNCTION_INFO_V1(dmetaphone);
     130              : 
     131              : Datum
     132            0 : dmetaphone(PG_FUNCTION_ARGS)
     133              : {
     134            0 :         text       *arg;
     135            0 :         char       *aptr,
     136              :                            *codes[2],
     137              :                            *code;
     138              : 
     139              : #ifdef DMETAPHONE_NOSTRICT
     140              :         if (PG_ARGISNULL(0))
     141              :                 PG_RETURN_NULL();
     142              : #endif
     143            0 :         arg = PG_GETARG_TEXT_PP(0);
     144            0 :         aptr = text_to_cstring(arg);
     145              : 
     146            0 :         DoubleMetaphone(aptr, PG_GET_COLLATION(), codes);
     147            0 :         code = codes[0];
     148            0 :         if (!code)
     149            0 :                 code = "";
     150              : 
     151            0 :         PG_RETURN_TEXT_P(cstring_to_text(code));
     152            0 : }
     153              : 
     154              : /*
     155              :  * The PostgreSQL visible dmetaphone_alt function.
     156              :  */
     157              : 
     158            0 : PG_FUNCTION_INFO_V1(dmetaphone_alt);
     159              : 
     160              : Datum
     161            0 : dmetaphone_alt(PG_FUNCTION_ARGS)
     162              : {
     163            0 :         text       *arg;
     164            0 :         char       *aptr,
     165              :                            *codes[2],
     166              :                            *code;
     167              : 
     168              : #ifdef DMETAPHONE_NOSTRICT
     169              :         if (PG_ARGISNULL(0))
     170              :                 PG_RETURN_NULL();
     171              : #endif
     172            0 :         arg = PG_GETARG_TEXT_PP(0);
     173            0 :         aptr = text_to_cstring(arg);
     174              : 
     175            0 :         DoubleMetaphone(aptr, PG_GET_COLLATION(), codes);
     176            0 :         code = codes[1];
     177            0 :         if (!code)
     178            0 :                 code = "";
     179              : 
     180            0 :         PG_RETURN_TEXT_P(cstring_to_text(code));
     181            0 : }
     182              : 
     183              : 
     184              : /* here is where we start the code imported from the perl module */
     185              : 
     186              : /* all memory handling is done with these macros */
     187              : 
     188              : #define META_MALLOC(v,n,t) \
     189              :                   (v = (t*)palloc(((n)*sizeof(t))))
     190              : 
     191              : #define META_REALLOC(v,n,t) \
     192              :                                           (v = (t*)repalloc((v),((n)*sizeof(t))))
     193              : 
     194              : /*
     195              :  * Don't do pfree - it seems to cause a SIGSEGV sometimes - which might have just
     196              :  * been caused by reloading the module in development.
     197              :  * So we rely on context cleanup - Tom Lane says pfree shouldn't be necessary
     198              :  * in a case like this.
     199              :  */
     200              : 
     201              : #define META_FREE(x) ((void)true)       /* pfree((x)) */
     202              : #else                                                   /* not defined DMETAPHONE_MAIN */
     203              : 
     204              : /* use the standard malloc library when not running in PostgreSQL */
     205              : 
     206              : #define META_MALLOC(v,n,t) \
     207              :                   (v = (t*)malloc(((n)*sizeof(t))))
     208              : 
     209              : #define META_REALLOC(v,n,t) \
     210              :                                           (v = (t*)realloc((v),((n)*sizeof(t))))
     211              : 
     212              : #define META_FREE(x) free((x))
     213              : #endif                                                  /* defined DMETAPHONE_MAIN */
     214              : 
     215              : 
     216              : 
     217              : /* this typedef was originally in the perl module's .h file */
     218              : 
     219              : typedef struct
     220              : {
     221              :         char       *str;
     222              :         int                     length;
     223              :         int                     bufsize;
     224              :         int                     free_string_on_destroy;
     225              : }
     226              : 
     227              : metastring;
     228              : 
     229              : /*
     230              :  * remaining perl module funcs unchanged except for declaring them static
     231              :  * and reformatting to PostgreSQL indentation and to fit in 80 cols.
     232              :  *
     233              :  */
     234              : 
     235              : static metastring *
     236            0 : NewMetaString(const char *init_str)
     237              : {
     238            0 :         metastring *s;
     239            0 :         char            empty_string[] = "";
     240              : 
     241            0 :         META_MALLOC(s, 1, metastring);
     242              :         assert(s != NULL);
     243              : 
     244            0 :         if (init_str == NULL)
     245            0 :                 init_str = empty_string;
     246            0 :         s->length = strlen(init_str);
     247              :         /* preallocate a bit more for potential growth */
     248            0 :         s->bufsize = s->length + 7;
     249              : 
     250            0 :         META_MALLOC(s->str, s->bufsize, char);
     251              :         assert(s->str != NULL);
     252              : 
     253            0 :         memcpy(s->str, init_str, s->length + 1);
     254            0 :         s->free_string_on_destroy = 1;
     255              : 
     256            0 :         return s;
     257            0 : }
     258              : 
     259              : 
     260              : static void
     261            0 : DestroyMetaString(metastring *s)
     262              : {
     263            0 :         if (s == NULL)
     264            0 :                 return;
     265              : 
     266            0 :         if (s->free_string_on_destroy && (s->str != NULL))
     267            0 :                 META_FREE(s->str);
     268              : 
     269              :         META_FREE(s);
     270            0 : }
     271              : 
     272              : 
     273              : static void
     274            0 : IncreaseBuffer(metastring *s, int chars_needed)
     275              : {
     276            0 :         META_REALLOC(s->str, (s->bufsize + chars_needed + 10), char);
     277              :         assert(s->str != NULL);
     278            0 :         s->bufsize = s->bufsize + chars_needed + 10;
     279            0 : }
     280              : 
     281              : 
     282              : static metastring *
     283            0 : MakeUpper(metastring *s, Oid collid)
     284              : {
     285            0 :         char       *newstr;
     286            0 :         metastring *newms;
     287              : 
     288            0 :         newstr = str_toupper(s->str, s->length, collid);
     289            0 :         newms = NewMetaString(newstr);
     290            0 :         DestroyMetaString(s);
     291              : 
     292            0 :         return newms;
     293            0 : }
     294              : 
     295              : 
     296              : static int
     297            0 : IsVowel(metastring *s, int pos)
     298              : {
     299            0 :         char            c;
     300              : 
     301            0 :         if ((pos < 0) || (pos >= s->length))
     302            0 :                 return 0;
     303              : 
     304            0 :         c = *(s->str + pos);
     305            0 :         if ((c == 'A') || (c == 'E') || (c == 'I') || (c == 'O') ||
     306            0 :                 (c == 'U') || (c == 'Y'))
     307            0 :                 return 1;
     308              : 
     309            0 :         return 0;
     310            0 : }
     311              : 
     312              : 
     313              : static int
     314            0 : SlavoGermanic(metastring *s)
     315              : {
     316            0 :         if (strstr(s->str, "W"))
     317            0 :                 return 1;
     318            0 :         else if (strstr(s->str, "K"))
     319            0 :                 return 1;
     320            0 :         else if (strstr(s->str, "CZ"))
     321            0 :                 return 1;
     322            0 :         else if (strstr(s->str, "WITZ"))
     323            0 :                 return 1;
     324              :         else
     325            0 :                 return 0;
     326            0 : }
     327              : 
     328              : 
     329              : static char
     330            0 : GetAt(metastring *s, int pos)
     331              : {
     332            0 :         if ((pos < 0) || (pos >= s->length))
     333            0 :                 return '\0';
     334              : 
     335            0 :         return *(s->str + pos);
     336            0 : }
     337              : 
     338              : 
     339              : static void
     340            0 : SetAt(metastring *s, int pos, char c)
     341              : {
     342            0 :         if ((pos < 0) || (pos >= s->length))
     343            0 :                 return;
     344              : 
     345            0 :         *(s->str + pos) = c;
     346            0 : }
     347              : 
     348              : 
     349              : /*
     350              :    Caveats: the START value is 0 based
     351              : */
     352              : static int
     353            0 : StringAt(metastring *s, int start, int length,...)
     354              : {
     355            0 :         char       *test;
     356            0 :         char       *pos;
     357            0 :         va_list         ap;
     358              : 
     359            0 :         if ((start < 0) || (start >= s->length))
     360            0 :                 return 0;
     361              : 
     362            0 :         pos = (s->str + start);
     363            0 :         va_start(ap, length);
     364              : 
     365            0 :         do
     366              :         {
     367            0 :                 test = va_arg(ap, char *);
     368            0 :                 if (*test && (strncmp(pos, test, length) == 0))
     369              :                 {
     370            0 :                         va_end(ap);
     371            0 :                         return 1;
     372              :                 }
     373            0 :         }
     374            0 :         while (strcmp(test, "") != 0);
     375              : 
     376            0 :         va_end(ap);
     377              : 
     378            0 :         return 0;
     379            0 : }
     380              : 
     381              : 
     382              : static void
     383            0 : MetaphAdd(metastring *s, const char *new_str)
     384              : {
     385            0 :         int                     add_length;
     386              : 
     387            0 :         if (new_str == NULL)
     388            0 :                 return;
     389              : 
     390            0 :         add_length = strlen(new_str);
     391            0 :         if ((s->length + add_length) > (s->bufsize - 1))
     392            0 :                 IncreaseBuffer(s, add_length);
     393              : 
     394            0 :         strcat(s->str, new_str);
     395            0 :         s->length += add_length;
     396            0 : }
     397              : 
     398              : 
     399              : static void
     400            0 : DoubleMetaphone(const char *str, Oid collid, char **codes)
     401              : {
     402            0 :         int                     length;
     403            0 :         metastring *original;
     404            0 :         metastring *primary;
     405            0 :         metastring *secondary;
     406            0 :         int                     current;
     407            0 :         int                     last;
     408              : 
     409            0 :         current = 0;
     410              :         /* we need the real length and last prior to padding */
     411            0 :         length = strlen(str);
     412            0 :         last = length - 1;
     413            0 :         original = NewMetaString(str);
     414              :         /* Pad original so we can index beyond end */
     415            0 :         MetaphAdd(original, "     ");
     416              : 
     417            0 :         primary = NewMetaString("");
     418            0 :         secondary = NewMetaString("");
     419            0 :         primary->free_string_on_destroy = 0;
     420            0 :         secondary->free_string_on_destroy = 0;
     421              : 
     422            0 :         original = MakeUpper(original, collid);
     423              : 
     424              :         /* skip these when at start of word */
     425            0 :         if (StringAt(original, 0, 2, "GN", "KN", "PN", "WR", "PS", ""))
     426            0 :                 current += 1;
     427              : 
     428              :         /* Initial 'X' is pronounced 'Z' e.g. 'Xavier' */
     429            0 :         if (GetAt(original, 0) == 'X')
     430              :         {
     431            0 :                 MetaphAdd(primary, "S");      /* 'Z' maps to 'S' */
     432            0 :                 MetaphAdd(secondary, "S");
     433            0 :                 current += 1;
     434            0 :         }
     435              : 
     436              :         /* main loop */
     437            0 :         while ((primary->length < 4) || (secondary->length < 4))
     438              :         {
     439            0 :                 if (current >= length)
     440            0 :                         break;
     441              : 
     442            0 :                 switch (GetAt(original, current))
     443              :                 {
     444              :                         case 'A':
     445              :                         case 'E':
     446              :                         case 'I':
     447              :                         case 'O':
     448              :                         case 'U':
     449              :                         case 'Y':
     450            0 :                                 if (current == 0)
     451              :                                 {
     452              :                                         /* all init vowels now map to 'A' */
     453            0 :                                         MetaphAdd(primary, "A");
     454            0 :                                         MetaphAdd(secondary, "A");
     455            0 :                                 }
     456            0 :                                 current += 1;
     457            0 :                                 break;
     458              : 
     459              :                         case 'B':
     460              : 
     461              :                                 /* "-mb", e.g", "dumb", already skipped over... */
     462            0 :                                 MetaphAdd(primary, "P");
     463            0 :                                 MetaphAdd(secondary, "P");
     464              : 
     465            0 :                                 if (GetAt(original, current + 1) == 'B')
     466            0 :                                         current += 2;
     467              :                                 else
     468            0 :                                         current += 1;
     469            0 :                                 break;
     470              : 
     471              :                         case '\xc7':            /* C with cedilla */
     472            0 :                                 MetaphAdd(primary, "S");
     473            0 :                                 MetaphAdd(secondary, "S");
     474            0 :                                 current += 1;
     475            0 :                                 break;
     476              : 
     477              :                         case 'C':
     478              :                                 /* various germanic */
     479            0 :                                 if ((current > 1)
     480            0 :                                         && !IsVowel(original, current - 2)
     481            0 :                                         && StringAt(original, (current - 1), 3, "ACH", "")
     482            0 :                                         && ((GetAt(original, current + 2) != 'I')
     483            0 :                                                 && ((GetAt(original, current + 2) != 'E')
     484            0 :                                                         || StringAt(original, (current - 2), 6, "BACHER",
     485              :                                                                                 "MACHER", ""))))
     486              :                                 {
     487            0 :                                         MetaphAdd(primary, "K");
     488            0 :                                         MetaphAdd(secondary, "K");
     489            0 :                                         current += 2;
     490            0 :                                         break;
     491              :                                 }
     492              : 
     493              :                                 /* special case 'caesar' */
     494            0 :                                 if ((current == 0)
     495            0 :                                         && StringAt(original, current, 6, "CAESAR", ""))
     496              :                                 {
     497            0 :                                         MetaphAdd(primary, "S");
     498            0 :                                         MetaphAdd(secondary, "S");
     499            0 :                                         current += 2;
     500            0 :                                         break;
     501              :                                 }
     502              : 
     503              :                                 /* italian 'chianti' */
     504            0 :                                 if (StringAt(original, current, 4, "CHIA", ""))
     505              :                                 {
     506            0 :                                         MetaphAdd(primary, "K");
     507            0 :                                         MetaphAdd(secondary, "K");
     508            0 :                                         current += 2;
     509            0 :                                         break;
     510              :                                 }
     511              : 
     512            0 :                                 if (StringAt(original, current, 2, "CH", ""))
     513              :                                 {
     514              :                                         /* find 'michael' */
     515            0 :                                         if ((current > 0)
     516            0 :                                                 && StringAt(original, current, 4, "CHAE", ""))
     517              :                                         {
     518            0 :                                                 MetaphAdd(primary, "K");
     519            0 :                                                 MetaphAdd(secondary, "X");
     520            0 :                                                 current += 2;
     521            0 :                                                 break;
     522              :                                         }
     523              : 
     524              :                                         /* greek roots e.g. 'chemistry', 'chorus' */
     525            0 :                                         if ((current == 0)
     526            0 :                                                 && (StringAt(original, (current + 1), 5,
     527              :                                                                          "HARAC", "HARIS", "")
     528            0 :                                                         || StringAt(original, (current + 1), 3, "HOR",
     529              :                                                                                 "HYM", "HIA", "HEM", ""))
     530            0 :                                                 && !StringAt(original, 0, 5, "CHORE", ""))
     531              :                                         {
     532            0 :                                                 MetaphAdd(primary, "K");
     533            0 :                                                 MetaphAdd(secondary, "K");
     534            0 :                                                 current += 2;
     535            0 :                                                 break;
     536              :                                         }
     537              : 
     538              :                                         /* germanic, greek, or otherwise 'ch' for 'kh' sound */
     539            0 :                                         if ((StringAt(original, 0, 4, "VAN ", "VON ", "")
     540            0 :                                                  || StringAt(original, 0, 3, "SCH", ""))
     541              :                                         /* 'architect but not 'arch', 'orchestra', 'orchid' */
     542            0 :                                                 || StringAt(original, (current - 2), 6, "ORCHES",
     543              :                                                                         "ARCHIT", "ORCHID", "")
     544            0 :                                                 || StringAt(original, (current + 2), 1, "T", "S",
     545              :                                                                         "")
     546            0 :                                                 || ((StringAt(original, (current - 1), 1,
     547              :                                                                           "A", "O", "U", "E", "")
     548            0 :                                                          || (current == 0))
     549              : 
     550              :                                         /*
     551              :                                          * e.g., 'wachtler', 'wechsler', but not 'tichner'
     552              :                                          */
     553            0 :                                                         && StringAt(original, (current + 2), 1, "L", "R",
     554              :                                                                                 "N", "M", "B", "H", "F", "V", "W",
     555              :                                                                                 " ", "")))
     556              :                                         {
     557            0 :                                                 MetaphAdd(primary, "K");
     558            0 :                                                 MetaphAdd(secondary, "K");
     559            0 :                                         }
     560              :                                         else
     561              :                                         {
     562            0 :                                                 if (current > 0)
     563              :                                                 {
     564            0 :                                                         if (StringAt(original, 0, 2, "MC", ""))
     565              :                                                         {
     566              :                                                                 /* e.g., "McHugh" */
     567            0 :                                                                 MetaphAdd(primary, "K");
     568            0 :                                                                 MetaphAdd(secondary, "K");
     569            0 :                                                         }
     570              :                                                         else
     571              :                                                         {
     572            0 :                                                                 MetaphAdd(primary, "X");
     573            0 :                                                                 MetaphAdd(secondary, "K");
     574              :                                                         }
     575            0 :                                                 }
     576              :                                                 else
     577              :                                                 {
     578            0 :                                                         MetaphAdd(primary, "X");
     579            0 :                                                         MetaphAdd(secondary, "X");
     580              :                                                 }
     581              :                                         }
     582            0 :                                         current += 2;
     583            0 :                                         break;
     584              :                                 }
     585              :                                 /* e.g., 'czerny' */
     586            0 :                                 if (StringAt(original, current, 2, "CZ", "")
     587            0 :                                         && !StringAt(original, (current - 2), 4, "WICZ", ""))
     588              :                                 {
     589            0 :                                         MetaphAdd(primary, "S");
     590            0 :                                         MetaphAdd(secondary, "X");
     591            0 :                                         current += 2;
     592            0 :                                         break;
     593              :                                 }
     594              : 
     595              :                                 /* e.g., 'focaccia' */
     596            0 :                                 if (StringAt(original, (current + 1), 3, "CIA", ""))
     597              :                                 {
     598            0 :                                         MetaphAdd(primary, "X");
     599            0 :                                         MetaphAdd(secondary, "X");
     600            0 :                                         current += 3;
     601            0 :                                         break;
     602              :                                 }
     603              : 
     604              :                                 /* double 'C', but not if e.g. 'McClellan' */
     605            0 :                                 if (StringAt(original, current, 2, "CC", "")
     606            0 :                                         && !((current == 1) && (GetAt(original, 0) == 'M')))
     607              :                                 {
     608              :                                         /* 'bellocchio' but not 'bacchus' */
     609            0 :                                         if (StringAt(original, (current + 2), 1, "I", "E", "H", "")
     610            0 :                                                 && !StringAt(original, (current + 2), 2, "HU", ""))
     611              :                                         {
     612              :                                                 /* 'accident', 'accede', 'succeed' */
     613            0 :                                                 if (((current == 1)
     614            0 :                                                          && (GetAt(original, current - 1) == 'A'))
     615            0 :                                                         || StringAt(original, (current - 1), 5, "UCCEE",
     616              :                                                                                 "UCCES", ""))
     617              :                                                 {
     618            0 :                                                         MetaphAdd(primary, "KS");
     619            0 :                                                         MetaphAdd(secondary, "KS");
     620              :                                                         /* 'bacci', 'bertucci', other italian */
     621            0 :                                                 }
     622              :                                                 else
     623              :                                                 {
     624            0 :                                                         MetaphAdd(primary, "X");
     625            0 :                                                         MetaphAdd(secondary, "X");
     626              :                                                 }
     627            0 :                                                 current += 3;
     628            0 :                                                 break;
     629              :                                         }
     630              :                                         else
     631              :                                         {                       /* Pierce's rule */
     632            0 :                                                 MetaphAdd(primary, "K");
     633            0 :                                                 MetaphAdd(secondary, "K");
     634            0 :                                                 current += 2;
     635            0 :                                                 break;
     636              :                                         }
     637              :                                 }
     638              : 
     639            0 :                                 if (StringAt(original, current, 2, "CK", "CG", "CQ", ""))
     640              :                                 {
     641            0 :                                         MetaphAdd(primary, "K");
     642            0 :                                         MetaphAdd(secondary, "K");
     643            0 :                                         current += 2;
     644            0 :                                         break;
     645              :                                 }
     646              : 
     647            0 :                                 if (StringAt(original, current, 2, "CI", "CE", "CY", ""))
     648              :                                 {
     649              :                                         /* italian vs. english */
     650            0 :                                         if (StringAt
     651            0 :                                                 (original, current, 3, "CIO", "CIE", "CIA", ""))
     652              :                                         {
     653            0 :                                                 MetaphAdd(primary, "S");
     654            0 :                                                 MetaphAdd(secondary, "X");
     655            0 :                                         }
     656              :                                         else
     657              :                                         {
     658            0 :                                                 MetaphAdd(primary, "S");
     659            0 :                                                 MetaphAdd(secondary, "S");
     660              :                                         }
     661            0 :                                         current += 2;
     662            0 :                                         break;
     663              :                                 }
     664              : 
     665              :                                 /* else */
     666            0 :                                 MetaphAdd(primary, "K");
     667            0 :                                 MetaphAdd(secondary, "K");
     668              : 
     669              :                                 /* name sent in 'mac caffrey', 'mac gregor' */
     670            0 :                                 if (StringAt(original, (current + 1), 2, " C", " Q", " G", ""))
     671            0 :                                         current += 3;
     672            0 :                                 else if (StringAt(original, (current + 1), 1, "C", "K", "Q", "")
     673            0 :                                                  && !StringAt(original, (current + 1), 2,
     674              :                                                                           "CE", "CI", ""))
     675            0 :                                         current += 2;
     676              :                                 else
     677            0 :                                         current += 1;
     678            0 :                                 break;
     679              : 
     680              :                         case 'D':
     681            0 :                                 if (StringAt(original, current, 2, "DG", ""))
     682              :                                 {
     683            0 :                                         if (StringAt(original, (current + 2), 1,
     684              :                                                                  "I", "E", "Y", ""))
     685              :                                         {
     686              :                                                 /* e.g. 'edge' */
     687            0 :                                                 MetaphAdd(primary, "J");
     688            0 :                                                 MetaphAdd(secondary, "J");
     689            0 :                                                 current += 3;
     690            0 :                                                 break;
     691              :                                         }
     692              :                                         else
     693              :                                         {
     694              :                                                 /* e.g. 'edgar' */
     695            0 :                                                 MetaphAdd(primary, "TK");
     696            0 :                                                 MetaphAdd(secondary, "TK");
     697            0 :                                                 current += 2;
     698            0 :                                                 break;
     699              :                                         }
     700              :                                 }
     701              : 
     702            0 :                                 if (StringAt(original, current, 2, "DT", "DD", ""))
     703              :                                 {
     704            0 :                                         MetaphAdd(primary, "T");
     705            0 :                                         MetaphAdd(secondary, "T");
     706            0 :                                         current += 2;
     707            0 :                                         break;
     708              :                                 }
     709              : 
     710              :                                 /* else */
     711            0 :                                 MetaphAdd(primary, "T");
     712            0 :                                 MetaphAdd(secondary, "T");
     713            0 :                                 current += 1;
     714            0 :                                 break;
     715              : 
     716              :                         case 'F':
     717            0 :                                 if (GetAt(original, current + 1) == 'F')
     718            0 :                                         current += 2;
     719              :                                 else
     720            0 :                                         current += 1;
     721            0 :                                 MetaphAdd(primary, "F");
     722            0 :                                 MetaphAdd(secondary, "F");
     723            0 :                                 break;
     724              : 
     725              :                         case 'G':
     726            0 :                                 if (GetAt(original, current + 1) == 'H')
     727              :                                 {
     728            0 :                                         if ((current > 0) && !IsVowel(original, current - 1))
     729              :                                         {
     730            0 :                                                 MetaphAdd(primary, "K");
     731            0 :                                                 MetaphAdd(secondary, "K");
     732            0 :                                                 current += 2;
     733            0 :                                                 break;
     734              :                                         }
     735              : 
     736            0 :                                         if (current < 3)
     737              :                                         {
     738              :                                                 /* 'ghislane', 'ghiradelli' */
     739            0 :                                                 if (current == 0)
     740              :                                                 {
     741            0 :                                                         if (GetAt(original, current + 2) == 'I')
     742              :                                                         {
     743            0 :                                                                 MetaphAdd(primary, "J");
     744            0 :                                                                 MetaphAdd(secondary, "J");
     745            0 :                                                         }
     746              :                                                         else
     747              :                                                         {
     748            0 :                                                                 MetaphAdd(primary, "K");
     749            0 :                                                                 MetaphAdd(secondary, "K");
     750              :                                                         }
     751            0 :                                                         current += 2;
     752            0 :                                                         break;
     753              :                                                 }
     754            0 :                                         }
     755              : 
     756              :                                         /*
     757              :                                          * Parker's rule (with some further refinements) - e.g.,
     758              :                                          * 'hugh'
     759              :                                          */
     760            0 :                                         if (((current > 1)
     761            0 :                                                  && StringAt(original, (current - 2), 1,
     762              :                                                                          "B", "H", "D", ""))
     763              :                                         /* e.g., 'bough' */
     764            0 :                                                 || ((current > 2)
     765            0 :                                                         && StringAt(original, (current - 3), 1,
     766              :                                                                                 "B", "H", "D", ""))
     767              :                                         /* e.g., 'broughton' */
     768            0 :                                                 || ((current > 3)
     769            0 :                                                         && StringAt(original, (current - 4), 1,
     770              :                                                                                 "B", "H", "")))
     771              :                                         {
     772            0 :                                                 current += 2;
     773            0 :                                                 break;
     774              :                                         }
     775              :                                         else
     776              :                                         {
     777              :                                                 /*
     778              :                                                  * e.g., 'laugh', 'McLaughlin', 'cough', 'gough',
     779              :                                                  * 'rough', 'tough'
     780              :                                                  */
     781            0 :                                                 if ((current > 2)
     782            0 :                                                         && (GetAt(original, current - 1) == 'U')
     783            0 :                                                         && StringAt(original, (current - 3), 1, "C",
     784              :                                                                                 "G", "L", "R", "T", ""))
     785              :                                                 {
     786            0 :                                                         MetaphAdd(primary, "F");
     787            0 :                                                         MetaphAdd(secondary, "F");
     788            0 :                                                 }
     789            0 :                                                 else if ((current > 0)
     790            0 :                                                                  && GetAt(original, current - 1) != 'I')
     791              :                                                 {
     792              : 
     793              : 
     794            0 :                                                         MetaphAdd(primary, "K");
     795            0 :                                                         MetaphAdd(secondary, "K");
     796            0 :                                                 }
     797              : 
     798            0 :                                                 current += 2;
     799            0 :                                                 break;
     800              :                                         }
     801              :                                 }
     802              : 
     803            0 :                                 if (GetAt(original, current + 1) == 'N')
     804              :                                 {
     805            0 :                                         if ((current == 1) && IsVowel(original, 0)
     806            0 :                                                 && !SlavoGermanic(original))
     807              :                                         {
     808            0 :                                                 MetaphAdd(primary, "KN");
     809            0 :                                                 MetaphAdd(secondary, "N");
     810            0 :                                         }
     811              :                                         else
     812              :                                                 /* not e.g. 'cagney' */
     813            0 :                                                 if (!StringAt(original, (current + 2), 2, "EY", "")
     814            0 :                                                         && (GetAt(original, current + 1) != 'Y')
     815            0 :                                                         && !SlavoGermanic(original))
     816              :                                         {
     817            0 :                                                 MetaphAdd(primary, "N");
     818            0 :                                                 MetaphAdd(secondary, "KN");
     819            0 :                                         }
     820              :                                         else
     821              :                                         {
     822            0 :                                                 MetaphAdd(primary, "KN");
     823            0 :                                                 MetaphAdd(secondary, "KN");
     824              :                                         }
     825            0 :                                         current += 2;
     826            0 :                                         break;
     827              :                                 }
     828              : 
     829              :                                 /* 'tagliaro' */
     830            0 :                                 if (StringAt(original, (current + 1), 2, "LI", "")
     831            0 :                                         && !SlavoGermanic(original))
     832              :                                 {
     833            0 :                                         MetaphAdd(primary, "KL");
     834            0 :                                         MetaphAdd(secondary, "L");
     835            0 :                                         current += 2;
     836            0 :                                         break;
     837              :                                 }
     838              : 
     839              :                                 /* -ges-,-gep-,-gel-, -gie- at beginning */
     840            0 :                                 if ((current == 0)
     841            0 :                                         && ((GetAt(original, current + 1) == 'Y')
     842            0 :                                                 || StringAt(original, (current + 1), 2, "ES", "EP",
     843              :                                                                         "EB", "EL", "EY", "IB", "IL", "IN", "IE",
     844              :                                                                         "EI", "ER", "")))
     845              :                                 {
     846            0 :                                         MetaphAdd(primary, "K");
     847            0 :                                         MetaphAdd(secondary, "J");
     848            0 :                                         current += 2;
     849            0 :                                         break;
     850              :                                 }
     851              : 
     852              :                                 /* -ger-,  -gy- */
     853            0 :                                 if ((StringAt(original, (current + 1), 2, "ER", "")
     854            0 :                                          || (GetAt(original, current + 1) == 'Y'))
     855            0 :                                         && !StringAt(original, 0, 6,
     856              :                                                                  "DANGER", "RANGER", "MANGER", "")
     857            0 :                                         && !StringAt(original, (current - 1), 1, "E", "I", "")
     858            0 :                                         && !StringAt(original, (current - 1), 3, "RGY", "OGY", ""))
     859              :                                 {
     860            0 :                                         MetaphAdd(primary, "K");
     861            0 :                                         MetaphAdd(secondary, "J");
     862            0 :                                         current += 2;
     863            0 :                                         break;
     864              :                                 }
     865              : 
     866              :                                 /* italian e.g., 'biaggi' */
     867            0 :                                 if (StringAt(original, (current + 1), 1, "E", "I", "Y", "")
     868            0 :                                         || StringAt(original, (current - 1), 4,
     869              :                                                                 "AGGI", "OGGI", ""))
     870              :                                 {
     871              :                                         /* obvious germanic */
     872            0 :                                         if ((StringAt(original, 0, 4, "VAN ", "VON ", "")
     873            0 :                                                  || StringAt(original, 0, 3, "SCH", ""))
     874            0 :                                                 || StringAt(original, (current + 1), 2, "ET", ""))
     875              :                                         {
     876            0 :                                                 MetaphAdd(primary, "K");
     877            0 :                                                 MetaphAdd(secondary, "K");
     878            0 :                                         }
     879              :                                         else
     880              :                                         {
     881              :                                                 /* always soft if french ending */
     882            0 :                                                 if (StringAt
     883            0 :                                                         (original, (current + 1), 4, "IER ", ""))
     884              :                                                 {
     885            0 :                                                         MetaphAdd(primary, "J");
     886            0 :                                                         MetaphAdd(secondary, "J");
     887            0 :                                                 }
     888              :                                                 else
     889              :                                                 {
     890            0 :                                                         MetaphAdd(primary, "J");
     891            0 :                                                         MetaphAdd(secondary, "K");
     892              :                                                 }
     893              :                                         }
     894            0 :                                         current += 2;
     895            0 :                                         break;
     896              :                                 }
     897              : 
     898            0 :                                 if (GetAt(original, current + 1) == 'G')
     899            0 :                                         current += 2;
     900              :                                 else
     901            0 :                                         current += 1;
     902            0 :                                 MetaphAdd(primary, "K");
     903            0 :                                 MetaphAdd(secondary, "K");
     904            0 :                                 break;
     905              : 
     906              :                         case 'H':
     907              :                                 /* only keep if first & before vowel or btw. 2 vowels */
     908            0 :                                 if (((current == 0) || IsVowel(original, current - 1))
     909            0 :                                         && IsVowel(original, current + 1))
     910              :                                 {
     911            0 :                                         MetaphAdd(primary, "H");
     912            0 :                                         MetaphAdd(secondary, "H");
     913            0 :                                         current += 2;
     914            0 :                                 }
     915              :                                 else
     916              :                                         /* also takes care of 'HH' */
     917            0 :                                         current += 1;
     918            0 :                                 break;
     919              : 
     920              :                         case 'J':
     921              :                                 /* obvious spanish, 'jose', 'san jacinto' */
     922            0 :                                 if (StringAt(original, current, 4, "JOSE", "")
     923            0 :                                         || StringAt(original, 0, 4, "SAN ", ""))
     924              :                                 {
     925            0 :                                         if (((current == 0)
     926            0 :                                                  && (GetAt(original, current + 4) == ' '))
     927            0 :                                                 || StringAt(original, 0, 4, "SAN ", ""))
     928              :                                         {
     929            0 :                                                 MetaphAdd(primary, "H");
     930            0 :                                                 MetaphAdd(secondary, "H");
     931            0 :                                         }
     932              :                                         else
     933              :                                         {
     934            0 :                                                 MetaphAdd(primary, "J");
     935            0 :                                                 MetaphAdd(secondary, "H");
     936              :                                         }
     937            0 :                                         current += 1;
     938            0 :                                         break;
     939              :                                 }
     940              : 
     941            0 :                                 if ((current == 0)
     942            0 :                                         && !StringAt(original, current, 4, "JOSE", ""))
     943              :                                 {
     944            0 :                                         MetaphAdd(primary, "J");      /* Yankelovich/Jankelowicz */
     945            0 :                                         MetaphAdd(secondary, "A");
     946            0 :                                 }
     947              :                                 else
     948              :                                 {
     949              :                                         /* spanish pron. of e.g. 'bajador' */
     950            0 :                                         if (IsVowel(original, current - 1)
     951            0 :                                                 && !SlavoGermanic(original)
     952            0 :                                                 && ((GetAt(original, current + 1) == 'A')
     953            0 :                                                         || (GetAt(original, current + 1) == 'O')))
     954              :                                         {
     955            0 :                                                 MetaphAdd(primary, "J");
     956            0 :                                                 MetaphAdd(secondary, "H");
     957            0 :                                         }
     958              :                                         else
     959              :                                         {
     960            0 :                                                 if (current == last)
     961              :                                                 {
     962            0 :                                                         MetaphAdd(primary, "J");
     963            0 :                                                         MetaphAdd(secondary, "");
     964            0 :                                                 }
     965              :                                                 else
     966              :                                                 {
     967            0 :                                                         if (!StringAt(original, (current + 1), 1, "L", "T",
     968              :                                                                                   "K", "S", "N", "M", "B", "Z", "")
     969            0 :                                                                 && !StringAt(original, (current - 1), 1,
     970              :                                                                                          "S", "K", "L", ""))
     971              :                                                         {
     972            0 :                                                                 MetaphAdd(primary, "J");
     973            0 :                                                                 MetaphAdd(secondary, "J");
     974            0 :                                                         }
     975              :                                                 }
     976              :                                         }
     977              :                                 }
     978              : 
     979            0 :                                 if (GetAt(original, current + 1) == 'J')        /* it could happen! */
     980            0 :                                         current += 2;
     981              :                                 else
     982            0 :                                         current += 1;
     983            0 :                                 break;
     984              : 
     985              :                         case 'K':
     986            0 :                                 if (GetAt(original, current + 1) == 'K')
     987            0 :                                         current += 2;
     988              :                                 else
     989            0 :                                         current += 1;
     990            0 :                                 MetaphAdd(primary, "K");
     991            0 :                                 MetaphAdd(secondary, "K");
     992            0 :                                 break;
     993              : 
     994              :                         case 'L':
     995            0 :                                 if (GetAt(original, current + 1) == 'L')
     996              :                                 {
     997              :                                         /* spanish e.g. 'cabrillo', 'gallegos' */
     998            0 :                                         if (((current == (length - 3))
     999            0 :                                                  && StringAt(original, (current - 1), 4, "ILLO",
    1000              :                                                                          "ILLA", "ALLE", ""))
    1001            0 :                                                 || ((StringAt(original, (last - 1), 2, "AS", "OS", "")
    1002            0 :                                                          || StringAt(original, last, 1, "A", "O", ""))
    1003            0 :                                                         && StringAt(original, (current - 1), 4,
    1004              :                                                                                 "ALLE", "")))
    1005              :                                         {
    1006            0 :                                                 MetaphAdd(primary, "L");
    1007            0 :                                                 MetaphAdd(secondary, "");
    1008            0 :                                                 current += 2;
    1009            0 :                                                 break;
    1010              :                                         }
    1011            0 :                                         current += 2;
    1012            0 :                                 }
    1013              :                                 else
    1014            0 :                                         current += 1;
    1015            0 :                                 MetaphAdd(primary, "L");
    1016            0 :                                 MetaphAdd(secondary, "L");
    1017            0 :                                 break;
    1018              : 
    1019              :                         case 'M':
    1020            0 :                                 if ((StringAt(original, (current - 1), 3, "UMB", "")
    1021            0 :                                          && (((current + 1) == last)
    1022            0 :                                                  || StringAt(original, (current + 2), 2, "ER", "")))
    1023              :                                 /* 'dumb','thumb' */
    1024            0 :                                         || (GetAt(original, current + 1) == 'M'))
    1025            0 :                                         current += 2;
    1026              :                                 else
    1027            0 :                                         current += 1;
    1028            0 :                                 MetaphAdd(primary, "M");
    1029            0 :                                 MetaphAdd(secondary, "M");
    1030            0 :                                 break;
    1031              : 
    1032              :                         case 'N':
    1033            0 :                                 if (GetAt(original, current + 1) == 'N')
    1034            0 :                                         current += 2;
    1035              :                                 else
    1036            0 :                                         current += 1;
    1037            0 :                                 MetaphAdd(primary, "N");
    1038            0 :                                 MetaphAdd(secondary, "N");
    1039            0 :                                 break;
    1040              : 
    1041              :                         case '\xd1':            /* N with tilde */
    1042            0 :                                 current += 1;
    1043            0 :                                 MetaphAdd(primary, "N");
    1044            0 :                                 MetaphAdd(secondary, "N");
    1045            0 :                                 break;
    1046              : 
    1047              :                         case 'P':
    1048            0 :                                 if (GetAt(original, current + 1) == 'H')
    1049              :                                 {
    1050            0 :                                         MetaphAdd(primary, "F");
    1051            0 :                                         MetaphAdd(secondary, "F");
    1052            0 :                                         current += 2;
    1053            0 :                                         break;
    1054              :                                 }
    1055              : 
    1056              :                                 /* also account for "campbell", "raspberry" */
    1057            0 :                                 if (StringAt(original, (current + 1), 1, "P", "B", ""))
    1058            0 :                                         current += 2;
    1059              :                                 else
    1060            0 :                                         current += 1;
    1061            0 :                                 MetaphAdd(primary, "P");
    1062            0 :                                 MetaphAdd(secondary, "P");
    1063            0 :                                 break;
    1064              : 
    1065              :                         case 'Q':
    1066            0 :                                 if (GetAt(original, current + 1) == 'Q')
    1067            0 :                                         current += 2;
    1068              :                                 else
    1069            0 :                                         current += 1;
    1070            0 :                                 MetaphAdd(primary, "K");
    1071            0 :                                 MetaphAdd(secondary, "K");
    1072            0 :                                 break;
    1073              : 
    1074              :                         case 'R':
    1075              :                                 /* french e.g. 'rogier', but exclude 'hochmeier' */
    1076            0 :                                 if ((current == last)
    1077            0 :                                         && !SlavoGermanic(original)
    1078            0 :                                         && StringAt(original, (current - 2), 2, "IE", "")
    1079            0 :                                         && !StringAt(original, (current - 4), 2, "ME", "MA", ""))
    1080              :                                 {
    1081            0 :                                         MetaphAdd(primary, "");
    1082            0 :                                         MetaphAdd(secondary, "R");
    1083            0 :                                 }
    1084              :                                 else
    1085              :                                 {
    1086            0 :                                         MetaphAdd(primary, "R");
    1087            0 :                                         MetaphAdd(secondary, "R");
    1088              :                                 }
    1089              : 
    1090            0 :                                 if (GetAt(original, current + 1) == 'R')
    1091            0 :                                         current += 2;
    1092              :                                 else
    1093            0 :                                         current += 1;
    1094            0 :                                 break;
    1095              : 
    1096              :                         case 'S':
    1097              :                                 /* special cases 'island', 'isle', 'carlisle', 'carlysle' */
    1098            0 :                                 if (StringAt(original, (current - 1), 3, "ISL", "YSL", ""))
    1099              :                                 {
    1100            0 :                                         current += 1;
    1101            0 :                                         break;
    1102              :                                 }
    1103              : 
    1104              :                                 /* special case 'sugar-' */
    1105            0 :                                 if ((current == 0)
    1106            0 :                                         && StringAt(original, current, 5, "SUGAR", ""))
    1107              :                                 {
    1108            0 :                                         MetaphAdd(primary, "X");
    1109            0 :                                         MetaphAdd(secondary, "S");
    1110            0 :                                         current += 1;
    1111            0 :                                         break;
    1112              :                                 }
    1113              : 
    1114            0 :                                 if (StringAt(original, current, 2, "SH", ""))
    1115              :                                 {
    1116              :                                         /* germanic */
    1117            0 :                                         if (StringAt
    1118            0 :                                                 (original, (current + 1), 4, "HEIM", "HOEK", "HOLM",
    1119              :                                                  "HOLZ", ""))
    1120              :                                         {
    1121            0 :                                                 MetaphAdd(primary, "S");
    1122            0 :                                                 MetaphAdd(secondary, "S");
    1123            0 :                                         }
    1124              :                                         else
    1125              :                                         {
    1126            0 :                                                 MetaphAdd(primary, "X");
    1127            0 :                                                 MetaphAdd(secondary, "X");
    1128              :                                         }
    1129            0 :                                         current += 2;
    1130            0 :                                         break;
    1131              :                                 }
    1132              : 
    1133              :                                 /* italian & armenian */
    1134            0 :                                 if (StringAt(original, current, 3, "SIO", "SIA", "")
    1135            0 :                                         || StringAt(original, current, 4, "SIAN", ""))
    1136              :                                 {
    1137            0 :                                         if (!SlavoGermanic(original))
    1138              :                                         {
    1139            0 :                                                 MetaphAdd(primary, "S");
    1140            0 :                                                 MetaphAdd(secondary, "X");
    1141            0 :                                         }
    1142              :                                         else
    1143              :                                         {
    1144            0 :                                                 MetaphAdd(primary, "S");
    1145            0 :                                                 MetaphAdd(secondary, "S");
    1146              :                                         }
    1147            0 :                                         current += 3;
    1148            0 :                                         break;
    1149              :                                 }
    1150              : 
    1151              :                                 /*
    1152              :                                  * german & anglicisations, e.g. 'smith' match 'schmidt',
    1153              :                                  * 'snider' match 'schneider' also, -sz- in slavic language
    1154              :                                  * although in hungarian it is pronounced 's'
    1155              :                                  */
    1156            0 :                                 if (((current == 0)
    1157            0 :                                          && StringAt(original, (current + 1), 1,
    1158              :                                                                  "M", "N", "L", "W", ""))
    1159            0 :                                         || StringAt(original, (current + 1), 1, "Z", ""))
    1160              :                                 {
    1161            0 :                                         MetaphAdd(primary, "S");
    1162            0 :                                         MetaphAdd(secondary, "X");
    1163            0 :                                         if (StringAt(original, (current + 1), 1, "Z", ""))
    1164            0 :                                                 current += 2;
    1165              :                                         else
    1166            0 :                                                 current += 1;
    1167            0 :                                         break;
    1168              :                                 }
    1169              : 
    1170            0 :                                 if (StringAt(original, current, 2, "SC", ""))
    1171              :                                 {
    1172              :                                         /* Schlesinger's rule */
    1173            0 :                                         if (GetAt(original, current + 2) == 'H')
    1174              :                                         {
    1175              :                                                 /* dutch origin, e.g. 'school', 'schooner' */
    1176            0 :                                                 if (StringAt(original, (current + 3), 2,
    1177              :                                                                          "OO", "ER", "EN",
    1178              :                                                                          "UY", "ED", "EM", ""))
    1179              :                                                 {
    1180              :                                                         /* 'schermerhorn', 'schenker' */
    1181            0 :                                                         if (StringAt(original, (current + 3), 2,
    1182              :                                                                                  "ER", "EN", ""))
    1183              :                                                         {
    1184            0 :                                                                 MetaphAdd(primary, "X");
    1185            0 :                                                                 MetaphAdd(secondary, "SK");
    1186            0 :                                                         }
    1187              :                                                         else
    1188              :                                                         {
    1189            0 :                                                                 MetaphAdd(primary, "SK");
    1190            0 :                                                                 MetaphAdd(secondary, "SK");
    1191              :                                                         }
    1192            0 :                                                         current += 3;
    1193            0 :                                                         break;
    1194              :                                                 }
    1195              :                                                 else
    1196              :                                                 {
    1197            0 :                                                         if ((current == 0) && !IsVowel(original, 3)
    1198            0 :                                                                 && (GetAt(original, 3) != 'W'))
    1199              :                                                         {
    1200            0 :                                                                 MetaphAdd(primary, "X");
    1201            0 :                                                                 MetaphAdd(secondary, "S");
    1202            0 :                                                         }
    1203              :                                                         else
    1204              :                                                         {
    1205            0 :                                                                 MetaphAdd(primary, "X");
    1206            0 :                                                                 MetaphAdd(secondary, "X");
    1207              :                                                         }
    1208            0 :                                                         current += 3;
    1209            0 :                                                         break;
    1210              :                                                 }
    1211              :                                         }
    1212              : 
    1213            0 :                                         if (StringAt(original, (current + 2), 1,
    1214              :                                                                  "I", "E", "Y", ""))
    1215              :                                         {
    1216            0 :                                                 MetaphAdd(primary, "S");
    1217            0 :                                                 MetaphAdd(secondary, "S");
    1218            0 :                                                 current += 3;
    1219            0 :                                                 break;
    1220              :                                         }
    1221              :                                         /* else */
    1222            0 :                                         MetaphAdd(primary, "SK");
    1223            0 :                                         MetaphAdd(secondary, "SK");
    1224            0 :                                         current += 3;
    1225            0 :                                         break;
    1226              :                                 }
    1227              : 
    1228              :                                 /* french e.g. 'resnais', 'artois' */
    1229            0 :                                 if ((current == last)
    1230            0 :                                         && StringAt(original, (current - 2), 2, "AI", "OI", ""))
    1231              :                                 {
    1232            0 :                                         MetaphAdd(primary, "");
    1233            0 :                                         MetaphAdd(secondary, "S");
    1234            0 :                                 }
    1235              :                                 else
    1236              :                                 {
    1237            0 :                                         MetaphAdd(primary, "S");
    1238            0 :                                         MetaphAdd(secondary, "S");
    1239              :                                 }
    1240              : 
    1241            0 :                                 if (StringAt(original, (current + 1), 1, "S", "Z", ""))
    1242            0 :                                         current += 2;
    1243              :                                 else
    1244            0 :                                         current += 1;
    1245            0 :                                 break;
    1246              : 
    1247              :                         case 'T':
    1248            0 :                                 if (StringAt(original, current, 4, "TION", ""))
    1249              :                                 {
    1250            0 :                                         MetaphAdd(primary, "X");
    1251            0 :                                         MetaphAdd(secondary, "X");
    1252            0 :                                         current += 3;
    1253            0 :                                         break;
    1254              :                                 }
    1255              : 
    1256            0 :                                 if (StringAt(original, current, 3, "TIA", "TCH", ""))
    1257              :                                 {
    1258            0 :                                         MetaphAdd(primary, "X");
    1259            0 :                                         MetaphAdd(secondary, "X");
    1260            0 :                                         current += 3;
    1261            0 :                                         break;
    1262              :                                 }
    1263              : 
    1264            0 :                                 if (StringAt(original, current, 2, "TH", "")
    1265            0 :                                         || StringAt(original, current, 3, "TTH", ""))
    1266              :                                 {
    1267              :                                         /* special case 'thomas', 'thames' or germanic */
    1268            0 :                                         if (StringAt(original, (current + 2), 2, "OM", "AM", "")
    1269            0 :                                                 || StringAt(original, 0, 4, "VAN ", "VON ", "")
    1270            0 :                                                 || StringAt(original, 0, 3, "SCH", ""))
    1271              :                                         {
    1272            0 :                                                 MetaphAdd(primary, "T");
    1273            0 :                                                 MetaphAdd(secondary, "T");
    1274            0 :                                         }
    1275              :                                         else
    1276              :                                         {
    1277            0 :                                                 MetaphAdd(primary, "0");
    1278            0 :                                                 MetaphAdd(secondary, "T");
    1279              :                                         }
    1280            0 :                                         current += 2;
    1281            0 :                                         break;
    1282              :                                 }
    1283              : 
    1284            0 :                                 if (StringAt(original, (current + 1), 1, "T", "D", ""))
    1285            0 :                                         current += 2;
    1286              :                                 else
    1287            0 :                                         current += 1;
    1288            0 :                                 MetaphAdd(primary, "T");
    1289            0 :                                 MetaphAdd(secondary, "T");
    1290            0 :                                 break;
    1291              : 
    1292              :                         case 'V':
    1293            0 :                                 if (GetAt(original, current + 1) == 'V')
    1294            0 :                                         current += 2;
    1295              :                                 else
    1296            0 :                                         current += 1;
    1297            0 :                                 MetaphAdd(primary, "F");
    1298            0 :                                 MetaphAdd(secondary, "F");
    1299            0 :                                 break;
    1300              : 
    1301              :                         case 'W':
    1302              :                                 /* can also be in middle of word */
    1303            0 :                                 if (StringAt(original, current, 2, "WR", ""))
    1304              :                                 {
    1305            0 :                                         MetaphAdd(primary, "R");
    1306            0 :                                         MetaphAdd(secondary, "R");
    1307            0 :                                         current += 2;
    1308            0 :                                         break;
    1309              :                                 }
    1310              : 
    1311            0 :                                 if ((current == 0)
    1312            0 :                                         && (IsVowel(original, current + 1)
    1313            0 :                                                 || StringAt(original, current, 2, "WH", "")))
    1314              :                                 {
    1315              :                                         /* Wasserman should match Vasserman */
    1316            0 :                                         if (IsVowel(original, current + 1))
    1317              :                                         {
    1318            0 :                                                 MetaphAdd(primary, "A");
    1319            0 :                                                 MetaphAdd(secondary, "F");
    1320            0 :                                         }
    1321              :                                         else
    1322              :                                         {
    1323              :                                                 /* need Uomo to match Womo */
    1324            0 :                                                 MetaphAdd(primary, "A");
    1325            0 :                                                 MetaphAdd(secondary, "A");
    1326              :                                         }
    1327            0 :                                 }
    1328              : 
    1329              :                                 /* Arnow should match Arnoff */
    1330            0 :                                 if (((current == last) && IsVowel(original, current - 1))
    1331            0 :                                         || StringAt(original, (current - 1), 5, "EWSKI", "EWSKY",
    1332              :                                                                 "OWSKI", "OWSKY", "")
    1333            0 :                                         || StringAt(original, 0, 3, "SCH", ""))
    1334              :                                 {
    1335            0 :                                         MetaphAdd(primary, "");
    1336            0 :                                         MetaphAdd(secondary, "F");
    1337            0 :                                         current += 1;
    1338            0 :                                         break;
    1339              :                                 }
    1340              : 
    1341              :                                 /* polish e.g. 'filipowicz' */
    1342            0 :                                 if (StringAt(original, current, 4, "WICZ", "WITZ", ""))
    1343              :                                 {
    1344            0 :                                         MetaphAdd(primary, "TS");
    1345            0 :                                         MetaphAdd(secondary, "FX");
    1346            0 :                                         current += 4;
    1347            0 :                                         break;
    1348              :                                 }
    1349              : 
    1350              :                                 /* else skip it */
    1351            0 :                                 current += 1;
    1352            0 :                                 break;
    1353              : 
    1354              :                         case 'X':
    1355              :                                 /* french e.g. breaux */
    1356            0 :                                 if (!((current == last)
    1357            0 :                                           && (StringAt(original, (current - 3), 3,
    1358              :                                                                    "IAU", "EAU", "")
    1359            0 :                                                   || StringAt(original, (current - 2), 2,
    1360              :                                                                           "AU", "OU", ""))))
    1361              :                                 {
    1362            0 :                                         MetaphAdd(primary, "KS");
    1363            0 :                                         MetaphAdd(secondary, "KS");
    1364            0 :                                 }
    1365              : 
    1366              : 
    1367            0 :                                 if (StringAt(original, (current + 1), 1, "C", "X", ""))
    1368            0 :                                         current += 2;
    1369              :                                 else
    1370            0 :                                         current += 1;
    1371            0 :                                 break;
    1372              : 
    1373              :                         case 'Z':
    1374              :                                 /* chinese pinyin e.g. 'zhao' */
    1375            0 :                                 if (GetAt(original, current + 1) == 'H')
    1376              :                                 {
    1377            0 :                                         MetaphAdd(primary, "J");
    1378            0 :                                         MetaphAdd(secondary, "J");
    1379            0 :                                         current += 2;
    1380            0 :                                         break;
    1381              :                                 }
    1382            0 :                                 else if (StringAt(original, (current + 1), 2,
    1383              :                                                                   "ZO", "ZI", "ZA", "")
    1384            0 :                                                  || (SlavoGermanic(original)
    1385            0 :                                                          && ((current > 0)
    1386            0 :                                                                  && GetAt(original, current - 1) != 'T')))
    1387              :                                 {
    1388            0 :                                         MetaphAdd(primary, "S");
    1389            0 :                                         MetaphAdd(secondary, "TS");
    1390            0 :                                 }
    1391              :                                 else
    1392              :                                 {
    1393            0 :                                         MetaphAdd(primary, "S");
    1394            0 :                                         MetaphAdd(secondary, "S");
    1395              :                                 }
    1396              : 
    1397            0 :                                 if (GetAt(original, current + 1) == 'Z')
    1398            0 :                                         current += 2;
    1399              :                                 else
    1400            0 :                                         current += 1;
    1401            0 :                                 break;
    1402              : 
    1403              :                         default:
    1404            0 :                                 current += 1;
    1405            0 :                 }
    1406              : 
    1407              :                 /*
    1408              :                  * printf("PRIMARY: %s\n", primary->str); printf("SECONDARY: %s\n",
    1409              :                  * secondary->str);
    1410              :                  */
    1411              :         }
    1412              : 
    1413              : 
    1414            0 :         if (primary->length > 4)
    1415            0 :                 SetAt(primary, 4, '\0');
    1416              : 
    1417            0 :         if (secondary->length > 4)
    1418            0 :                 SetAt(secondary, 4, '\0');
    1419              : 
    1420            0 :         *codes = primary->str;
    1421            0 :         *++codes = secondary->str;
    1422              : 
    1423            0 :         DestroyMetaString(original);
    1424            0 :         DestroyMetaString(primary);
    1425            0 :         DestroyMetaString(secondary);
    1426            0 : }
    1427              : 
    1428              : #ifdef DMETAPHONE_MAIN
    1429              : 
    1430              : /* just for testing - not part of the perl code */
    1431              : 
    1432              : main(int argc, char **argv)
    1433              : {
    1434              :         char       *codes[2];
    1435              : 
    1436              :         if (argc > 1)
    1437              :         {
    1438              :                 DoubleMetaphone(argv[1], DEFAULT_COLLATION_OID, codes);
    1439              :                 printf("%s|%s\n", codes[0], codes[1]);
    1440              :         }
    1441              : }
    1442              : 
    1443              : #endif
        

Generated by: LCOV version 2.3.2-1