|
Class: DoubleMetaphoneStringComparator (private in PhoneticStringUtilities)
This class is only visible from within
PhoneticStringUtilities.
Object
|
+--PhoneticStringUtilities::PhoneticStringComparator
|
+--PhoneticStringUtilities::DoubleMetaphoneStringComparator
- Package:
- stx:libbasic2
- Category:
- Collections-Text-Support
- Owner:
- PhoneticStringUtilities
The Double Metaphone algorithm
See: https://en.wikipedia.org/wiki/Metaphone
LICENSE
-
copyright
-
Copyright (c) 2002-2004 Robert Jarvis
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
files (the 'Software'), to deal in the Software without restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.
classification
-
isSlavoGermanic: aString
-
self isSlavoGermanic:'walter'
self isSlavoGermanic:'horowitz'
self isSlavoGermanic:'müller'
self isSlavoGermanic:'miller'
accessing
-
currentIndex
-
-
currentIndex: anInteger
-
-
inputKey
-
-
inputKey: aString
-
care for diaereses
-
primaryTranslation
-
-
primaryTranslation: anObject
-
-
secondaryTranslation
-
-
secondaryTranslation: anObject
-
-
skipCount
-
-
skipCount: anInteger
-
-
startIndex
-
-
startIndex: anObject
-
api
-
phoneticStringsFor: aString
-
Private - Answers an array of alternate phonetic strings for the given input string.
Usage example(s):
PhoneticStringUtilities::DoubleMetaphoneStringComparator new phoneticStringsFor:'muller'
PhoneticStringUtilities::DoubleMetaphoneStringComparator new phoneticStringsFor:'mueller'
PhoneticStringUtilities::DoubleMetaphoneStringComparator new phoneticStringsFor:'müller'
|
initialization
-
initialize
-
(comment from inherited method)
Invoked when a new instance is created.
private
-
addPrimaryTranslation: aString
-
-
addPrimaryTranslation: pString addSecondaryTranslation: sString
-
-
addSecondaryTranslation: aString
-
-
addToBothTranslations: aString
-
-
isSlavoGermanic: aString
-
-
keyAt: anInteger
-
-
keyLeftString: lengthInteger
-
-
keyMidString: lengthInteger from: fromInteger
-
Prepend spaces if caller is requesting characters from before the start of the string
-
keyRightString: lengthInteger
-
-
performInitialProcessing
-
-
processB
-
-
processC
-
i
-
processCedille
-
-
processD
-
i
-
processF
-
Modified (format): / 28-07-2017 / 11:29:21 / cg
-
processG
-
http://aspell.sourceforge.net/metaphone/dmetaph.cpp
case 'G':
if(GetAt(current + 1) == 'H')
{
-
processH
-
http://aspell.sourceforge.net/metaphone/dmetaph.cpp
case 'H':
//only keep if first & before vowel or btw. 2 vowels
if(((current == 0) OR IsVowel(current - 1))
AND IsVowel(current + 1))
{
MetaphAdd("H");
current += 2;
}else//also takes care of 'HH'
current += 1;
break;
-
processJ
-
http://aspell.sourceforge.net/metaphone/dmetaph.cpp
case 'J':
//obvious spanish, 'jose', 'san jacinto'
if(StringAt(current, 4, "JOSE", "") OR StringAt(0, 4, "SAN ", "") )
{
if(((current == 0) AND (GetAt(current + 4) == ' ')) OR StringAt(0, 4, "SAN ", "") )
MetaphAdd("H");
else
{
MetaphAdd("J", "H");
}
current +=1;
break;
}
if((current == 0) AND !StringAt(current, 4, "JOSE", ""))
MetaphAdd("J", "A");//Yankelovich/Jankelowicz
else
//spanish pron. of e.g. 'bajador'
if(IsVowel(current - 1)
AND !SlavoGermanic()
AND ((GetAt(current + 1) == 'A') OR (GetAt(current + 1) == 'O')))
MetaphAdd("J", "H");
else
if(current == last)
MetaphAdd("J", " ");
else
if(!StringAt((current + 1), 1, "L", "T", "K", "S", "N", "M", "B", "Z", "")
AND !StringAt((current - 1), 1, "S", "K", "L", ""))
MetaphAdd("J");
if(GetAt(current + 1) == 'J')//it could happen!
current += 2;
else
current += 1;
break;
-
processK
-
http://aspell.sourceforge.net/metaphone/dmetaph.cpp
case 'K':
if(GetAt(current + 1) == 'K')
current += 2;
else
current += 1;
MetaphAdd("K");
break;
-
processL
-
case 'L':
if(GetAt(current + 1) == 'L')
{
//spanish e.g. 'cabrillo', 'gallegos'
if(((current == (length - 3))
AND StringAt((current - 1), 4, "ILLO", "ILLA", "ALLE", ""))
OR ((StringAt((last - 1), 2, "AS", "OS", "") OR StringAt(last, 1, "A", "O", ""))
AND StringAt((current - 1), 4, "ALLE", "")) )
{
MetaphAdd("L", " ");
current += 2;
break;
}
current += 2;
}else
current += 1;
MetaphAdd("L");
break;
-
processM
-
case 'M':
if((StringAt((current - 1), 3, "UMB", "")
AND (((current + 1) == last) OR StringAt((current + 2), 2, "ER", "")))
//'dumb','thumb'
OR (GetAt(current + 1) == 'M') )
current += 2;
else
current += 1;
MetaphAdd("M");
break;
-
processN
-
http://aspell.sourceforge.net/metaphone/dmetaph.cpp
case 'N':
if(GetAt(current + 1) == 'N')
current += 2;
else
current += 1;
MetaphAdd("N");
break;
-
processNtilde
-
case 'Ñ':
current += 1;
MetaphAdd("N");
break;
-
processP
-
case 'P':
if(GetAt(current + 1) == 'H')
{
MetaphAdd("F");
current += 2;
break;
}
//also account for campbell, raspberry
if(StringAt((current + 1), 1, "P", "B", ""))
current += 2;
else
current += 1;
MetaphAdd("P");
break;
-
processQ
-
http://aspell.sourceforge.net/metaphone/dmetaph.cpp
case 'Q':
if(GetAt(current + 1) == 'Q')
current += 2;
else
current += 1;
MetaphAdd("K");
break;
-
processR
-
http://aspell.sourceforge.net/metaphone/dmetaph.cpp
case 'R':
//french e.g. 'rogier', but exclude 'hochmeier'
if((current == last)
AND !SlavoGermanic()
AND StringAt((current - 2), 2, "IE", "")
AND !StringAt((current - 4), 2, "ME", "MA", ""))
MetaphAdd("", "R");
else
MetaphAdd("R");
if(GetAt(current + 1) == 'R')
current += 2;
else
current += 1;
break;
-
processRemainingCharacters
-
-
processS
-
http://aspell.sourceforge.net/metaphone/dmetaph.cpp
case 'S':
//special cases 'island', 'isle', 'carlisle', 'carlysle'
if(StringAt((current - 1), 3, "ISL", "YSL", ""))
{
current += 1;
break;
}
//special case 'sugar-'
if((current == 0) AND StringAt(current, 5, "SUGAR", ""))
{
MetaphAdd("X", "S");
current += 1;
break;
}
if(StringAt(current, 2, "SH", ""))
{
//germanic
if(StringAt((current + 1), 4, "HEIM", "HOEK", "HOLM", "HOLZ", ""))
MetaphAdd("S");
else
MetaphAdd("X");
current += 2;
break;
}
//italian & armenian
if(StringAt(current, 3, "SIO", "SIA", "") OR StringAt(current, 4, "SIAN", ""))
{
if(!SlavoGermanic())
MetaphAdd("S", "X");
else
MetaphAdd("S");
current += 3;
break;
}
//german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
//also, -sz- in slavic language altho in hungarian it is pronounced 's'
if(((current == 0)
AND StringAt((current + 1), 1, "M", "N", "L", "W", ""))
OR StringAt((current + 1), 1, "Z", ""))
{
MetaphAdd("S", "X");
if(StringAt((current + 1), 1, "Z", ""))
current += 2;
else
current += 1;
break;
}
if(StringAt(current, 2, "SC", ""))
{
//Schlesinger's rule
if(GetAt(current + 2) == 'H')
//dutch origin, e.g. 'school', 'schooner'
if(StringAt((current + 3), 2, "OO", "ER", "EN", "UY", "ED", "EM", ""))
{
//'schermerhorn', 'schenker'
if(StringAt((current + 3), 2, "ER", "EN", ""))
{
MetaphAdd("X", "SK");
}else
MetaphAdd("SK");
current += 3;
break;
}else{
if((current == 0) AND !IsVowel(3) AND (GetAt(3) != 'W'))
MetaphAdd("X", "S");
else
MetaphAdd("X");
current += 3;
break;
}
if(StringAt((current + 2), 1, "I", "E", "Y", ""))
{
MetaphAdd("S");
current += 3;
break;
}
//else
MetaphAdd("SK");
current += 3;
break;
}
//french e.g. 'resnais', 'artois'
if((current == last) AND StringAt((current - 2), 2, "AI", "OI", ""))
MetaphAdd("", "S");
else
MetaphAdd("S");
if(StringAt((current + 1), 1, "S", "Z", ""))
current += 2;
else
current += 1;
break;
-
processT
-
http://aspell.sourceforge.net/metaphone/dmetaph.cpp
case 'T':
if(StringAt(current, 4, "TION", ""))
{
MetaphAdd("X");
current += 3;
break;
}
if(StringAt(current, 3, "TIA", "TCH", ""))
{
MetaphAdd("X");
current += 3;
break;
}
if(StringAt(current, 2, "TH", "")
OR StringAt(current, 3, "TTH", ""))
{
//special case 'thomas', 'thames' or germanic
if(StringAt((current + 2), 2, "OM", "AM", "")
OR StringAt(0, 4, "VAN ", "VON ", "")
OR StringAt(0, 3, "SCH", ""))
{
MetaphAdd("T");
}else{
MetaphAdd("0", "T");
}
current += 2;
break;
}
if(StringAt((current + 1), 1, "T", "D", ""))
current += 2;
else
current += 1;
MetaphAdd("T");
break;
-
processV
-
http://aspell.sourceforge.net/metaphone/dmetaph.cpp
case 'V':
if(GetAt(current + 1) == 'V')
current += 2;
else
current += 1;
MetaphAdd("F");
break;
-
processW
-
http://aspell.sourceforge.net/metaphone/dmetaph.cpp
case 'W':
//can also be in middle of word
if(StringAt(current, 2, "WR", ""))
{
MetaphAdd("R");
current += 2;
break;
}
if((current == 0)
AND (IsVowel(current + 1) OR StringAt(current, 2, "WH", "")))
{
//Wasserman should match Vasserman
if(IsVowel(current + 1))
MetaphAdd("A", "F");
else
//need Uomo to match Womo
MetaphAdd("A");
}
//Arnow should match Arnoff
if(((current == last) AND IsVowel(current - 1))
OR StringAt((current - 1), 5, "EWSKI", "EWSKY", "OWSKI", "OWSKY", "")
OR StringAt(0, 3, "SCH", ""))
{
MetaphAdd("", "F");
current +=1;
break;
}
//polish e.g. 'filipowicz'
if(StringAt(current, 4, "WICZ", "WITZ", ""))
{
MetaphAdd("TS", "FX");
current +=4;
break;
}
//else skip it
current +=1;
break;
-
processX
-
http://aspell.sourceforge.net/metaphone/dmetaph.cpp
case 'X':
//french e.g. breaux
if(!((current == last)
AND (StringAt((current - 3), 3, "IAU", "EAU", "")
OR StringAt((current - 2), 2, "AU", "OU", ""))) )
MetaphAdd("KS");
if(StringAt((current + 1), 1, "C", "X", ""))
current += 2;
else
current += 1;
break;
-
processZ
-
http://aspell.sourceforge.net/metaphone/dmetaph.cpp
case 'Z':
//chinese pinyin e.g. 'zhao'
if(GetAt(current + 1) == 'H')
{
MetaphAdd("J");
current += 2;
break;
}else
if(StringAt((current + 1), 2, "ZO", "ZI", "ZA", "")
OR (SlavoGermanic() AND ((current > 0) AND GetAt(current - 1) != 'T')))
{
MetaphAdd("S", "TS");
}
else
MetaphAdd("S");
if(GetAt(current + 1) == 'Z')
current += 2;
else
current += 1;
break;
|