Mega Code Archive

 
Categories / Delphi / Multimedia
 

Finding strings by how they sound using Metaphone Beta 1

Title: Finding strings by how they sound using Metaphone Beta 1 Question: Another algorithm for determining how a string sounds Answer: This article is replaced by the far better coded article at this address this implementation is buggy, however the urls described in the article are up to date and if you are interested in the algorithm they are a must check The code is below, I obtained it by translating the Metaphone.cc unit of the htDig search engine, it works in C well but the translation I made aint the better, Why? because I translated mostly using the C approach and not the Delphi one. I would also like to encourage research on a better (faster less code bloated) tanslation of this algorithm, I am working on one. If you happen to have a better translation post it. NOTES: This algorithm as well as soundex are english only so no unicode support, or support for , , and miscelaneous characters The MetaPhone algorithm function you see below strips any non alphabetic characters so dont worry. Also please remember that this algorithm is in Beta A description of the metaphone algorithm is available at this page also there is the double metaphone algorithm wich is also implemented on C and with a description at ASpell site UPDATE: There is already a metaphone implementation in delphi you can find it at SourceForge that version is far better than this Beta const MAXPHONEMELEN = 6; procedure Metaphone(Word: String; var Key: String); const vsvfn: array[65..90] of Integer = ( 1, 16, 4, 16, 9, 2, 4, 16, 9, 2, 0, 2, 2, {* A B C D E F G H I J K L M *} 2, 1, 4, 0, 2, 4, 4, 1, 0, 0, 0, 8, 0); { N O P Q R S T U V W X Y Z } function vscode(x: Char): Integer; begin if ((x ='A') and (x 'Z')) then Result := vsvfn[ord(x)] else Result := 0; end; function Vowel(x: Char): Boolean; begin Result := StrUpper(@x)^ in ['A','E','I','O','U']; end; function Same(x: Char): Boolean; begin Result := vsvfn[ord(x)] mod 2 = 0; end; function Varson(x: Char): Boolean; begin Result := vsvfn[ord(x)] mod 4 = 0; end; function frontv(x: Char): Boolean; begin Result := StrUpper(@x)^ in ['E','I','Y']; end; function noghf(x: char): Boolean; begin Result := vsvfn[ord(x)] mod 16 = 0; end; function IsAlpha(x: Char): Boolean; begin Result := (Ord(StrUpper(@x)^) = 65) and (Ord(StrUpper(@x)^) 90); end; var i: Integer; Tmp: String; begin if (Length(Word) = 0) then Exit; Key := ''; {Copy word to internal buffer and drop any non alphabetic characters} Tmp := UpperCase(Word); for i := 1 to Length(Tmp) do if not IsAlpha(Tmp[i]) then Delete(Tmp, i, 1); {Now Check for PN, KN, GN, AE, WR, WH, and X at start} case Tmp[1] of //PN, KN , GN become N 'P', 'K', 'N': if Tmp[2] = 'N' then Delete(Tmp, 1, 1); //AE becomes E 'A': if Tmp[2] = 'E' then Delete(Tmp, 1, 1); //'WR' becomes 'R', and 'WH' to 'W' 'W': case Tmp[2] of 'R': Delete(Tmp, 1, 1); 'H': Delete(Tmp, 2, 1); end; //'X' becomes 'S' 'X': Tmp[1] := 'S'; end; { Now, loop step through string, stopping at end of string or when the computed 'metaph' is MAXPHONEMELEN characters long} i := 1; while (Length(Key) or (i = Length(Tmp)) do begin //Drop duplicates except for CC if (Tmp[i] = Tmp[i+1]) and (Tmp[i] 'C') then begin inc(i); Continue; end; //Check for F J L M N R or first letter vowel if Same(Tmp[i]) or Vowel(Tmp[i]) then Key := Key + Tmp[i] else begin case Tmp[i] of 'B': //unless in MB if (i 1) or (Tmp[i -1] 'M') then Key := Key + Tmp[i]; {X if in -CIA-, -CH- else S if in -CI-, -CE-, -CY- else dropped if in -SCI-, -SCE-, -SCY- else K} 'C': if ((i 1) and (Tmp[i-1]'S')) or not Frontv(Tmp[i+1]) then begin if (Tmp[i+1] = 'I') and (Tmp[i+2] = 'A') then begin Key := Key + 'X'; inc(i, 3); Continue; end else if FrontV(Tmp[i+1]) then Key := Key + 'S'; end; 'D'://J if in DGE or DGI or DGY else T if (Tmp[i+1] = 'G') and FrontV(Tmp[i+2]) then Key := Key + 'J' else Key := Key + 'T'; 'G': {F if in -GH and not B--GH, D--GH, -H--GH, -H---GH else dropped if -GNED, -GN, -DGE-, -DGI-, -DGY- else J if in -GE-, -GI-, -GY- and not GG else K} if (Tmp[i+1] 'G') or Vowel(Tmp[i+2]) and ((Tmp[i+1] 'N') or (i or (Tmp[i+2] 'E') or (Tmp[i+3] 'D') and ((Tmp[i -1] 'D') or not FrontV(Tmp[i+1]))) then begin if FrontV(Tmp[i+1]) and (Tmp[i+2] 'G') then Key := Key + 'J' else Key := Key + 'K'; end else if (Tmp[i+1] = 'H') AND (not Noghf(Tmp[i-3])) and (Tmp[i - 4]'H') then Key := Key + 'F'; //H if before a vowel and not after C, G, P, S, T //else dropped 'H': if ((i 1) and not Varson(Tmp[i-1]) and (((i 1) and not Vowel(Tmp[i-1]) or Vowel(Tmp[i-1])))) then Key := Key + 'H'; //dropped if after C else K 'K': if ((i 1) and (Tmp[i-1] 'C')) then key := key + 'K'; //F if before H, else P 'P': if (Tmp[i+1] = 'H') then begin Key := Key + 'F'; inc(i); end else Key := Key + 'P'; 'Q': Key := Key + 'K'; {X in -SH-, -SIO- or -SIA- else S} 'S': if (Tmp[i+1] = 'H') or ((Tmp[i+1] = 'I') and ((Tmp[i+1] = 'A') or (Tmp[i+1] = 'O'))) then begin if (Tmp[i+1] = 'H') then inc(i, 1); if (Tmp[i+1] = 'I') then inc(i, 2); Key := Key + 'X'; end else Key := Key + 'S'; { X in -TIA- or -TIO- else 0 (zero) before H else dropped if in -TCH- else T} 'T': if (Tmp[i+1] = 'I') and ((Tmp[i+2] = 'A') or (Tmp[i+2] = 'O')) then begin Key := Key + 'X'; inc(i, 3); Continue; end else if (Tmp[i+1] = 'H') then begin Key := '0'; inc(i, 2); Continue; end else if (Tmp[i+1] 'C') or (Tmp[i+2] = 'H') then begin Key := Key + 'T'; inc(i, 3); Continue; end; 'V': Key := Key + 'F'; 'X': if (Tmp[i-1] = ' ') then Key := Key + 'S' else Key := Key + 'KS'; 'Y': if Vowel(Tmp[i +1]) then begin inc(i); Key := Key + Tmp[i]; Continue; end; 'Z': Key := Key + 'S'; end; inc(i); end; end; end;