Articles   Members Online:
-Article/Tip Search
-News Group Search over 21 Million news group articles.
-Delphi/Pascal
-CBuilder/C++
-C#Builder/C#
-JBuilder/Java
-Kylix
Member Area
-Home
-Account Center
-Top 10 NEW!!
-Submit Article/Tip
-Forums Upgraded!!
-My Articles
-Edit Information
-Login/Logout
-Become a Member
-Why sign up!
-Newsletter
-Chat Online!
-Indexes NEW!!
Employment
-Build your resume
-Find a job
-Post a job
-Resume Search
Contacts
-Contacts
-Feedbacks
-Link to us
-Privacy/Disclaimer
Embarcadero
Visit Embarcadero
Embarcadero Community
JEDI
Links
How to search for a string using the Soundex algorithm Turn on/off line numbers in source code. Switch to Orginial background IDE or DSP color Comment or reply to this aritlce/tip for discussion. Bookmark this article to my favorite article(s). Print this article
30-Aug-02
Category
Algorithm
Language
Delphi All Versions
Views
96
User Rating
No Votes
# Votes
0
Replies
0
Publisher:
DSP, Administrator
Reference URL:
DKB
			Author: Tomas Rutkauskas

How to search for a string using the Soundex algorithm

Answer:

Solve 1:

1   unit SndxAlgs;
2   
3   interface
4   
5   uses
6     SysUtils;
7   
8   function Soundex(in_str: string): string;
9   function NumericSoundex(in_str: string): Smallint;
10  function ExtendedSoundex(in_str: string): string;
11  
12  implementation
13  
14  {Calculate a normal Soundex encoding.}
15  
16  function Soundex(in_str: string): string;
17  var
18    no_vowels, coded, out_str: string;
19    ch: Char;
20    i: Integer;
21  begin
22    {Make upper case and remove leading and trailing spaces.}
23    in_str := Trim(UpperCase(in_str));
24    {Remove vowels, spaces, H, W, and Y except for the first character.}
25    no_vowels := in_str[1];
26    for i := 2 to Length(in_str) do
27    begin
28      ch := in_str[i];
29      case ch of
30        'A', 'E', 'I', 'O', 'U', ' ', 'H', 'W', 'Y':
31          ; {Do nothing.}
32      else
33        no_vowels := no_vowels + ch;
34      end;
35    end;
36    {Encode the characters.}
37    for i := 1 to Length(no_vowels) do
38    begin
39      ch := no_vowels[i];
40      case ch of
41        'B', 'F', 'P', 'V': ch := '1';
42        'C', 'G', 'J', 'K', 'Q', 'S', 'X', 'Z': ch := '2';
43        'D', 'T': ch := '3';
44        'L': ch := '4';
45        'M', 'N': ch := '5';
46        'R': ch := '6';
47      else {Vowels, H, W, and Y as the 1st letter.}
48        ch := '0';
49      end;
50      coded := coded + ch;
51    end;
52    {Use the first letter.}
53    out_str := no_vowels[1];
54    {Find three non-repeating codes.}
55    for i := 2 to Length(no_vowels) do
56    begin
57      {Look for a non-repeating code.}
58      if (coded[i] <> coded[i - 1]) then
59      begin
60        {This one works.}
61        out_str := out_str + coded[i];
62        if (Length(out_str) >= 4) then
63          Break;
64      end;
65    end;
66    Soundex := out_str;
67  end;
68  
69  {Calculate a numeric Soundex encoding.}
70  
71  function NumericSoundex(in_str: string): Smallint;
72  var
73    value: Integer;
74  begin
75    {Calculate the normal Soundex encoding.}
76    in_str := Soundex(in_str);
77    {Convert this into a numeric value.}
78    value := (Ord(in_str[1]) - Ord('A')) * 1000;
79    if (Length(in_str) > 1) then
80      value := value + StrToInt(Copy(in_str, 2, Length(in_str) - 1));
81    NumericSoundex := value;
82  end;
83  
84  {Calculate an extended Soundex encoding.}
85  
86  function ExtendedSoundex(in_str: string): string;
87  
88  {Replace instances of fr_str with to_str in str.}
89    procedure ReplaceString(var str: string; fr_str, to_str: string);
90    var
91      fr_len, i: Integer;
92    begin
93      fr_len := Length(fr_str);
94      i := Pos(fr_str, str);
95      while (i > 0) do
96      begin
97        str := Copy(str, 1, i - 1) + to_str + Copy(str, i + fr_len, Length(str) - i - 
98  fr_len + 1);
99        i := Pos(fr_str, str);
100     end;
101   end;
102 
103 var
104   no_vowels: string;
105   ch, last_ch: Char;
106   i: Integer;
107 begin
108   {Make upper case and remove leading and trailing spaces.}
109   in_str := Trim(UpperCase(in_str));
110   {Remove internal spaces.}
111   ReplaceString(in_str, ' ', '');
112   {Convert CHR to CR.}
113   ReplaceString(in_str, 'CHR', 'CR');
114   {Convert PH to F.}
115   ReplaceString(in_str, 'PH', 'F');
116   {Convert Z to S.}
117   ReplaceString(in_str, 'Z', 'S');
118   {Remove vowels and repeats.}
119   last_ch := in_str[1]; {The last character used.}
120   no_vowels := last_ch;
121   for i := 2 to Length(in_str) do
122   begin
123     ch := in_str[i];
124     case ch of
125       'A', 'E', 'I', 'O', 'U':
126         ; {Do nothing.}
127     else
128       {Skip it if it's a duplicate.}
129       if (ch <> last_ch) then
130       begin
131         no_vowels := no_vowels + ch;
132         last_ch := ch;
133       end;
134     end;
135   end;
136   ExtendedSoundex := no_vowels;
137 end;
138 
139 end.
140 
141 //Used like this:
142 
143 unit Sndx;
144 
145 interface
146 
147 uses
148   Windows, Messages, SysUtils, Classes, Graphics, Controls, Forms, Dialogs,
149   StdCtrls, ExtCtrls, SndxAlgs;
150 
151 type
152   TForm1 = class(TForm)
153     InputText: TEdit;
154     Label1: TLabel;
155     CmdEncode: TButton;
156     Label2: TLabel;
157     Label3: TLabel;
158     Panel1: TPanel;
159     SoundexLabel: TLabel;
160     Panel2: TPanel;
161     NumericLabel: TLabel;
162     Label4: TLabel;
163     Panel3: TPanel;
164     ExtendedLabel: TLabel;
165     procedure CmdEncodeClick(Sender: TObject);
166   private
167     { Private declarations }
168   public
169     { Public declarations }
170   end;
171 
172 var
173   Form1: TForm1;
174 
175 implementation
176 
177 {$R *.DFM}
178 
179 procedure TForm1.CmdEncodeClick(Sender: TObject);
180 begin
181   SoundexLabel.Caption := Soundex(InputText.Text);
182   NumericLabel.Caption := Format('%d', [NumericSoundex(InputText.Text)]);
183   ExtendedLabel.Caption := ExtendedSoundex(InputText.Text);
184 end;
185 
186 end.



Solve 2:

The code below is designed for use in English language and does not work for 
special characters like French accents or German Umlauts
187 
188 function StrSoundEx(const OrgString: string): string;
189 var
190   s: string;
191   PrevCh: Char;
192   Ch: Char;
193   i: Integer;
194 begin
195   s := UpperCase(Trim(OrgString));
196   if s <> '' then
197   begin
198     PrevCh := #0;
199     result := s[1];
200     for i := 2 to Length(s) do
201     begin
202       if Length(result) = 4 then
203         break;
204       Ch := s[i];
205       if (Ch <> PrevCh) then
206       begin
207         if Ch in ['B', 'P', 'F', 'V'] then
208           result := result + '1'
209         else if Ch in ['C', 'S', 'K', 'G', 'J', 'Q', 'X', 'Z'] then
210           result := result + '2'
211         else if Ch in ['D', 'T'] then
212           result := result + '3'
213         else if Ch in ['L'] then
214           result := result + '4'
215         else if Ch in ['M', 'N'] then
216           result := result + '5'
217         else if Ch in ['R'] then
218           result := result + '6';
219         PrevCh := Ch;
220       end;
221     end;
222   end;
223   while Length(result) < 4 do
224     result := result + '0';
225 end;



Solve 3:

The following differs from the standard Russell Soundex algorithm in that it lets 
you set the size of the Soundex code to something other than four characters:
226 
227 {Given a string this fuction returns the Russell Soundex code for that string. 
228 Although the Russell Soundex code is limited to four characters this function 
229 allows you to get a code up to 16 characters in length. For names a six to eight 
230 character code reduces the number of false matches significantly.
231 
232 Parameters:
233 TheWord: The string to be encoded.
234 SoundexSize: The number of characters in the returned code.
235 
236 Returns: The Soundex code.}
237 
238 function dgGetSoundexCode(TheWord: string; SoundexSize: Integer): string;
239 const
240   MaxSize = 16;
241 var
242   I: Integer;
243   WorkString1, WorkString2: string;
244 begin
245   {Raise an exception if the SoundexSize parameter is not in the allowed range}
246   if not SoundexSize in [1..MaxSize] then
247     raise Exception.Create('Soundex size must in the range 1 - 16.');
248   {Convert the word to upper case}
249   TheWord := UpperCase(TheWord);
250   {Copy the first letter}
251   WorkString1 := TheWord[1];
252   {Copy the rest of the word to WordString1 deleting duplicate letters}
253   for I := 2 to Length(TheWord) do
254     if TheWord[I - 1] <> TheWord[I] then
255       AppendStr(WorkString1, TheWord[I]);
256   {Move the first letter to WorkString2}
257   WorkString2 := WorkString1[1];
258   {Compute the Soundex codes for the remaining letters}
259   for I := 2 to Length(WorkString1) do
260     case WorkString1[I] of
261       'B', 'F', 'P', 'V':
262         AppendStr(WorkString2, '1');
263       'C', 'G', 'J', 'K', 'Q', 'S', 'X', 'Z':
264         Appendstr(WorkString2, '2');
265       'D', 'T':
266         Appendstr(WorkString2, '3');
267       'L':
268         Appendstr(WorkString2, '4');
269       'M', 'N':
270         Appendstr(WorkString2, '5');
271       'R':
272         Appendstr(WorkString2, '6');
273     end;
274   {Pad the string with zeros}
275   WorkString1 := '';
276   WorkString1 := dgFillString('0', MaxSize);
277   AppendStr(WorkString2, WorkString1);
278   Result := Copy(WorkString2, 1, SoundexSize);
279 end;


			
Vote: How useful do you find this Article/Tip?
Bad Excellent
1 2 3 4 5 6 7 8 9 10

 

Advertisement
Share this page
Advertisement
Download from Google

Copyright © Mendozi Enterprises LLC