Author: Tomas Rutkauskas
How to search for a string using the Soundex algorithm
Answer:
Solve 1:
1 unit SndxAlgs;
2
3 interface
4
5 uses
6 SysUtils;
7
8 function Soundex(in_str: string): string;
9 function NumericSoundex(in_str: string): Smallint;
10 function ExtendedSoundex(in_str: string): string;
11
12 implementation
13
14 {Calculate a normal Soundex encoding.}
15
16 function Soundex(in_str: string): string;
17 var
18 no_vowels, coded, out_str: string;
19 ch: Char;
20 i: Integer;
21 begin
22 {Make upper case and remove leading and trailing spaces.}
23 in_str := Trim(UpperCase(in_str));
24 {Remove vowels, spaces, H, W, and Y except for the first character.}
25 no_vowels := in_str[1];
26 for i := 2 to Length(in_str) do
27 begin
28 ch := in_str[i];
29 case ch of
30 'A', 'E', 'I', 'O', 'U', ' ', 'H', 'W', 'Y':
31 ; {Do nothing.}
32 else
33 no_vowels := no_vowels + ch;
34 end;
35 end;
36 {Encode the characters.}
37 for i := 1 to Length(no_vowels) do
38 begin
39 ch := no_vowels[i];
40 case ch of
41 'B', 'F', 'P', 'V': ch := '1';
42 'C', 'G', 'J', 'K', 'Q', 'S', 'X', 'Z': ch := '2';
43 'D', 'T': ch := '3';
44 'L': ch := '4';
45 'M', 'N': ch := '5';
46 'R': ch := '6';
47 else {Vowels, H, W, and Y as the 1st letter.}
48 ch := '0';
49 end;
50 coded := coded + ch;
51 end;
52 {Use the first letter.}
53 out_str := no_vowels[1];
54 {Find three non-repeating codes.}
55 for i := 2 to Length(no_vowels) do
56 begin
57 {Look for a non-repeating code.}
58 if (coded[i] <> coded[i - 1]) then
59 begin
60 {This one works.}
61 out_str := out_str + coded[i];
62 if (Length(out_str) >= 4) then
63 Break;
64 end;
65 end;
66 Soundex := out_str;
67 end;
68
69 {Calculate a numeric Soundex encoding.}
70
71 function NumericSoundex(in_str: string): Smallint;
72 var
73 value: Integer;
74 begin
75 {Calculate the normal Soundex encoding.}
76 in_str := Soundex(in_str);
77 {Convert this into a numeric value.}
78 value := (Ord(in_str[1]) - Ord('A')) * 1000;
79 if (Length(in_str) > 1) then
80 value := value + StrToInt(Copy(in_str, 2, Length(in_str) - 1));
81 NumericSoundex := value;
82 end;
83
84 {Calculate an extended Soundex encoding.}
85
86 function ExtendedSoundex(in_str: string): string;
87
88 {Replace instances of fr_str with to_str in str.}
89 procedure ReplaceString(var str: string; fr_str, to_str: string);
90 var
91 fr_len, i: Integer;
92 begin
93 fr_len := Length(fr_str);
94 i := Pos(fr_str, str);
95 while (i > 0) do
96 begin
97 str := Copy(str, 1, i - 1) + to_str + Copy(str, i + fr_len, Length(str) - i -
98 fr_len + 1);
99 i := Pos(fr_str, str);
100 end;
101 end;
102
103 var
104 no_vowels: string;
105 ch, last_ch: Char;
106 i: Integer;
107 begin
108 {Make upper case and remove leading and trailing spaces.}
109 in_str := Trim(UpperCase(in_str));
110 {Remove internal spaces.}
111 ReplaceString(in_str, ' ', '');
112 {Convert CHR to CR.}
113 ReplaceString(in_str, 'CHR', 'CR');
114 {Convert PH to F.}
115 ReplaceString(in_str, 'PH', 'F');
116 {Convert Z to S.}
117 ReplaceString(in_str, 'Z', 'S');
118 {Remove vowels and repeats.}
119 last_ch := in_str[1]; {The last character used.}
120 no_vowels := last_ch;
121 for i := 2 to Length(in_str) do
122 begin
123 ch := in_str[i];
124 case ch of
125 'A', 'E', 'I', 'O', 'U':
126 ; {Do nothing.}
127 else
128 {Skip it if it's a duplicate.}
129 if (ch <> last_ch) then
130 begin
131 no_vowels := no_vowels + ch;
132 last_ch := ch;
133 end;
134 end;
135 end;
136 ExtendedSoundex := no_vowels;
137 end;
138
139 end.
140
141 //Used like this:
142
143 unit Sndx;
144
145 interface
146
147 uses
148 Windows, Messages, SysUtils, Classes, Graphics, Controls, Forms, Dialogs,
149 StdCtrls, ExtCtrls, SndxAlgs;
150
151 type
152 TForm1 = class(TForm)
153 InputText: TEdit;
154 Label1: TLabel;
155 CmdEncode: TButton;
156 Label2: TLabel;
157 Label3: TLabel;
158 Panel1: TPanel;
159 SoundexLabel: TLabel;
160 Panel2: TPanel;
161 NumericLabel: TLabel;
162 Label4: TLabel;
163 Panel3: TPanel;
164 ExtendedLabel: TLabel;
165 procedure CmdEncodeClick(Sender: TObject);
166 private
167 { Private declarations }
168 public
169 { Public declarations }
170 end;
171
172 var
173 Form1: TForm1;
174
175 implementation
176
177 {$R *.DFM}
178
179 procedure TForm1.CmdEncodeClick(Sender: TObject);
180 begin
181 SoundexLabel.Caption := Soundex(InputText.Text);
182 NumericLabel.Caption := Format('%d', [NumericSoundex(InputText.Text)]);
183 ExtendedLabel.Caption := ExtendedSoundex(InputText.Text);
184 end;
185
186 end.
Solve 2:
The code below is designed for use in English language and does not work for
special characters like French accents or German Umlauts
187
188 function StrSoundEx(const OrgString: string): string;
189 var
190 s: string;
191 PrevCh: Char;
192 Ch: Char;
193 i: Integer;
194 begin
195 s := UpperCase(Trim(OrgString));
196 if s <> '' then
197 begin
198 PrevCh := #0;
199 result := s[1];
200 for i := 2 to Length(s) do
201 begin
202 if Length(result) = 4 then
203 break;
204 Ch := s[i];
205 if (Ch <> PrevCh) then
206 begin
207 if Ch in ['B', 'P', 'F', 'V'] then
208 result := result + '1'
209 else if Ch in ['C', 'S', 'K', 'G', 'J', 'Q', 'X', 'Z'] then
210 result := result + '2'
211 else if Ch in ['D', 'T'] then
212 result := result + '3'
213 else if Ch in ['L'] then
214 result := result + '4'
215 else if Ch in ['M', 'N'] then
216 result := result + '5'
217 else if Ch in ['R'] then
218 result := result + '6';
219 PrevCh := Ch;
220 end;
221 end;
222 end;
223 while Length(result) < 4 do
224 result := result + '0';
225 end;
Solve 3:
The following differs from the standard Russell Soundex algorithm in that it lets
you set the size of the Soundex code to something other than four characters:
226
227 {Given a string this fuction returns the Russell Soundex code for that string.
228 Although the Russell Soundex code is limited to four characters this function
229 allows you to get a code up to 16 characters in length. For names a six to eight
230 character code reduces the number of false matches significantly.
231
232 Parameters:
233 TheWord: The string to be encoded.
234 SoundexSize: The number of characters in the returned code.
235
236 Returns: The Soundex code.}
237
238 function dgGetSoundexCode(TheWord: string; SoundexSize: Integer): string;
239 const
240 MaxSize = 16;
241 var
242 I: Integer;
243 WorkString1, WorkString2: string;
244 begin
245 {Raise an exception if the SoundexSize parameter is not in the allowed range}
246 if not SoundexSize in [1..MaxSize] then
247 raise Exception.Create('Soundex size must in the range 1 - 16.');
248 {Convert the word to upper case}
249 TheWord := UpperCase(TheWord);
250 {Copy the first letter}
251 WorkString1 := TheWord[1];
252 {Copy the rest of the word to WordString1 deleting duplicate letters}
253 for I := 2 to Length(TheWord) do
254 if TheWord[I - 1] <> TheWord[I] then
255 AppendStr(WorkString1, TheWord[I]);
256 {Move the first letter to WorkString2}
257 WorkString2 := WorkString1[1];
258 {Compute the Soundex codes for the remaining letters}
259 for I := 2 to Length(WorkString1) do
260 case WorkString1[I] of
261 'B', 'F', 'P', 'V':
262 AppendStr(WorkString2, '1');
263 'C', 'G', 'J', 'K', 'Q', 'S', 'X', 'Z':
264 Appendstr(WorkString2, '2');
265 'D', 'T':
266 Appendstr(WorkString2, '3');
267 'L':
268 Appendstr(WorkString2, '4');
269 'M', 'N':
270 Appendstr(WorkString2, '5');
271 'R':
272 Appendstr(WorkString2, '6');
273 end;
274 {Pad the string with zeros}
275 WorkString1 := '';
276 WorkString1 := dgFillString('0', MaxSize);
277 AppendStr(WorkString2, WorkString1);
278 Result := Copy(WorkString2, 1, SoundexSize);
279 end;
|