Author: Tomas Rutkauskas
Boyer-Moore string searching
Answer:
Solve 1:
1 unit BMSearch;
2
3 interface
4
5 type
6 {$IFDEF WINDOWS}
7 size_t = Word;
8 {$ELSE}
9 size_t = LongInt;
10 {$ENDIF}
11
12 type
13 TTranslationTable = array[char] of char; { translation table }
14 TSearchBM = class(TObject)
15 private
16 FTranslate: TTranslationTable; { translation table }
17 FJumpTable: array[char] of Byte; { Jumping table }
18 FShift_1: integer;
19 FPattern: pchar;
20 FPatternLen: size_t;
21 public
22 procedure Prepare(Pattern: pchar; PatternLen: size_t; IgnoreCase: Boolean);
23 procedure PrepareStr(const Pattern: string; IgnoreCase: Boolean);
24 function Search(Text: pchar; TextLen: size_t): pchar;
25 function Pos(const S: string): integer;
26 end;
27
28 implementation
29
30 uses
31 SysUtils;
32
33 {Ignore Case Table Translation}
34
35 procedure CreateTranslationTable(var T: TTranslationTable; IgnoreCase: Boolean);
36 var
37 c: char;
38 begin
39 for c := #0 to #255 do
40 T[c] := c;
41 if not IgnoreCase then
42 exit;
43 for c := 'a' to 'z' do
44 T[c] := UpCase(c);
45
46 { Mapping all accented characters to their uppercase equivalent }
47
48 T['Á'] := 'A';
49 T['À'] := 'A';
50 T['Ä'] := 'A';
51 T['Â'] := 'A';
52
53 T['á'] := 'A';
54 T['à'] := 'A';
55 T['ä'] := 'A';
56 T['â'] := 'A';
57
58 T['É'] := 'E';
59 T['È'] := 'E';
60 T['Ë'] := 'E';
61 T['Ê'] := 'E';
62
63 T['é'] := 'E';
64 T['è'] := 'E';
65 T['ë'] := 'E';
66 T['ê'] := 'E';
67
68 T['Í'] := 'I';
69 T['Ì'] := 'I';
70 T['Ï'] := 'I';
71 T['Î'] := 'I';
72
73 T['í'] := 'I';
74 T['ì'] := 'I';
75 T['ï'] := 'I';
76 T['î'] := 'I';
77
78 T['Ó'] := 'O';
79 T['Ò'] := 'O';
80 T['Ö'] := 'O';
81 T['Ô'] := 'O';
82
83 T['ó'] := 'O';
84 T['ò'] := 'O';
85 T['ö'] := 'O';
86 T['ô'] := 'O';
87
88 T['Ú'] := 'U';
89 T['Ù'] := 'U';
90 T['Ü'] := 'U';
91 T['Û'] := 'U';
92
93 T['ú'] := 'U';
94 T['ù'] := 'U';
95 T['ü'] := 'U';
96 T['û'] := 'U';
97
98 T['ñ'] := 'Ñ';
99 end;
100
101 {Preparation of the jumping table}
102
103 procedure TSearchBM.Prepare(Pattern: pchar; PatternLen: size_t; IgnoreCase:
104 Boolean);
105 var
106 i: integer;
107 c, lastc: char;
108 begin
109 FPattern := Pattern;
110 FPatternLen := PatternLen;
111 if FPatternLen < 1 then
112 FPatternLen := strlen(FPattern);
113 {This algorythm is based on a character set of 256}
114 if FPatternLen > 256 then
115 exit;
116 {1. Preparing translating table}
117 CreateTranslationTable(FTranslate, IgnoreCase);
118 {2. Preparing jumping table}
119 for c := #0 to #255 do
120 FJumpTable[c] := FPatternLen;
121 for i := FPatternLen - 1 downto 0 do
122 begin
123 c := FTranslate[FPattern[i]];
124 if FJumpTable[c] >= FPatternLen - 1 then
125 FJumpTable[c] := FPatternLen - 1 - i;
126 end;
127 FShift_1 := FPatternLen - 1;
128 lastc := FTranslate[Pattern[FPatternLen - 1]];
129 for i := FPatternLen - 2 downto 0 do
130 if FTranslate[FPattern[i]] = lastc then
131 begin
132 FShift_1 := FPatternLen - 1 - i;
133 break;
134 end;
135 if FShift_1 = 0 then
136 FShift_1 := 1;
137 end;
138
139 procedure TSearchBM.PrepareStr(const Pattern: string; IgnoreCase: Boolean);
140 var
141 str: pchar;
142 begin
143 if Pattern <> '' then
144 begin
145 {$IFDEF Windows}
146 str := @Pattern[1];
147 {$ELSE}
148 str := pchar(Pattern);
149 {$ENDIF}
150 Prepare(str, Length(Pattern), IgnoreCase);
151 end;
152 end;
153
154 {Searching Last char & scanning right to left}
155
156 function TSearchBM.Search(Text: pchar; TextLen: size_t): pchar;
157 var
158 shift, m1, j: integer;
159 jumps: size_t;
160 begin
161 result := nil;
162 if FPatternLen > 256 then
163 exit;
164 if TextLen < 1 then
165 TextLen := strlen(Text);
166 m1 := FPatternLen - 1;
167 shift := 0;
168 jumps := 0;
169 {Searching the last character}
170 while jumps <= TextLen do
171 begin
172 Inc(Text, shift);
173 shift := FJumpTable[FTranslate[Text^]];
174 while shift <> 0 do
175 begin
176 Inc(jumps, shift);
177 if jumps > TextLen then
178 exit;
179 Inc(Text, shift);
180 shift := FJumpTable[FTranslate[Text^]];
181 end;
182 { Compare right to left FPatternLen - 1 characters }
183 if jumps >= m1 then
184 begin
185 j := 0;
186 while FTranslate[FPattern[m1 - j]] = FTranslate[(Text - j)^] do
187 begin
188 Inc(j);
189 if j = FPatternLen then
190 begin
191 result := Text - m1;
192 exit;
193 end;
194 end;
195 end;
196 shift := FShift_1;
197 Inc(jumps, shift);
198 end;
199 end;
200
201 function TSearchBM.Pos(const S: string): integer;
202 var
203 str, p: pchar;
204 begin
205 result := 0;
206 if S <> '' then
207 begin
208 {$IFDEF Windows}
209 str := @S[1];
210 {$ELSE}
211 str := pchar(S);
212 {$ENDIF}
213 p := Search(str, Length(S));
214 if p <> nil then
215 result := 1 + p - str;
216 end;
217 end;
218
219 end.
Solve 2:
Here's a demo program of the Boyer-Moore search algorithm. The basic idea is to
first create a Boyer-Moore index table for the string you want to search for, and
then call the BMsearch routine. Remember to turn-off Range Checking {$R-} in your
finished program, otherwise the BMSearch will take 3-4 times longer than it should.
220
221 {Public-domain demo of Boyer-Moore search algorithm.
222 Guy McLoughlin - May 1, 1993.}
223
224 program DemoBMSearch;
225
226 {Boyer-Moore index table data definition}
227 type
228 BMTable = array[0..127] of byte;
229
230 {Create a Boyer-Moore index table to search with.}
231
232 procedure Create_BMTable(Pattern: string; var BMT: BMTable);
233 var
234 Index: byte;
235 begin
236 fillchar(BMT, sizeof(BMT), length(Pattern));
237 for Index := 1 to length(Pattern) do
238 BMT[ord(Pattern[Index])] := (length(Pattern) - Index)
239 end;
240
241 {Boyer-Moore Search function. Returns 0 if string is not found. Returns 65,535 if
242 BufferSize is too large, ie: greater than 65,520 bytes.}
243
244 function BMsearch(var Buffer; BuffSize: word; var BMT: BMTable; Pattern: string):
245 word;
246 var
247 Buffer2: array[1..65520] of char absolute Buffer;
248 Index1, Index2, PatSize: word;
249 begin
250 if (BuffSize > 65520) then
251 begin
252 BMsearch := $FFFF;
253 exit
254 end;
255 PatSize := length(Pattern);
256 Index1 := PatSize;
257 Index2 := PatSize;
258 repeat
259 if (Buffer2[Index1] = Pattern[Index2]) then
260 begin
261 dec(Index1);
262 dec(Index2)
263 end
264 else
265 begin
266 if (succ(PatSize - Index2) > (BMT[ord(Buffer2[Index1])])) then
267 inc(Index1, succ(PatSize - Index2))
268 else
269 inc(Index1, BMT[ord(Buffer2[Index1])]);
270 Index2 := PatSize
271 end;
272 until
273 (Index2 < 1) or (Index1 > BuffSize);
274 if (Index1 > BuffSize) then
275 BMsearch := 0
276 else
277 BMsearch := succ(Index1)
278 end;
279
280 type
281 arby_64K = array[1..65520] of byte;
282
283 var
284 Index: word;
285 st_Temp: string[10];
286 Buffer: ^arby_64K;
287 BMT: BMTable;
288
289 begin
290 new(Buffer);
291 fillchar(Buffer^, sizeof(Buffer^), 0);
292 st_Temp := 'Gumby';
293 move(st_Temp[1], Buffer^[65516], length(st_Temp));
294 Create_BMTable(st_Temp, BMT);
295 Index := BMSearch(Buffer^, sizeof(Buffer^), BMT, st_Temp);
296 writeln(st_Temp, ' found at offset ', Index)
297 end.
|