Articles   Members Online:
-Article/Tip Search
-News Group Search over 21 Million news group articles.
-Delphi/Pascal
-CBuilder/C++
-C#Builder/C#
-JBuilder/Java
-Kylix
Member Area
-Home
-Account Center
-Top 10 NEW!!
-Submit Article/Tip
-Forums Upgraded!!
-My Articles
-Edit Information
-Login/Logout
-Become a Member
-Why sign up!
-Newsletter
-Chat Online!
-Indexes NEW!!
Employment
-Build your resume
-Find a job
-Post a job
-Resume Search
Contacts
-Contacts
-Feedbacks
-Link to us
-Privacy/Disclaimer
Embarcadero
Visit Embarcadero
Embarcadero Community
JEDI
Links
Boyer-Moore string searching Turn on/off line numbers in source code. Switch to Orginial background IDE or DSP color Comment or reply to this aritlce/tip for discussion. Bookmark this article to my favorite article(s). Print this article
30-Aug-02
Category
Algorithm
Language
Delphi All Versions
Views
71
User Rating
No Votes
# Votes
0
Replies
0
Publisher:
DSP, Administrator
Reference URL:
DKB
			Author: Tomas Rutkauskas

Boyer-Moore string searching

Answer:

Solve 1:

1   unit BMSearch;
2   
3   interface
4   
5   type
6   {$IFDEF WINDOWS}
7     size_t = Word;
8   {$ELSE}
9     size_t = LongInt;
10  {$ENDIF}
11  
12  type
13    TTranslationTable = array[char] of char; { translation table }
14    TSearchBM = class(TObject)
15    private
16      FTranslate: TTranslationTable; { translation table }
17      FJumpTable: array[char] of Byte; { Jumping table }
18      FShift_1: integer;
19      FPattern: pchar;
20      FPatternLen: size_t;
21    public
22      procedure Prepare(Pattern: pchar; PatternLen: size_t; IgnoreCase: Boolean);
23      procedure PrepareStr(const Pattern: string; IgnoreCase: Boolean);
24      function Search(Text: pchar; TextLen: size_t): pchar;
25      function Pos(const S: string): integer;
26    end;
27  
28  implementation
29  
30  uses
31    SysUtils;
32  
33  {Ignore Case Table Translation}
34  
35  procedure CreateTranslationTable(var T: TTranslationTable; IgnoreCase: Boolean);
36  var
37    c: char;
38  begin
39    for c := #0 to #255 do
40      T[c] := c;
41    if not IgnoreCase then
42      exit;
43    for c := 'a' to 'z' do
44      T[c] := UpCase(c);
45  
46    { Mapping all accented characters to their uppercase equivalent }
47  
48    T[''] := 'A';
49    T[''] := 'A';
50    T[''] := 'A';
51    T[''] := 'A';
52  
53    T[''] := 'A';
54    T[''] := 'A';
55    T[''] := 'A';
56    T[''] := 'A';
57  
58    T[''] := 'E';
59    T[''] := 'E';
60    T[''] := 'E';
61    T[''] := 'E';
62  
63    T[''] := 'E';
64    T[''] := 'E';
65    T[''] := 'E';
66    T[''] := 'E';
67  
68    T[''] := 'I';
69    T[''] := 'I';
70    T[''] := 'I';
71    T[''] := 'I';
72  
73    T[''] := 'I';
74    T[''] := 'I';
75    T[''] := 'I';
76    T[''] := 'I';
77  
78    T[''] := 'O';
79    T[''] := 'O';
80    T[''] := 'O';
81    T[''] := 'O';
82  
83    T[''] := 'O';
84    T[''] := 'O';
85    T[''] := 'O';
86    T[''] := 'O';
87  
88    T[''] := 'U';
89    T[''] := 'U';
90    T[''] := 'U';
91    T[''] := 'U';
92  
93    T[''] := 'U';
94    T[''] := 'U';
95    T[''] := 'U';
96    T[''] := 'U';
97  
98    T[''] := '';
99  end;
100 
101 {Preparation of the jumping table}
102 
103 procedure TSearchBM.Prepare(Pattern: pchar; PatternLen: size_t; IgnoreCase: 
104 Boolean);
105 var
106   i: integer;
107   c, lastc: char;
108 begin
109   FPattern := Pattern;
110   FPatternLen := PatternLen;
111   if FPatternLen < 1 then
112     FPatternLen := strlen(FPattern);
113   {This algorythm is based on a character set of 256}
114   if FPatternLen > 256 then
115     exit;
116   {1. Preparing translating table}
117   CreateTranslationTable(FTranslate, IgnoreCase);
118   {2. Preparing jumping table}
119   for c := #0 to #255 do
120     FJumpTable[c] := FPatternLen;
121   for i := FPatternLen - 1 downto 0 do
122   begin
123     c := FTranslate[FPattern[i]];
124     if FJumpTable[c] >= FPatternLen - 1 then
125       FJumpTable[c] := FPatternLen - 1 - i;
126   end;
127   FShift_1 := FPatternLen - 1;
128   lastc := FTranslate[Pattern[FPatternLen - 1]];
129   for i := FPatternLen - 2 downto 0 do
130     if FTranslate[FPattern[i]] = lastc then
131     begin
132       FShift_1 := FPatternLen - 1 - i;
133       break;
134     end;
135   if FShift_1 = 0 then
136     FShift_1 := 1;
137 end;
138 
139 procedure TSearchBM.PrepareStr(const Pattern: string; IgnoreCase: Boolean);
140 var
141   str: pchar;
142 begin
143   if Pattern <> '' then
144   begin
145 {$IFDEF Windows}
146     str := @Pattern[1];
147 {$ELSE}
148     str := pchar(Pattern);
149 {$ENDIF}
150     Prepare(str, Length(Pattern), IgnoreCase);
151   end;
152 end;
153 
154 {Searching Last char & scanning right to left}
155 
156 function TSearchBM.Search(Text: pchar; TextLen: size_t): pchar;
157 var
158   shift, m1, j: integer;
159   jumps: size_t;
160 begin
161   result := nil;
162   if FPatternLen > 256 then
163     exit;
164   if TextLen < 1 then
165     TextLen := strlen(Text);
166   m1 := FPatternLen - 1;
167   shift := 0;
168   jumps := 0;
169   {Searching the last character}
170   while jumps <= TextLen do
171   begin
172     Inc(Text, shift);
173     shift := FJumpTable[FTranslate[Text^]];
174     while shift <> 0 do
175     begin
176       Inc(jumps, shift);
177       if jumps > TextLen then
178         exit;
179       Inc(Text, shift);
180       shift := FJumpTable[FTranslate[Text^]];
181     end;
182     { Compare right to left FPatternLen - 1 characters }
183     if jumps >= m1 then
184     begin
185       j := 0;
186       while FTranslate[FPattern[m1 - j]] = FTranslate[(Text - j)^] do
187       begin
188         Inc(j);
189         if j = FPatternLen then
190         begin
191           result := Text - m1;
192           exit;
193         end;
194       end;
195     end;
196     shift := FShift_1;
197     Inc(jumps, shift);
198   end;
199 end;
200 
201 function TSearchBM.Pos(const S: string): integer;
202 var
203   str, p: pchar;
204 begin
205   result := 0;
206   if S <> '' then
207   begin
208 {$IFDEF Windows}
209     str := @S[1];
210 {$ELSE}
211     str := pchar(S);
212 {$ENDIF}
213     p := Search(str, Length(S));
214     if p <> nil then
215       result := 1 + p - str;
216   end;
217 end;
218 
219 end.



Solve 2:

Here's a demo program of the Boyer-Moore search algorithm. The basic idea is to 
first create a Boyer-Moore index table for the string you want to search for, and 
then call the BMsearch routine. Remember to turn-off Range Checking {$R-} in your 
finished program, otherwise the BMSearch will take 3-4 times longer than it should.

220 
221 {Public-domain demo of Boyer-Moore search algorithm.
222 Guy McLoughlin - May 1, 1993.}
223 
224 program DemoBMSearch;
225 
226 {Boyer-Moore index table data definition}
227 type
228   BMTable = array[0..127] of byte;
229 
230   {Create a Boyer-Moore index table to search with.}
231 
232 procedure Create_BMTable(Pattern: string; var BMT: BMTable);
233 var
234   Index: byte;
235 begin
236   fillchar(BMT, sizeof(BMT), length(Pattern));
237   for Index := 1 to length(Pattern) do
238     BMT[ord(Pattern[Index])] := (length(Pattern) - Index)
239 end;
240 
241 {Boyer-Moore Search function. Returns 0 if string is not found. Returns 65,535 if
242 BufferSize is too large, ie: greater than 65,520 bytes.}
243 
244 function BMsearch(var Buffer; BuffSize: word; var BMT: BMTable; Pattern: string): 
245 word;
246 var
247   Buffer2: array[1..65520] of char absolute Buffer;
248   Index1, Index2, PatSize: word;
249 begin
250   if (BuffSize > 65520) then
251   begin
252     BMsearch := $FFFF;
253     exit
254   end;
255   PatSize := length(Pattern);
256   Index1 := PatSize;
257   Index2 := PatSize;
258   repeat
259     if (Buffer2[Index1] = Pattern[Index2]) then
260     begin
261       dec(Index1);
262       dec(Index2)
263     end
264     else
265     begin
266       if (succ(PatSize - Index2) > (BMT[ord(Buffer2[Index1])])) then
267         inc(Index1, succ(PatSize - Index2))
268       else
269         inc(Index1, BMT[ord(Buffer2[Index1])]);
270       Index2 := PatSize
271     end;
272   until
273     (Index2 < 1) or (Index1 > BuffSize);
274   if (Index1 > BuffSize) then
275     BMsearch := 0
276   else
277     BMsearch := succ(Index1)
278 end;
279 
280 type
281   arby_64K = array[1..65520] of byte;
282 
283 var
284   Index: word;
285   st_Temp: string[10];
286   Buffer: ^arby_64K;
287   BMT: BMTable;
288 
289 begin
290   new(Buffer);
291   fillchar(Buffer^, sizeof(Buffer^), 0);
292   st_Temp := 'Gumby';
293   move(st_Temp[1], Buffer^[65516], length(st_Temp));
294   Create_BMTable(st_Temp, BMT);
295   Index := BMSearch(Buffer^, sizeof(Buffer^), BMT, st_Temp);
296   writeln(st_Temp, ' found at offset ', Index)
297 end.


			
Vote: How useful do you find this Article/Tip?
Bad Excellent
1 2 3 4 5 6 7 8 9 10

 

Advertisement
Share this page
Advertisement
Download from Google

Copyright © Mendozi Enterprises LLC