Author: Tomas Rutkauskas
I have an array of char called FBuffer1. Let's say: StrCopy(FBuffer1,'Test'). I
also have a file, let's say File1.exe. I would like to find a very quick way to be
able to localize the string "test" in the file1.exe
Answer:
Solve 1:
One way is to remove the file access problems. Load the whole file into a
TMemoryStream, then search the stream. Example:
1 { ... }
2 var
3 tmem: TMemoryStream;
4 buf: array[1..4] of Char;
5 begin
6 zeromemory(@buf, 4);
7 tmem := TMemoryStream.Create;
8 tmem.loadfromfile('test1.exe');
9 tmem.position := 0;
10 while tmem.position <> tmem.size do
11 begin
12 buf[1] := buf[2];
13 buf[2] := buf[3];
14 buf[3] := buf[4];
15 tmem.read(buf[4], 1);
16 if compare(buf, 'hello') then
17 Memo1.Lines.Add('match found at position ' + Inttostr(tmem.position));
18 end;
19 tmem.destroy;
20 end;
Solve 2:
I was working on just that some time ago. Here is my project file with some
alternative functions and a time test. Just paste the following listing into a text
file, rename the file to Project1.dpr, open the file in Delphi and run it.
21 {$APPTYPE CONSOLE}
22
23 program Project1;
24
25 uses
26 Windows, SysUtils;
27
28 function ScanString(SourceStart, SourceEnd, Search: PChar; CaseSensitive: Boolean):
29 PChar;
30 var
31 SourcePtr: PChar;
32 SourceChr: Char;
33 SearchPos: DWord;
34 SearchPtr: PChar;
35 begin
36 Result := nil;
37 if SourceStart > SourceEnd then
38 Exit;
39 if not CaseSensitive then
40 CharUpperBuff(Search, Length(Search));
41 SourcePtr := SourceStart;
42 SearchPos := 0;
43 SearchPtr := Search;
44 while SourcePtr <= SourceEnd do
45 begin
46 SourceChr := SourcePtr^;
47 if not CaseSensitive then
48 CharUpperBuff(@SourceChr, 1);
49 if SourceChr = SearchPtr^ then
50 begin
51 Inc(SearchPtr);
52 if SearchPtr^ = #0 then
53 begin
54 Result := SourcePtr - SearchPos;
55 Break;
56 end;
57 Inc(SearchPos);
58 end
59 else if SearchPos > 0 then
60 begin
61 SearchPos := 0;
62 SearchPtr := Search;
63 end;
64 Inc(SourcePtr);
65 end;
66 end;
67
68 function ScanStringNew(SourceStart, SourceEnd, SearchStr: PChar;
69 CaseSensitive: Boolean): PChar;
70 var
71 SourcePtr: PChar;
72 ScanLen: DWord;
73 ScanPos: DWord;
74 ScanStr: PChar;
75 ScanPtr: PChar;
76 ScanUppStr: PChar;
77 ScanUppPtr: PChar;
78 ScanLowStr: PChar;
79 ScanLowPtr: PChar;
80 begin
81 Result := nil;
82 if SourceStart > SourceEnd then
83 Exit;
84 ScanLen := Length(SearchStr);
85 if not CaseSensitive then
86 begin
87 GetMem(ScanUppStr, ScanLen);
88 CopyMemory(ScanUppStr, SearchStr, ScanLen);
89 CharUpperBuff(ScanUppStr, ScanLen);
90 GetMem(ScanLowStr, ScanLen);
91 CopyMemory(ScanLowStr, SearchStr, ScanLen);
92 CharLowerBuff(ScanLowStr, ScanLen);
93 end
94 else
95 begin
96 ScanUppStr := SearchStr;
97 ScanLowStr := SearchStr;
98 end;
99 ScanPos := 0;
100 ScanUppPtr := ScanUppStr;
101 ScanLowPtr := ScanLowStr;
102 SourcePtr := SourceStart;
103 ScanPtr := ScanStr;
104 while SourcePtr <= SourceEnd do
105 begin
106 if (SourcePtr^ = ScanUppPtr^) or (SourcePtr^ = ScanLowPtr^) then
107 begin
108 Inc(ScanPos);
109 if ScanPos = ScanLen then
110 begin
111 Result := SourcePtr - ScanPos + 1;
112 Break;
113 end;
114 Inc(ScanUppPtr);
115 Inc(ScanLowPtr);
116 end
117 else if ScanPos > 0 then
118 begin
119 ScanPos := 0;
120 ScanUppPtr := ScanUppStr;
121 ScanLowPtr := ScanLowStr;
122 end;
123 Inc(SourcePtr);
124 end;
125 if not CaseSensitive then
126 begin
127 FreeMem(ScanUppStr, ScanLen);
128 FreeMem(ScanLowStr, ScanLen);
129 end;
130 end;
131
132
133 function ScanStringAsm(SourceStart, SourceEnd, SearchStr: PChar;
134 CaseSensitive: Boolean): PChar;
135 var
136 ScanLen: DWord;
137 ScanPos: DWord;
138 ScanStr: PChar;
139 ScanPtr: PChar;
140 ScanUppStr: PChar;
141 ScanUppPtr: PChar;
142 ScanLowStr: PChar;
143 ScanLowPtr: PChar;
144 begin
145 if SourceStart > SourceEnd then
146 Exit;
147 ScanLen := Length(SearchStr);
148 if not CaseSensitive then
149 begin
150 GetMem(ScanUppStr, ScanLen);
151 CopyMemory(ScanUppStr, SearchStr, ScanLen);
152 CharUpperBuff(ScanUppStr, ScanLen);
153 GetMem(ScanLowStr, ScanLen);
154 CopyMemory(ScanLowStr, SearchStr, ScanLen);
155 CharLowerBuff(ScanLowStr, ScanLen);
156 end
157 else
158 begin
159 ScanUppStr := SearchStr;
160 ScanLowStr := SearchStr;
161 end;
162 GetMem(ScanStr, ScanLen * 2 + 2);
163 ScanPos := ScanLen;
164 ScanPtr := ScanStr;
165 ScanUppPtr := ScanUppStr;
166 ScanLowPtr := ScanLowStr;
167 while ScanPos > 0 do
168 begin
169 ScanPtr^ := ScanUppPtr^;
170 Inc(ScanPtr);
171 Inc(ScanUppPtr);
172 ScanPtr^ := ScanLowPtr^;
173 Inc(ScanPtr);
174 Inc(ScanLowPtr);
175 Dec(ScanPos);
176 end;
177 ScanPtr^ := #0;
178
179 asm
180 {Register use:
181 EDI - pointer to source char
182 ESI - pointer to par of scan chars
183 AL - current source char
184 EBX - match length counter
185 ECX - source length counter
186 DX - current par of scan chars}
187
188 end;
189
190
191
192 if not CaseSensitive then
193 begin
194 FreeMem(ScanUppStr, ScanLen);
195
196
197 FreeMem(ScanLowStr, ScanLen);
198 end;
199
200
201 FreeMem(ScanStr, ScanLen * 2 + 2);
202
203 end;
204
205
206 end;
207
208 if not CaseSensitive then
209 begin
210 FreeMem(ScanUppStr, ScanLen);
211
212 FreeMem(ScanLowStr, ScanLen);
213
214 end;
215
216 FreeMem(ScanStr, ScanLen * 2 + 2);
217
218 end;
219
220
221 {Preserve registers:}
222 PUSH EBX {Preserve registers EBX, EDI, ESI:}
223 PUSH EDI
224 PUSH ESI
225 {Initialize registers:}
226 MOV EDI, SourceStart {Move addr SourceStart to EDI}
227 MOV ECX, SourceEnd {Calculate source length in ECX:}
228 SUB ECX, EDI
229 INC ECX
230 MOV ESI, ScanStr {Move addr ScanStr to ESI}
231 MOV DX, WORD[ESI] {Move first par of scan chars to DX}
232 xor EBX, EBX {Set EBX (match counter) to 0}
233 @01: {Main test loop:}
234 MOV AL, BYTE[EDI] {Move current source char to AL}
235 INC EDI {Inc EDI to point to next source char}
236 CMP AL, DL {Compare AL with scan char in DL (uppcase)}
237 JE@10 {Jump to @10 if equal (match)}
238 CMP AL, DH {Compare AL with scan char in DH (lowcase)}
239 JE@10 {Jump to @10 if equal (match)}
240 TEST EBX, EBX {Test EBX (match counter)}
241 JZ@02 {Jump to @02 if zero (i.e. first scan char)}
242 SUB ESI, EBX {Move ESI back to start of scan string:}
243 SUB ESI, EBX
244 MOV DX, WORD[ESI] {Move first par of scan chars to DX}
245 xor EBX, EBX {Set EBX to 0}
246 @02: {Next loop:}
247 DEC ECX {Dec ECX (source length counter)}
248 JNZ@01 {Jump back to @01 if not zero}
249 MOV Result, 0 {Move nil to Result (match not found)}
250 JMP@99 {Jump to @99}
251 @10: {Char match found:}
252 INC EBX {Inc EBX (match length counter):}
253 ADD ESI, 2 {Move ESI to next par of scan chars:}
254 MOV DX, WORD[ESI] {Move this par of scan chars to DX}
255 CMP DL, 0 {Compare char in DL with #0 (end of string)}
256 JNE@02 {Jump to @02 if not equal (test next char)}
257 {Match found:}
258 SUB EDI, EBX {Move EDI back to first char in match}
259 MOV Result, EDI {Move addr of match to Result}
260 @99: {Restore registers:}
261 POP ESI
262 POP EDI
263 POP EBX
264 end;
265
266 if not CaseSensitive then
267 begin
268 FreeMem(ScanUppStr, ScanLen);
269 FreeMem(ScanLowStr, ScanLen);
270 end;
271 FreeMem(ScanStr, ScanLen * 2 + 2);
272 end;
273
274
275 procedure TimeTest2;
276 var
277 Time1: DWord;
278 Time2: DWord;
279 Search: string;
280 TestName: string;
281 TestFile: file;
282 TestSize: DWord;
283 TestBuff: PChar;
284 TestScan: PChar;
285 TestPtr: PChar;
286 TestPos: Integer;
287 HitCount: Integer;
288 n, i, j: Integer;
289 c: Char;
290 Show: Boolean;
291 begin
292 n := 20;
293 Show := false;
294 Search := 'WINDOWS';
295
296 {TestBuff := PChar(Search);
297 TestScan := TestBuff;
298 c := TestScan^;
299 Time1 := GetTickCount;
300 for i := 1 to 10000000 do
301 begin
302 if TestBuff^ = c then
303 begin
304 end;
305 end;
306 Time2 := GetTickCount;
307 WriteLn('Tickcount : ', Time2 - Time1);
308 Exit;}
309
310 TestName := 'c:\windows\help\getstart.chm';
311 AssignFile(TestFile, TestName);
312 Reset(TestFile, 1);
313 TestSize := FileSize(TestFile);
314 GetMem(TestBuff, TestSize);
315 BlockRead(TestFile, TestBuff^, TestSize);
316 CloseFile(TestFile);
317
318 WriteLn;
319 WriteLn('Scaning for "', Search, '" ', n, ' times');
320 WriteLn('in file: ', TestName, ' size: ', TestSize, ' bytes');
321
322 HitCount := 0;
323 Time1 := GetTickCount;
324 for i := 1 to n do
325 begin
326 TestScan := TestBuff;
327 repeat
328 if TestScan <> TestBuff then
329 Inc(TestScan, Length(Search));
330 TestScan := ScanString(TestScan, TestBuff + TestSize - 1, PChar(Search),
331 false);
332 if TestScan <> nil then
333 begin
334 Inc(HitCount);
335 if Show then
336 begin
337 write(HitCount, ' ');
338 TestPtr := TestScan;
339 for TestPos := 1 to Length(Search) do
340 begin
341 write(TestPtr^);
342 Inc(TestPtr);
343 end;
344 WriteLn;
345 ReadLn;
346 end;
347 end;
348 until TestScan = nil;
349 end;
350 Time2 := GetTickCount;
351 WriteLn(' Tickcount ScanString : ', Time2 - Time1: 5, 'ms', ' hitcount:',
352 HitCount);
353 HitCount := 0;
354 Time1 := GetTickCount;
355 for i := 1 to n do
356 begin
357 TestScan := TestBuff;
358 repeat
359 if TestScan <> TestBuff then
360 Inc(TestScan, Length(Search));
361 TestScan := ScanStringNew(TestScan, TestBuff + TestSize - 1, PChar(Search),
362 false);
363 if TestScan <> nil then
364 begin
365 Inc(HitCount);
366 if Show then
367 begin
368 write(HitCount, ' ');
369 TestPtr := TestScan;
370 for TestPos := 1 to Length(Search) do
371 begin
372 write(TestPtr^);
373 Inc(TestPtr);
374 end;
375 WriteLn;
376 ReadLn;
377 end;
378 end;
379 until TestScan = nil;
380 end;
381 Time2 := GetTickCount;
382 WriteLn(' Tickcount ScanStringNew: ', Time2 - Time1: 5, 'ms', ' hitcount:',
383 HitCount);
384 HitCount := 0;
385 Time1 := GetTickCount;
386 for i := 1 to n do
387 begin
388 TestScan := TestBuff;
389 repeat
390 if TestScan <> TestBuff then
391 Inc(TestScan, Length(Search));
392 TestScan := ScanStringAsm(TestScan, TestBuff + TestSize - 1, PChar(Search),
393 false);
394 if TestScan <> nil then
395 begin
396 Inc(HitCount);
397 if Show then
398 begin
399 write(HitCount, ' ');
400 TestPtr := TestScan;
401 for TestPos := 1 to Length(Search) do
402 begin
403 write(TestPtr^);
404 Inc(TestPtr);
405 end;
406 WriteLn;
407 ReadLn;
408 end;
409 end;
410 until TestScan = nil;
411 end;
412 Time2 := GetTickCount;
413 WriteLn(' Tickcount ScanStringAsm: ', Time2 - Time1: 5, 'ms', ' hitcount:',
414 HitCount);
415 FreeMem(TestBuff, TestSize);
416 end;
417
418 begin
419 TimeTest2;
420 WriteLn;
421 WriteLn('** press enter to close **');
422 ReadLn;
423 end.
Solve 3:
424
425 function ScanFile(const filename: string; const forString: string; caseSensitive:
426 Boolean): LongInt;
427 { returns position of string in file or -1, if not found }
428 const
429 BufferSize = $8001; { 32K + 1 bytes }
430 var
431 pBuf, pEnd, pScan, pPos: Pchar;
432 filesize: LongInt;
433 bytesRemaining: LongInt;
434 bytesToRead: Integer;
435 F: file;
436 SearchFor: Pchar;
437 oldMode: Word;
438 begin
439 Result := -1; { assume failure }
440 if (Length(forString) = 0) or (Length(filename) = 0) then
441 Exit;
442 SearchFor := nil;
443 pBuf := nil;
444 { open file as binary, 1 byte recordsize }
445 AssignFile(F, filename);
446 oldMode := FileMode;
447 FileMode := 0; { read-only access }
448 Reset(F, 1);
449 FileMode := oldMode;
450 try { allocate memory for buffer and pchar search string }
451 SearchFor := StrAlloc(Length(forString) + 1);
452 StrPCopy(SearchFor, forString);
453 if not caseSensitive then { convert to upper case }
454 AnsiUpper(SearchFor);
455 GetMem(pBuf, BufferSize);
456 filesize := System.Filesize(F);
457 bytesRemaining := filesize;
458 pPos := nil;
459 while bytesRemaining > 0 do
460 begin
461 { calc how many bytes to read this round }
462 if bytesRemaining >= BufferSize then
463 bytesToRead := Pred(BufferSize)
464 else
465 bytesToRead := bytesRemaining;
466 { read a buffer full and zero-terminate the buffer }
467 BlockRead(F, pBuf^, bytesToRead, bytesToRead);
468 pEnd := @pBuf[bytesToRead];
469 pEnd^ := #0;
470 { scan the buffer. Problem: buffer may contain #0 chars! So we treat it as
471 a concatenation of zero-terminated strings. }
472 pScan := pBuf;
473 while pScan < pEnd do
474 begin
475 if not caseSensitive then { convert to upper case }
476 AnsiUpper(pScan);
477 pPos := StrPos(pScan, SearchFor); { search for substring }
478 if pPos <> nil then
479 begin { Found it! }
480 Result := FileSize - bytesRemaining + LongInt(pPos) - LongInt(pBuf);
481 Break;
482 end;
483 pScan := StrEnd(pScan);
484 Inc(pScan);
485 end;
486 if pPos <> nil then
487 Break;
488 bytesRemaining := bytesRemaining - bytesToRead;
489 if bytesRemaining > 0 then
490 begin
491 { no luck in this buffers load. We need to handle the case of the search
492 string spanning two chunks of file now. We simply go back a bit in the file
493 and read from there, thus inspecting some characters twice }
494 Seek(F, FilePos(F) - Length(forString));
495 bytesRemaining := bytesRemaining + Length(forString);
496 end;
497 end;
498 finally
499 CloseFile(F);
500 if SearchFor <> nil then
501 StrDispose(SearchFor);
502 if pBuf <> nil then
503 FreeMem(pBuf, BufferSize);
504 end;
505 end;
Solve 4:
One option is to just read the entire file into a single string. The old-fashioned
way is to use BlockRead. You could also use a file stream. Once you have it in a
single string you can use any normal string operations, even if there are embedded
null bytes or CR/LF's.
506
507 procedure TForm1.Button1Click(Sender: TObject);
508 var
509 s: string;
510 f: file;
511 p: integer;
512 begin
513 AssignFile(f, 'c:\winnt\system32\mspaint.exe');
514 FileMode := 0;
515 Reset(f, 1);
516 SetLength(s, FileSize(f));
517 BlockRead(f, s[1], FileSize(f));
518 CloseFile(f);
519 p := pos('This program cannot be run in DOS mode', s);
520 Label1.Caption := 'Found at : ' + IntToStr(p);
521 end;
|