Author: Jonas Bilinkevicius
I need to locate a pattern in a file (both text and binary) - just like the Pos
function does with the strings. Preferably, it should deal with FileStream.
Straightforward solution first seemed kind of expensive - that is to just plainly
go through the stream comparing patterns on every step.
Answer:
Solve 1:
You can do it that way but it is much faster to load chunks of data into a sizeable
buffer and do the search in the buffer. Here is an example:
1
2 function ScanFile(const filename: string; const forString: string; caseSensitive:
3 Boolean): LongInt;
4 { returns position of string in file or -1, if not found }
5 const
6 BufferSize = $8001; { 32K + 1 bytes }
7 var
8 pBuf, pEnd, pScan, pPos: Pchar;
9 filesize: LongInt;
10 bytesRemaining: LongInt;
11 bytesToRead: Word;
12 F: file;
13 SearchFor: Pchar;
14 oldMode: Word;
15 begin
16 Result := -1; { assume failure }
17 if (Length(forString) = 0) or (Length(filename) = 0) then
18 Exit;
19 SearchFor := nil;
20 pBuf := nil;
21 { open file as binary, 1 byte recordsize }
22 AssignFile(F, filename);
23 oldMode := FileMode;
24 FileMode := 0; { read-only access }
25 Reset(F, 1);
26 FileMode := oldMode;
27 try { allocate memory for buffer and pchar search string }
28 SearchFor := StrAlloc(Length(forString) + 1);
29 StrPCopy(SearchFor, forString);
30 if not caseSensitive then { convert to upper case }
31 AnsiUpper(SearchFor);
32 GetMem(pBuf, BufferSize);
33 filesize := System.Filesize(F);
34 bytesRemaining := filesize;
35 pPos := nil;
36 while bytesRemaining > 0 do
37 begin
38 { calc how many bytes to read this round }
39 if bytesRemaining >= BufferSize then
40 bytesToRead := Pred(BufferSize)
41 else
42 bytesToRead := bytesRemaining;
43 { read a buffer full and zero-terminate the buffer }
44 BlockRead(F, pBuf^, bytesToRead, bytesToRead);
45 pEnd := @pBuf[bytesToRead];
46 pEnd^ := #0;
47 { scan the buffer. Problem: buffer may contain #0 chars! So we
48 treat it as a concatenation of zero-terminated strings. }
49 pScan := pBuf;
50 while pScan < pEnd do
51 begin
52 if not caseSensitive then { convert to upper case }
53 AnsiUpper(pScan);
54 pPos := StrPos(pScan, SearchFor); { search for substring }
55 if pPos <> nil then
56 begin { Found it! }
57 Result := FileSize - bytesRemaining + LongInt(pPos) - LongInt(pBuf);
58 Break;
59 end;
60 pScan := StrEnd(pScan);
61 Inc(pScan);
62 end;
63 if pPos <> nil then
64 Break;
65 bytesRemaining := bytesRemaining - bytesToRead;
66 if bytesRemaining > 0 then
67 begin
68 { no luck in this buffers load. We need to handle the case of the
69 search string spanning two chunks of file now. We simply go back a bit in
70 the file and read from there, thus inspecting some characters twice }
71 Seek(F, FilePos(F) - Length(forString));
72 bytesRemaining := bytesRemaining + Length(forString);
73 end;
74 end;
75 finally
76 CloseFile(F);
77 if SearchFor <> nil then
78 StrDispose(SearchFor);
79 if pBuf <> nil then
80 FreeMem(pBuf, BufferSize);
81 end;
82 end;
Solve 2:
83 procedure TForm1.Button1Click(Sender: TObject);
84 var
85 s: string;
86 hFile: THandle;
87 hFileMapObj: THandle;
88 pSharedBuf: Pointer;
89 Time0: Integer;
90 p: PChar;
91 begin
92 if not OpenDialog1.Execute then
93 Exit;
94 s := InputBox('Find', 'Match', '');
95 Time0 := GetTickCount;
96 hfile := 0;
97 hFileMapObj := 0;
98 pSharedBuf := nil;
99 try
100 hFile := FileOpen(OpenDialog1.FileName, fmOpenRead);
101 Win32Check(hFileMapObj <> INVALID_HANDLE_VALUE);
102 hFileMapObj := CreateFileMapping(hFile, nil, PAGE_READONLY, 0, 0, nil);
103 Win32Check(hFileMapObj <> 0);
104 pSharedBuf := MapViewOfFile(hFileMapObj, FILE_MAP_READ, 0, 0, 0);
105 Win32Check(pSharedBuf <> nil);
106 P := StrPos(PChar(pSharedBuf), PChar(s));
107 finally
108 if pSharedBuf <> nil then
109 UnMapViewOfFile(pSharedBuf);
110 if hFileMapObj <> 0 then
111 CloseHandle(hFileMapObj);
112 if hFile <> 0 then
113 CloseHandle(hFile);
114 end;
115 if P = nil then
116 Caption := Format('Not found, ticks=%d', [GetTickCount - Time0])
117 else
118 Caption := Format('Found it at pos %d, ticks=%d', [Integer(P -
119 PChar(pSharedBuf)),
120 GetTickCount - Time0]);
121 end;
|