Author: Jonas Bilinkevicius
How to split up a formatted source string into substrings and integers
Answer:
function Unformat(const source, pattern: string; const args: array of const):
  Integer;
{ The opposite of Format: Unformat splits a formatted source string into
  substrings and Integers. It is an alternative to hand-written parsing when
  the layout of the source is known to be fixed.

  pattern is a combination of plain characters and format specifiers:
    %s  a string value is extracted and stored
    %d  an integer value is extracted and stored
    %S  a string value is skipped (nothing is stored, no argument is used)
    %D  an integer value is skipped (nothing is stored, no argument is used)

  Arguments:
    For each %s, args must contain a pointer to a short string variable
    followed by an integer giving the maximum number of characters to store.
    For each %d, args must contain a pointer to an Integer variable.

  Matching rules:
    - A string value extends up to (not including) the first occurrence of the
      pattern character that follows the specifier. If the specifier is the
      last thing in the pattern, the value is the remainder of the source.
      If another specifier follows directly (e.g. '%s%d'), the delimiter is the
      literal '%' character, so in practice the string runs to the end of the
      source unless the source itself contains a '%'.
    - An integer value is an optional leading '-' followed by decimal digits.
      A '-' is accepted only directly in front of a digit, so '10-20' matched
      against '%d-%d' yields 10 and 20.
    - Plain pattern characters are NOT compared with the source; each one
      simply skips one source character. (The strict comparison is disabled in
      the code below; enable it if mismatches should raise EConvertError.)
    - A '%' followed by any other character is skipped without consuming any
      source text.

  When the end of the source is reached the function returns without modifying
  the remaining arguments, so initialise your variables to default values
  before the call. If a %d finds no digits, its variable is left untouched.

  Returns the number of format specifiers (%s, %d, %S, %D) that were processed.

  Raises EConvertError if args has too few entries for a %s or %d, if the
  entry after a %s pointer is not an integer, or if an integer value does not
  fit (StrToInt fails).

  Examples:
    var
      s1, s2: string[31];
      i: Integer;

    Unformat('[abc]123(def)', '[%s]%d(%s)', [@s1, 31, @i, @s2, 31]);
    (* s1 = 'abc', i = 123, s2 = 'def' *)

    Unformat('Hello, Universe!!!', '%s, %s%d', [@s1, 31, @s2, 31, @i]);
    (* s1 = 'Hello', s2 = 'Universe!!!', i is untouched *)

    Unformat('How much wood could a woodchuck chuck...',
      '%S %S %s could a %S %s...', [@s1, 31, @s2, 31]);
    (* s1 = 'wood', s2 = 'chuck' *)
}
const
  Digits = ['0'..'9'];
var
  i, j, argindex, start, maxlen: Integer;

  { Smaller of two integers (portable replacement for the 32-bit asm version). }
  function Min(a, b: Integer): Integer;
  begin
    if a <= b then
      Result := a
    else
      Result := b;
  end;

  { Advances j past an optional leading '-' and the decimal digits after it.
    The sign is consumed only when a digit follows, so a '-' used as a field
    separator is left in place for the pattern to match. }
  procedure SkipInteger;
  begin
    if (j < Length(source)) and (source[j] = '-') and
      (source[j + 1] in Digits) then
      Inc(j);
    while (j <= Length(source)) and (source[j] in Digits) do
      Inc(j);
  end;

  { Advances j to the end of a string field. i must already point past the
    specifier: the field ends at the first occurrence of pattern[i], or at the
    end of the source when the specifier was the last thing in the pattern. }
  procedure SkipString;
  var
    delimiter: Char;
  begin
    if i > Length(pattern) then
      j := Length(source) + 1
    else
    begin
      delimiter := pattern[i];
      while (j <= Length(source)) and (source[j] <> delimiter) do
        Inc(j);
    end;
  end;

begin
  Result := 0;
  argindex := 0;
  i := 1;
  j := 1;
  { "i < Length" (not "<=") guarantees that pattern[i + 1] exists below. }
  while (i < Length(pattern)) and (j <= Length(source)) do
  begin
    if pattern[i] = '%' then
      case pattern[i + 1] of
        'D':
          begin
            Inc(i, 2);
            SkipInteger;
            Inc(Result);
          end;
        'S':
          begin
            Inc(i, 2);
            SkipString;
            Inc(Result);
          end;
        'd':
          begin
            if argindex > High(args) then
              raise EConvertError.Create('Not enough arguments');
            Inc(i, 2);
            start := j;
            SkipInteger;
            { Leave the target untouched when no digits were found. }
            if j > start then
              PInteger(args[argindex].VPointer)^ :=
                StrToInt(Copy(source, start, j - start));
            Inc(argindex);
            Inc(Result);
          end;
        's':
          begin
            { A %s consumes two arguments: the pointer and the maximum length. }
            if argindex > High(args) - 1 then
              raise EConvertError.Create('Not enough arguments');
            if args[argindex + 1].VType <> vtInteger then
              raise EConvertError.Create('No string size specified');
            maxlen := args[argindex + 1].VInteger;
            Inc(i, 2);
            start := j;
            SkipString;
            { The pointer argument is read through VString, i.e. it is treated
              as a pointer to a short string. }
            args[argindex].VString^ := Copy(source, start,
              Min(j - start, maxlen));
            Inc(argindex, 2);
            Inc(Result);
          end;
      else
        { Unknown specifier: skip the '%' only. }
        Inc(i);
      end
    else
    begin
      { Plain character: skipped without comparison. For strict matching use:
          if pattern[i] <> source[j] then
            raise EConvertError.Create('Pattern mismatch'); }
      Inc(i);
      Inc(j);
    end;
  end;
end;
|