|
Another method to parse an XML file |
|
Author: Yurii Zhukow
Another method to parse an XML file
Answer:
Here I will show one way to parse XML document
The main concept of XML is using containers for XML objects - so we will use Tree
concept while building our XML object from XML document.
XML text uses containers (...) or simple definitions ()
in each TAG we can use parameters (... ...)
Finally we will have an array of objects, describing XML tags. Every object of this
class will have an array of children if needed, and a hash to describe properties
of it.
For example if we have a text
we will have one root object (named "UL") in wich we will have 4 children (named
"LI" with different sets of properties - from "NAME"="xxx1" to "NAME"="xxx4")
This is not a trivial task - so we will make a unit to solve this... I will try to
comment some here...
1 unit YZXMLParser;
2
3 interface
4
5 uses
6 SysUtils, ComCtrls;
7
8 type
9 THashElement = record
10 Key, Value: string;
11 end;
12
13 type
14 THashElementArr = array of THashElement;
15
16 // here we declare a THash class to use in our parser
17 // The concept of THash is to retreive named values from an array
18 // Hash is an array where index is a string (example V[Key]=value,
19 // whehe Key and Value are of type string)
20
21 // The main purpose of this class is to rerurn a value of a String-named key
22 //(example: s:=hash['someValue'])
23
24 // the description of a hash element we use
25
26 type
27 THash = class(TObject)
28 private
29 Arr: THashElementArr;
30 function GetValue(Key: string): string;
31 procedure SetValue(Key: string; const VValue: string);
32 function GetKeys: StrArr;
33 function GetValues: StrArr;
34 function GetCount: Integer;
35 function Getempty: Boolean;
36 public
37 property Value[Key: string]: string read GetValue write SetValue; default;
38 property Values: StrArr read GetValues;
39 property Keys: StrArr read GetKeys;
40 property Count: Integer read GetCount;
41 property Empty: Boolean read Getempty;
42 procedure Clear;
43 constructor Create;
44 destructor Destroy; override;
45 end;
46
47 TYZHash = THash;
48
49 type
50
51 // Here we declare some definitions for our parser to know what
52 // identifier we would receive next in our text
53 // these values will be used in the result of WhatNext() function which will
54 scan text for keys
55
56 TYZXMLMarker = (xmlOpenTag, xmlCloseTagShort, xmlCloseTag, xmlCloseTagLong,
57 xmlEOF, xmlIdentifier, xmlunknown);
58 / *
59
60 Because we use recursive definition of our class(as TreeView, where we declare
61 children of
62 the same type in opur type
63 declaration)we must use forward declaration
64 * /
65
66 // The definition of a TAG class
67
68 TYZXMLTag = class;
69 TYZXMLTags = array of TYZXMLTag;
70
71 TYZXMLTag = class(TObject)
72 private
73 FData: TYZHash;
74 FParent: TYZXMLTag;
75 FName: string;
76
77 function GetValue(AName: string): string;
78 procedure SetName(const Value: string);
79 procedure SetValue(AName: string; const Value: string);
80 function GetCount: Integer;
81 function GetValueNames: strarr;
82
83 public
84 Children: TYZXMLTags; // these are our child nodes
85 Text: string;
86
87 property Name: string read FName write SetName; // name of a tag
88 property Values[AName: string]: string read GetValue write SetValue;
89 default; // values of properties of a tag (hash values)
90 property ValueNames: strarr read GetValueNames;
91 // array of strings returniong names of all props of this tag
92 property Count: Integer read GetCount;
93 // a count of children of a tag (if this tag is a container)
94
95 function SkipSpaces(var AData: string; var APos: Integer;
96 RememberBreaks: Boolean = False): Char;
97 // internal. for skip spaces (also CR or LF or other non-text chars) while
98 parsing text
99
100 function ParseValue(var AData: string; var APos: Integer): Boolean;
101 // parse value (calling when found a parameter of a tag)
102 function ParseName(var AData: string; var APos: Integer): Boolean;
103 // parse key of parameter in a tag
104
105 // these two procs used to parse any text found while parsing XML
106 function ParseString(var AData: string; var APos: Integer;
107 RememberBreaks: Boolean = False): string;
108 function ParseQuotedString(var AData: string; var APos: Integer;
109 QIndef: Char = '"'): string;
110
111 // returnes the type of next identifier in XML
112 function WhatNext(var AData: string; var APos: Integer;
113 var ANext: Integer; RememberBreaks: Boolean = False): TYZXMLMarker;
114
115 // This is a main procedure of our class - AData is a string,
116 // containing all XML data (you can use TMemo.Text, for example, as a parameter
117 of AData)
118 function ParseXML(var AData: string; var APos: Integer): Boolean;
119
120 // This function returnes a text string, built based on data, stored in an
121 object.
122 function GenerateXML(var AData: string; ATab: string = ''): Boolean;
123
124 // returnes char from string at specified pos (#0 if not in range)
125 function CharAt(var S: string; APos: Integer): Char;
126
127 function TagNameExists(AName: string): Boolean;
128
129 // Adds a child to children array of a current tag
130
131 function AddChild: TYZXMLTag;
132
133 // Initializes current tag and deletes all existing children
134 procedure Clear; virtual;
135
136 constructor Create(AParent: TYZXMLTag); virtual;
137 destructor Destroy; virtual;
138 end;
139
140 type
141 TYZXMLParser = class(TYZXMLTag)
142 private
143 Header: TYZHash;
144 procedure _BuildTreeView(ATreeView: TTreeView; ANode: TTreeNode; ATag:
145 TYZXMLTag);
146 public
147 property HeaderValues: TYZHash read Header;
148
149 procedure BuildTreeView(ATreeView: TTreeView);
150 function Parse(AData: string): Boolean;
151 function Generate(var AData: string): Boolean;
152 constructor Create;
153 destructor Destroy;
154 end;
155
156 implementation
157
158 //==============================================================================
159
160 { TYZXMLTag }
161
162 function TYZXMLTag.AddChild: TYZXMLTag;
163 begin
164 setlength(children, Length(children) + 1);
165 Result := TYZXMLTag.Create(Self);
166 children[Length(children) - 1] := Result;
167 end;
168
169 //------------------------------------------------------------------------------
170
171 procedure TYZXMLTag.Clear;
172 var
173 i: Integer;
174 begin
175 for i := 0 to Count - 1 do
176 if children[i] <> nil then
177 Children[i].Destroy;
178 setlength(children, 0);
179 FData.Clear;
180 Text := '';
181 end;
182
183 //------------------------------------------------------------------------------
184
185 constructor TYZXMLTag.Create(AParent: TYZXMLTag);
186 begin
187 inherited Create;
188 FData := TYZHash.Create;
189 FParent := AParent;
190 Clear;
191 end;
192
193 //------------------------------------------------------------------------------
194
195 destructor TYZXMLTag.Destroy;
196 begin
197 Clear;
198 FData.Destroy;
199 end;
200
201 //------------------------------------------------------------------------------
202
203 function TYZXMLTag.GetCount: Integer;
204 begin
205 Result := Length(children);
206 end;
207
208 //------------------------------------------------------------------------------
209
210 function TYZXMLTag.GetValue(AName: string): string;
211 begin
212 Result := FData[AName];
213 end;
214
215 //------------------------------------------------------------------------------
216
217 function TYZXMLTag.ParseName(var AData: string; var APos: Integer): Boolean;
218 begin
219 Result := False;
220 FName := ParseString(AData, APos);
221 if fname = '' then
222 Exit;
223 Result := True;
224 end;
225
226 //------------------------------------------------------------------------------
227
228 function TYZXMLTag.ParseQuotedString(var AData: string; var APos: Integer;
229 QIndef: Char = '"'): string;
230 var
231 i: Integer;
232 skipnext: Boolean;
233 z: Char;
234 begin
235 Result := '';
236 if CharAt(AData, APos) <> QIndef then
237 Exit;
238 i := apos;
239 skipnext := True;
240 repeat
241 if not skipnext then
242 begin
243 if charat(AData, I) = '\' then
244 SkipNext := True
245 else
246 begin
247 z := charat(AData, I);
248 if (Z = QIndef) or (z = #0) then
249 begin
250 Result := Copy(AData, aPos + 1, I - APos - 1);
251 // result:=exch(result,'\','');
252 APos := I + 1;
253 Exit;
254 end;
255 end;
256 end
257 else
258 skipnext := False;
259 Inc(i);
260 until False;
261 end;
262
263 //------------------------------------------------------------------------------
264
265 function TYZXMLTag.ParseString(var AData: string; var APos: Integer;
266 RememberBreaks: Boolean = False): string;
267 const
268 extsym: string = '=<>;?*/';
269 var
270 nxt: Char;
271 x1, x2, i: Integer;
272 begin
273 Result := '';
274 nxt := SkipSpaces(AData, APos, RememberBreaks);
275 if nxt = #0 then
276 Exit;
277 if (nxt = '"') or (nxt = '''') then
278 begin
279 Result := ParseQuotedString(AData, APos);
280 Exit;
281 end;
282 x1 := APos;
283 i := x1;
284 nxt := CharAt(AData, i);
285 while ((Ord(nxt) <= 32) or (Pos(nxt, extsym) > 0)) and (nxt <> #0) do
286 begin
287 Inc(i);
288 nxt := CharAt(AData, i);
289 end;
290 APos := i;
291 X1 := APos;
292 while (Ord(nxt) > 32) and (Pos(nxt, extsym) <= 0) do
293 begin
294 Inc(i);
295 nxt := CharAt(AData, i);
296 end;
297 x2 := i - x1;
298 Result := Copy(AData, x1, x2);
299 APos := i;
300 end;
301
302 //------------------------------------------------------------------------------
303
304 function TYZXMLTag.ParseValue(var AData: string; var APos: Integer): Boolean;
305 var
306 n, v: string;
307 i, x: Integer;
308 begin
309 Result := False;
310 n := parseString(AData, APos);
311 if n = '' then
312 Exit;
313 if skipspaces(AData, APos) <> '=' then
314 Exit;
315 Inc(apos);
316 V := parseString(AData, APos);
317 fdata[n] := dequote(v);
318 Result := True;
319 end;
320
321 //------------------------------------------------------------------------------
322
323 function TYZXMLTag.ParseXML(var AData: string; var APos: Integer): Boolean;
324 var
325 N: TYZXMLMarker;
326 nxt: Integer;
327 isLong: Boolean;
328 inTag: Boolean;
329 begin
330 isLong := False;
331 Result := False;
332 Clear;
333 if WhatNext(AData, APos, nxt) <> xmlOpenTag then
334 Exit;
335 APos := nxt;
336 if WhatNext(AData, APos, nxt) <> xmlIdentifier then
337 Exit;
338 Result := ParseName(AData, APos);
339 if not Result then
340 Exit;
341 intag := True;
342 Result := False;
343 while True do
344 begin
345 N := WhatNext(AData, APos, nxt, (not intag and islong and (Count > 0)));
346 case N of
347 xmlEOF: Exit;
348 xmlCloseTagLong:
349 begin
350 Result := True;
351 if islong then
352 APos := nxt;
353 if (Text <> '') and (Count > 0) then
354 begin
355 Text := exch(Text, #13#10#13#10, #13#10);
356 end;
357
358 Exit;
359 end;
360 xmlCloseTagShort:
361 begin
362 Result := (not IsLong) and intag;
363 if Result then
364 APos := nxt;
365 Exit;
366 end;
367 xmlOpenTag:
368 begin
369 if islong then
370 Result := AddChild.ParseXML(AData, APos)
371 else
372 begin
373 Result := False;
374 Exit;
375 end;
376 if not Result then
377 Exit;
378 end;
379 xmlCloseTag:
380 begin
381 IsLong := True;
382 APos := nxt;
383 intag := False;
384 end;
385 xmlIdentifier:
386 begin
387 if intag then
388 parsevalue(AData, APos)
389 else
390 Text := Text + ParseString(AData, APos, True)
391 end;
392 xmlUnknown:
393 begin
394 Result := True;
395 Exit;
396 end;
397 end;
398 end;
399 end;
400
401 //------------------------------------------------------------------------------
402
403 procedure TYZXMLTag.SetName(const Value: string);
404 begin
405 FName := Value;
406 end;
407
408 //------------------------------------------------------------------------------
409
410 procedure TYZXMLTag.SetValue(AName: string; const Value: string);
411 begin
412 FData[AName] := Value;
413 end;
414
415 //------------------------------------------------------------------------------
416
417 function TYZXMLTag.SkipSpaces(var AData: string; var APos: Integer;
418 RememberBreaks: Boolean = False): Char;
419 var
420 L: Integer;
421 P: Char;
422 begin
423 L := Length(AData);
424 while APos <= L do
425 begin
426 P := AData[APos];
427 if Ord(p) > 32 then
428 begin
429 Result := p;
430 Exit;
431 end
432 else if rememberbreaks then
433 begin
434 if Pos(p, #13#9' ') > 0 then
435 Text := Text + ' ';
436 end;
437 Inc(APos);
438 end;
439 Result := #0;
440 end;
441
442 //------------------------------------------------------------------------------
443
444 function TYZXMLTag.CharAt(var S: string; APos: Integer): Char;
445 begin
446 Result := #0;
447 if (Length(s) < APos) or (apos < 1) then
448 Exit;
449 Result := s[APos];
450 end;
451
452 //------------------------------------------------------------------------------
453
454 function TYZXMLTag.WhatNext(var AData: string; var APos: Integer;
455 var ANext: Integer; RememberBreaks: Boolean = False): TYZXMLMarker;
456 var
457 s: string;
458 C: Char;
459 P: Integer;
460 begin
461 Result := xmlEOF;
462 P := APos;
463 C := SkipSpaces(AData, APos);
464 P := APos;
465 ANext := P;
466 if C = #0 then
467 Exit;
468
469 if C = '<' then
470 if CharAt(AData, P + 1) = '/' then
471 begin
472 Inc(P, 2);
473 s := parsestring(AData, P);
474 if (uppercase(s) = uppercase(FName)) and (SkipSpaces(AData, P) = '>') then
475 begin
476 ANext := P + 1;
477 Result := xmlCloseTagLong;
478 Exit;
479 end
480 else
481 begin
482 if TagNameExists(s) then
483 begin
484 Result := xmlCloseTagLong;
485 ANext := APos;
486 Exit;
487 end;
488 ANext := P + 1;
489 Result := xmlCloseTagLong;
490 Exit;
491 end;
492 end;
493
494 if C = '<' then
495 begin
496 ANext := P + 1;
497 Result := xmlOpenTag;
498 Exit;
499 end;
500
501 if C = '>' then
502 begin
503 ANext := P + 1;
504 Result := xmlCloseTag;
505 Exit;
506 end;
507 if C = '/' then
508 if CharAt(AData, P + 1) = '>' then
509 begin
510 ANext := P + 2;
511 Result := xmlCloseTagShort;
512 Exit;
513 end;
514 ANext := P;
515 parsestring(AData, ANext);
516 Result := xmlIdentifier;
517 end;
518
519 //------------------------------------------------------------------------------
520
521 function TYZXMLTag.GetValueNames: strarr;
522 begin
523 Result := FData.Keys;
524 end;
525
526 //------------------------------------------------------------------------------
527
528 function TYZXMLTag.GenerateXML(var AData: string; ATab: string = ''): Boolean;
529 var
530 valDelimiter: string;
531 spc: string;
532 i: Integer;
533 a: strarr;
534 begin
535 spc := ATab + #9;
536 if FData.Count < 5 then
537 valDelimiter := ' '
538 else
539 valDelimiter := #13#10 + spc;
540 AData := AData + #13#10 + ATab + '<' + FName;
541 a := FData.keys;
542 for i := 0 to Length(a) - 1 do
543 begin
544 AData := AData + valDelimiter + a[i] + ' = "' + EnQuote(values[a[i]]) + '"';
545 end;
546 if (Count > 0) or (Text <> '') then
547 begin
548 AData := AData + '>' + Text;
549 for i := 0 to Count - 1 do
550 begin
551 Children[i].GenerateXML(AData, ATab + #9);
552 end;
553 AData := AData + #13#10 + ATab + '</' + FName + '>';
554 end
555 else
556 AData := AData + '/>';
557 Result := True;
558 end;
559
560 //------------------------------------------------------------------------------
561
562 function TYZXMLTag.TagNameExists(AName: string): Boolean;
563 begin
564 Result := AnsiUpperCase(AName) = AnsiUpperCase(Self.FName);
565 if Self.FParent = nil then
566 Exit;
567 if not Result then
568 Result := fparent.TagNameExists(AName);
569 end;
570
571 //==============================================================================
572
573 { TYZXMLParser }
574
575 constructor TYZXMLParser.Create;
576 begin
577 Header := TYZHash.Create;
578 inherited Create(nil);
579 end;
580
581 //------------------------------------------------------------------------------
582
583 destructor TYZXMLParser.Destroy;
584 begin
585 inherited;
586 Header.Destroy;
587 end;
588
589 //------------------------------------------------------------------------------
590
591 procedure TYZXMLParser.BuildTreeView(ATreeView: TTreeView);
592 var
593 i: Integer;
594 begin
595 // clear;
596 ATreeView.Items.Clear;
597 for i := 0 to Count - 1 do
598 _BuildTreeView(ATreeView, nil, children[i]);
599 end;
600
601 //------------------------------------------------------------------------------
602
603 procedure TYZXMLParser._BuildTreeView(ATreeView: TTreeView; ANode: TTreeNode;
604 ATag: TYZXMLTag);
605 var
606 i: Integer;
607 N: TTreeNode;
608 begin
609 N := ATreeView.Items.AddChildObject(ANode, ATag.Name + ' ' + FData['ID'],
610 Pointer(ATag));
611 for i := 0 to ATag.Count - 1 do
612 begin
613 if ATag.children[i] <> nil then
614 _BuildTreeView(ATreeView, N, ATag.children[i])
615 else
616 ATreeView.Items.AddChild(N, 'nil');
617 end;
618 N.Expanded := True;
619 end;
620
621 //------------------------------------------------------------------------------
622
623 function TYZXMLParser.Parse(AData: string): Boolean;
624 var
625 x1, x2, X, i: Integer;
626 s: string;
627 tmp: TYZXMLTag;
628 a: strarr;
629 N: TYZXMLMarker;
630 begin
631 X := 1;
632 Self.SkipSpaces(AData, X);
633 x2 := -1;
634 Result := False;
635 Clear;
636 Header.Clear;
637 x1 := Pos('<?', AData);
638 if x1 >= X then
639 begin
640 x2 := Pos('?>', AData);
641 if x2 < X then
642 Exit;
643 s := uppercase(Copy(AData, x1 + 2, 4));
644 if Pos('XML ', s) <> 1 then
645 Exit;
646 s := '<xml ' + Copy(AData, x1 + 6, x2 - x1 - 6) + '/>';
647 tmp := TYZXMLTag.Create(nil);
648 tmp.ParseXML(s, x);
649 a := tmp.ValueNames;
650 for i := 0 to Length(a) - 1 do
651 Header[a[i]] := tmp.Values[a[i]];
652 tmp.Destroy;
653 x := x2 + 2;
654 end;
655 Result := True;
656 repeat
657 N := whatnext(AData, X, x1);
658 case N of
659 xmlOpenTag: Result := Result and AddChild.ParseXML(AData, X);
660 xmlIdentifier:
661 begin
662 if Text <> '' then
663 Text := Text + ' ';
664 Text := Text + parsestring(AData, X, True);
665 end;
666 else
667 Parsestring(AData, X);
668 end;
669 until skipspaces(adata, x) = #0;
670 // if not result then ShowMessage('Error Parsing: '+inttostr(X));
671 end;
672
673 function TYZXMLParser.Generate(var AData: string): Boolean;
674 var
675 i: Integer;
676 a: strarr;
677 begin
678 Header['Date'] := DateTimeToStr(now);
679 a := header.Keys;
680
681 AData := '<?xml';
682 for i := 0 to Length(a) - 1 do
683 AData := AData + ' ' + a[i] + '="' + Header[a[i]] + '"';
684
685 AData := AData + '?>'#13#10 + Text;
686 Result := True;
687 for i := 0 to Length(children) - 1 do
688 begin
689 Result := Result and children[i].generatexml(AData);
690 end;
691 end;
692
693 //==============================================================================
694
695 // procedures of THash class
696
697 //==============================================================================
698
699 {THASH CLASS}
700
701 procedure THash.Clear;
702 begin
703 SetLength(Arr, 0);
704 end;
705
706 constructor THash.Create;
707 begin
708 inherited;
709 Clear;
710 end;
711
712 //------------------------------------------------------------------------------
713
714 destructor THash.Destroy;
715 begin
716 Clear;
717 inherited;
718 end;
719
720 //------------------------------------------------------------------------------
721
722 function THash.GetCount: Integer;
723 begin
724 Result := Length(Arr);
725 end;
726
727 //------------------------------------------------------------------------------
728
729 function THash.Getempty: Boolean;
730 begin
731 Result := Length(Arr) = 0;
732 end;
733
734 function THash.GetKeys: StrArr;
735 var
736 i: Integer;
737 begin
738 SetLength(Result, Length(arr));
739 for i := 0 to Length(Result) - 1 do
740 Result[i] := arr[i].Key;
741 end;
742
743 //------------------------------------------------------------------------------
744
745 function THash.GetValue(Key: string): string;
746 var
747 i: Integer;
748 r: Boolean;
749 begin
750 Result := '';
751 i := 0;
752 r := False;
753 while (i < Length(Arr)) and (not r) do
754 begin
755 if AnsiUpperCase(arr[i].key) = AnsiUpperCase(Key) then
756 begin
757 Result := Arr[i].Value;
758 r := True;
759 end;
760 i := i + 1;
761 end;
762 end;
763
764 //------------------------------------------------------------------------------
765
766 function THash.GetValues: StrArr;
767 var
768 i: Integer;
769 begin
770 SetLength(Result, Length(arr));
771 for i := 0 to Length(Result) - 1 do
772 Result[i] := arr[i].Value;
773 end;
774
775 //------------------------------------------------------------------------------
776
777 procedure THash.SetValue(Key: string; const VValue: string);
778 var
779 i, j: Integer;
780 r: Boolean;
781 E: THashElementArr;
782 begin
783 if VValue <> '' then
784 begin
785 i := 0;
786 r := False;
787 while (i < Length(Arr)) and not r do
788 begin
789 if AnsiUpperCase(arr[i].key) = AnsiUpperCase(Key) then
790 begin
791 Arr[i].Value := VValue;
792 r := True;
793 end;
794 i := i + 1;
795 end;
796 if not r then
797 begin
798 SetLength(Arr, Length(arr) + 1);
799 arr[Length(arr) - 1].Key := Key;
800 arr[Length(arr) - 1].Value := Vvalue;
801 end;
802 end;
803
804 SetLength(E, Length(Arr));
805 for i := 0 to Length(arr) - 1 do
806 E[i] := Arr[i];
807 SetLength(arr, 0);
808 for i := 0 to Length(E) - 1 do
809 if (E[i].Key <> '') and (E[i].Value <> '') then
810 begin
811 j := Length(arr);
812 setlength(arr, j + 1);
813 arr[j] := E[i];
814 end;
815 end;
816
817 end.
|
|
|
|
|
Share this page
|
Advertisement |
|
|