Author: Tomas Rutkauskas
How can I parse a TRichEdit for domains ending in .com, .net, .org?
Answer:
Solve 1:
Not extensively tested:
1
2 procedure TForm1.Button1Click(Sender: TObject);
3 const
4 charsAllowedInDomain = ['a'..'z', '0'..'9', '.', '_']; {may be more}
5 numExts = 4;
6 domainExts: array[1..numExts] of Pchar = ('.com', '.net', '.org', '.gov'); {lower
7 case!}
8 lens: array[1..numExts] of Integer = (4, 4, 4, 4);
9 var
10 S: string;
11 pStartString, pScan, pStartDomain, pEndDomain: Pchar;
12 domain: string;
13 i: Integer;
14 begin
15 S := AnsiLowerCase(richedit1.text);
16 pStartString := PChar(S);
17 pScan := pStartString;
18 while pScan^ <> #0 do
19 begin
20 if pScan^ = '.' then
21 begin
22 for i := Low(domainExts) to High(domainExts) do
23 if StrLComp(pScan, domainExts[i], lens[i]) = 0 then
24 begin
25 {we have a candidate}
26 pStartDomain := pScan;
27 pEndDomain := pScan + lens[i];
28 if not (pEndDomain^ in charsAllowedInDomain) then
29 begin
30 while (pStartDomain > pStartString) and (pStartDomain[-1] in
31 charsAllowedInDomain) do
32 Dec(pStartDomain);
33 SetString(domain, pStartDomain, pEndDomain - pStartDomain);
34 listbox1.items.add(domain);
35 pScan := pEndDomain - 1;
36 break;
37 end;
38 end;
39 end;
40 Inc(pScan);
41 end;
42 end;
Solve 2:
43 { ... }
44 type {declared in richedit.pas D3}
45
46 TCharRange = record
47 cpMin: Longint;
48 cpMax: LongInt;
49 end;
50
51 TFindTextExA = record {declared in richedit.pas D3}
52 chrg: TCharRange;
53 lpstrText: PAnsiChar;
54 chrgText: TCharRange;
55 end;
56
57 procedure REFindDomain(RE: TRichEdit; const Target: string; Strs: TStrings);
58 const
59 {maybe more than these?}
60 ValidChars: set of char = ['a'..'z', 'A'..'Z', '0'..'9', '.', '/', ':', '_', '-'];
61 var
62 ftx: TFindTextExA;
63 flags: longint;
64 charpos: longint;
65 s: string;
66 begin
67 if (Target = '') then
68 exit; {nothing to look for}
69 {searches all of the RichEdit}
70 ftx.chrg.cpMin := 0;
71 ftx.chrg.cpMax := -1;
72 ftx.lpstrText := PChar(Target);
73 ftx.chrgText.cpMin := 0;
74 ftx.chrgText.cpMax := 0;
75 flags := 0;
76 // EM_FINDTEXTEX = WM_USER + 79; {declared in richedit.pas D3}
77 while SendMessage(RE.Handle, WM_USER + 79, flags, longint(@ftx)) > -1 do
78 begin
79 RE.SelStart := ftx.chrgText.cpMin; {found at position}
80 RE.SelLength := Length(Target);
81 {get the line}
82 if ftx.chrgText.cpMax >= 255 then
83 s := Copy(RE.Lines.Text, ftx.chrgText.cpMax - 254, 255)
84 else
85 s := Copy(RE.Lines.Text, 1, ftx.chrgText.cpMax);
86 {need to find start of domain name}
87 charpos := Length(s);
88 while (charpos > 1) and (s[charpos] in ValidChars) do
89 Dec(charpos);
90 if not (s[charpos] in ValidChars) then
91 Inc(charpos);
92 Strs.Add(Copy(s, charpos, Length(s)));
93 ftx.chrg.cpMin := ftx.chrgText.cpMin + 1; {reset to found at pos}
94 end;
95 end;
96
97 {ListBox1 contains 3 lines: '.com' '.net' '.org', ListBox2 receives the results}
98
99 procedure TForm1.Button1Click(Sender: TObject);
100 var
101 i: integer;
102 begin
103 if ListBox1.Items.Count > 0 then
104 begin
105 ListBox2.Clear;
106 for i := 0 to ListBox1.Items.Count - 1 do
107 begin
108 REFindDomain(RichEdit1, ListBox1.Items[i], ListBox2.Items);
109 end;
110 Label1.Caption := IntToStr(ListBox2.Items.Count);
111 end;
112 end;
|