Author: Jonas Bilinkevicius
Is there a way to convert accented characters to their unaccented equivalents (meaning
ASCII A-Z, a-z)?
Answer:
The classical way is to have a conversion table and do a lookup in that table. The
problem with that is that the table is, of course, specific to a certain character set
(encoding), such as Windows Latin-1. You could build a table for a range of Unicode
(WideChar) characters to get around this limitation and convert the strings to
WideStrings before you remove the accents. The routine below uses ANSI
characters with the Windows Western (Latin-1) encoding.
// SimplifyChar
//   Returns the unaccented ASCII base letter for an accented Latin-1 letter.
//
//   _ch     the character to simplify
//   Result  'A'..'Z' / 'a'..'z' for the accented letters listed in Charmap;
//           every other character (ASCII, symbols, ligatures such as AE,
//           and anything outside #128..#255) is returned unchanged.
//
//   The table is indexed directly by character code, so it is only
//   meaningful for the Windows western (Latin-1) layout of #128..#255.
//   NOTE(review): on Unicode Delphi the interpretation of the #128..#255
//   literals may depend on the $HIGHCHARUNICODE setting - confirm before
//   relying on this table there.
function SimplifyChar(const _ch: char): char;
const
  // Entries written as #nnn map a character to itself (no simplification).
  Charmap: array[#128..#255] of Char = (
    #128 { ? }, #129 { ? }, #130 { ? }, #131 { ? }, #132 { ? },
    #133 { ? }, #134 { ? }, #135 { ? }, #136 { ? }, #137 { ? },
    #138 { ? }, #139 { ? }, #140 { ? }, #141 { ? }, #142 { ? },
    #143 { ? }, #144 { ? }, #145 { ? }, #146 { ? }, #147 { ? },
    #148 { ? }, #149 { ? }, #150 { ? }, #151 { ? }, #152 { ? },
    #153 { ? }, #154 { ? }, #155 { ? }, #156 { ? }, #157 { ? },
    #158 { ? }, #159 { ? }, #160 { nbsp }, #161 { ¡ }, #162 { ¢ },
    #163 { £ }, #164 { ¤ }, #165 { ¥ }, #166 { ¦ }, #167 { § },
    #168 { ¨ }, #169 { © }, #170 { ª }, #171 { « }, #172 { ¬ },
    #173 { shy }, #174 { ® }, #175 { ¯ }, #176 { ° }, #177 { ± },
    #178 { ² }, #179 { ³ }, #180 { ´ }, #181 { µ }, #182 { ¶ },
    #183 { · }, #184 { ¸ }, #185 { ¹ }, #186 { º }, #187 { » },
    #188 { ¼ }, #189 { ½ }, #190 { ¾ }, #191 { ¿ }, 'A' { À },
    'A' { Á }, 'A' { Â }, 'A' { Ã }, 'A' { Ä }, 'A' { Å },
    #198 { Æ }, 'C' { Ç }, 'E' { È }, 'E' { É }, 'E' { Ê },
    'E' { Ë }, 'I' { Ì }, 'I' { Í }, 'I' { Î }, 'I' { Ï },
    #208 { Ð }, 'N' { Ñ }, 'O' { Ò }, 'O' { Ó }, 'O' { Ô },
    'O' { Õ }, 'O' { Ö }, #215 { × }, #216 { Ø }, 'U' { Ù },
    'U' { Ú }, 'U' { Û }, 'U' { Ü }, 'Y' { Ý }, #222 { Þ },
    #223 { ß }, 'a' { à }, 'a' { á }, 'a' { â }, 'a' { ã },
    'a' { ä }, 'a' { å }, #230 { æ }, 'c' { ç }, 'e' { è },
    'e' { é }, 'e' { ê }, 'e' { ë }, 'i' { ì }, 'i' { í },
    'i' { î }, 'i' { ï }, #240 { ð }, 'n' { ñ }, 'o' { ò },
    'o' { ó }, 'o' { ô }, 'o' { õ }, 'o' { ö }, #247 { ÷ },
    #248 { ø }, 'u' { ù }, 'u' { ú }, 'u' { û }, 'u' { ü },
    'y' { ý }, #254 { þ }, 'y' { ÿ }
    );
begin
  // The upper bound is required when Char is wider than 8 bits: without it
  // any character above #255 would index past the end of Charmap.
  if (_ch >= #128) and (_ch <= #255) then
    Result := Charmap[_ch]
  else
    Result := _ch;
end;
37
38 //The charmap table was created by this little routine and then edited:
39
// CreateCharacterMap
//   Generates Pascal source text for a "Charmap" constant array covering
//   fromchar..tochar, in which every character initially maps to itself,
//   and places that text on the clipboard. Each element is followed by a
//   brace comment showing the character it stands for.
//
//   fromchar  first character of the range (must be <= tochar)
//   tochar    last character of the range
procedure CreateCharacterMap(fromchar, tochar: Char);

  // Returns the text shown inside the brace comment next to an element.
  function DisplayStr(const ch: Char): string;
  begin
    if ch < #32 then
      // Control characters are shown in caret notation (^@, ^A, ...).
      Result := '^' + Chr(Ord('A') - 1 + Ord(ch))
    else if ch = #127 then
      // DEL is a control character too; show it in caret notation.
      Result := '^?'
    else if ch = '}' then
      // A literal closing brace would end the generated comment early and
      // leave a stray brace in the emitted source, so describe it instead.
      Result := 'right brace'
    else
      Result := ch;
  end;

var
  sl: TStringlist;
  line, element: string;
  ch: char;
begin
  Assert(fromchar <= tochar);
  sl := Tstringlist.Create;
  try
    sl.Add('Const');
    line := Format(' Charmap: array [#%d..#%d] of Char = (', [Ord(fromchar),
      Ord(tochar)]);
    sl.Add(line);
    line := '';
    for ch := fromchar to toChar do
    begin
      element := Format('#%3.3d { %s }', [Ord(ch), DisplayStr(ch)]);
      // Start a new output line once the current one would exceed 66 chars.
      if (Length(line) + Length(element)) > 66 then
      begin
        sl.Add(' ' + line);
        line := '';
      end;
      line := line + element;
      // Every element except the last is followed by a separator.
      if ch <> tochar then
        line := line + ', ';
    end;
    // Flush the final, partially filled line and close the array literal.
    sl.Add(' ' + line);
    sl.add(' );');
    Clipboard.AsText := sl.Text;
  finally
    sl.Free
  end;
end;
|