Author: Ernesto De Spirito
A simple hyphenation algorithm to syllabicate Spanish words.
Answer:
Sometimes we need to display or print a text, and we'd like to hyphenate long words
that don't fit at the end of a line, to prevent them from falling entirely into the
next line leaving too much space unused.
The main problem that arises is how to divide a Spanish word into syllables. If you
are interested in syllabication for English words, read the note at the end of this
article.
1
procedure Syllabify(Syllables: TStringList; s: string);
// Splits the word "s" into pieces at the syllable boundaries found by a
// small set of Spanish rules and stores the pieces, in order, in
// "Syllables" (the list is cleared first).
//
//   Syllables  receives the result; concatenating its entries gives back
//              the original word
//   s          the word to split (passed by value; a local copy is padded)
//
// A literal '-' found in the word is stored as an entry of its own.
// Empty pieces are never stored, so an empty word yields an empty list.
const
  Consonants = ['b', 'B', 'c', 'C', 'd', 'D', 'f', 'F', 'g', 'G',
    'h', 'H', 'j', 'J', 'k', 'K', 'l', 'L', 'm', 'M', 'n', 'N',
    'ñ', 'Ñ', 'p', 'P', 'q', 'Q', 'r', 'R', 's', 'S', 't', 'T',
    'v', 'V', 'w', 'W', 'x', 'X', 'y', 'Y', 'z', 'Z'];
  // Accented i/u are listed here: for the rules below they behave like
  // strong vowels (two adjacent strong vowels are split)
  StrongVowels = ['a', 'A', 'á', 'Á', 'e', 'E', 'é', 'É',
    'í', 'Í', 'o', 'ó', 'O', 'Ó', 'ú', 'Ú'];
  WeakVowels = ['i', 'I', 'u', 'U', 'ü', 'Ü'];
  Vowels = StrongVowels + WeakVowels;
var
  i, j, n, m, hyphen: integer;

  // Appends a piece to the result, silently ignoring empty pieces
  procedure AddPiece(const Piece: string);
  begin
    if Piece <> '' then
      Syllables.Add(Piece);
  end;

begin
  Syllables.Clear;
  // Pad with #0 on both sides so that s[i - 1] and s[i + 1] are always
  // valid and never match any of the letter sets
  s := #0 + s + #0;
  n := Length(s) - 1; // index of the last real character
  j := 2;             // start of the piece being built
  i := 2;             // character being examined
  while i <= n do
  begin
    hyphen := 0; // 0 = no break, 1 = break before s[i], 2 = break after s[i]
    if s[i] in Consonants then
    begin
      if s[i + 1] in Vowels then
      begin
        // vowel + consonant + vowel: the consonant opens the next piece
        if s[i - 1] in Vowels then
          hyphen := 1;
      end
      else if (s[i + 1] in Consonants) and
        (s[i - 1] in Vowels) then
      begin
        // vowel + two consonants: keep the pair together when it is one
        // of the listed groups, otherwise split between the consonants
        if s[i + 1] in ['r', 'R'] then
        begin
          if s[i] in ['b', 'B', 'c', 'C', 'd', 'D', 'f', 'F', 'g',
            'G', 'k', 'K', 'p', 'P', 'r', 'R', 't', 'T', 'v', 'V'] then
            hyphen := 1
          else
            hyphen := 2;
        end
        else if s[i + 1] in ['l', 'L'] then
        begin
          if s[i] in ['b', 'B', 'c', 'C', 'd', 'D', 'f', 'F', 'g',
            'G', 'k', 'K', 'l', 'L', 'p', 'P', 't', 'T', 'v', 'V'] then
            hyphen := 1
          else
            hyphen := 2;
        end
        else if s[i + 1] in ['h', 'H'] then
        begin
          if s[i] in ['c', 'C', 's', 'S', 'p', 'P'] then
            hyphen := 1
          else
            hyphen := 2;
        end
        else
          hyphen := 2;
      end;
    end
    else if s[i] in StrongVowels then
    begin
      // two adjacent strong vowels belong to different pieces
      if s[i - 1] in StrongVowels then
        hyphen := 1;
    end
    else if s[i] = '-' then
    begin
      // A literal hyphen closes the current piece and is stored alone.
      // The character right after it opens the next piece and is not
      // examined (the extra inc below plus the one at the loop end).
      AddPiece(Copy(s, j, i - j));
      Syllables.Add('-');
      inc(i);
      j := i;
    end;
    if hyphen = 1 then
    begin // Break before s[i]
      AddPiece(Copy(s, j, i - j));
      j := i;
    end
    else if hyphen = 2 then
    begin // Break after s[i]; the following consonant is skipped as well
      inc(i);
      AddPiece(Copy(s, j, i - j));
      j := i;
    end;
    inc(i);
  end;
  m := Syllables.Count - 1;
  // A single trailing consonant does not form a piece of its own: attach
  // it to the previous piece, unless that piece is a literal hyphen
  if (j = n) and (m >= 0) and (s[n] in Consonants)
    and (Syllables[m] <> '-') then
    Syllables[m] := Syllables[m] + s[n] // Last letter
  else
    AddPiece(Copy(s, j, n - j + 1)); // Last piece (may be empty: skipped)
end;
To test the procedure you can drop an Edit box and a Label on a form and in the
OnChange event of the Edit box write:
92
procedure TForm1.Edit1Change(Sender: TObject);
// Shows in Label1 the text typed in Edit1 split into syllables, with the
// syllables separated by hyphens.
var
  Parts: TStringList;
  Joined: string;
begin
  Parts := TStringList.Create;
  try
    Syllabify(Parts, Edit1.Text);
    // Text holds one syllable per line; drop the surrounding whitespace
    // and turn every remaining line break into a hyphen
    Joined := Trim(Parts.Text);
    Label1.Caption := StringReplace(Joined, #13#10, '-', [rfReplaceAll]);
  finally
    Parts.Free;
  end;
end;
Now that we have a syllabication procedure, we have to note that we can't hyphenate
a word at just any syllable break. It is usually correct and/or desirable to join
small syllables at the left and/or right ends of a word, to guarantee for example
that there are at least two syllables on either side of the break when the word gets
hyphenated, or - as in the following example - to make sure that we have at least
four characters on either side:
106
procedure ApplyRules(Syllables: TStringList);
// Merges entries at both ends of the list so that the first and the last
// entry each have at least four characters. A list that ends up with a
// single entry is left as it is.
const
  MinPartLength = 4;
var
  Last: integer;
begin
  // Left side: absorb the second entry into the first until it is long
  // enough or nothing is left to absorb
  while (Syllables.Count > 1)
    and (Length(Syllables[0]) < MinPartLength) do
  begin
    Syllables[0] := Syllables[0] + Syllables[1];
    Syllables.Delete(1);
  end;
  // Right side: same idea, folding the last entry into the one before it
  while (Syllables.Count > 1)
    and (Length(Syllables[Syllables.Count - 1]) < MinPartLength) do
  begin
    Last := Syllables.Count - 1;
    Syllables[Last - 1] := Syllables[Last - 1] + Syllables[Last];
    Syllables.Delete(Last);
  end;
end;
Finally, it is time to parse the text, breaking each paragraph into lines and
determining which words should be hyphenated. The following example does that with
a text to be displayed in a Memo:
132
procedure Hyphenate(Memo: TMemo; OriginalText: TStrings);
// Fills Memo with the paragraphs of OriginalText, broken into lines no
// wider than the Memo's client area (minus a small margin). A word that
// does not fit at the end of a line is split at a syllable boundary
// (Syllabify + ApplyRules) and a '-' is appended to the first part.
//
//   Memo          destination; its current lines are discarded
//   OriginalText  one paragraph per entry
//
// A word whose first piece is wider than a whole line is cut at the last
// character that fits, without a hyphen, so the procedure always
// terminates.
var
  paragraph, i, j, k, m, n, MaxLineWidth: integer;
  s, line: string;
  Bitmap: TBitmap;
  Canvas: TCanvas;
  Syllables: TStringList;
begin
  Syllables := TStringList.Create;
  try
    // We need a canvas to use its TextWidth method to get the width
    // of the text to see if it fits in the client area or not. The
    // TMemo class doesn't have a Canvas property, so we have to
    // create one of our own.
    Bitmap := TBitmap.Create;
    try
      Canvas := Bitmap.Canvas;
      Canvas.Font := Memo.Font;
      MaxLineWidth := Memo.ClientWidth - 6; // Maximum width
      Memo.Lines.BeginUpdate; // refresh the control once, at the end
      try
        Memo.Lines.Clear;
        for paragraph := 0 to OriginalText.Count - 1 do
        begin
          s := OriginalText[paragraph]; // text of the paragraph still to place
          // Peel lines off the front of "s" while it is too wide. A single
          // character cannot be broken further, so it is emitted as is.
          while (Length(s) > 1)
            and (Canvas.TextWidth(s) > MaxLineWidth) do
          begin
            // Find (in "i") the first character that does not fit and
            // (in "j") the start of the word that contains it. The loop
            // always breaks with 2 <= i <= n because the whole of "s" is
            // known to be too wide.
            j := 1;
            n := Length(s);
            i := 2;
            while i <= n do
            begin
              if (s[i - 1] = ' ') and (s[i] <> ' ') then
                j := i; // last beginning of a word
              if Canvas.TextWidth(Copy(s, 1, i)) > MaxLineWidth then
                break; // reached a width that doesn't fit
              inc(i);
            end;
            if s[i] = ' ' then
            begin
              // The break falls on a space: no hyphenation needed
              Memo.Lines.Add(Copy(s, 1, i - 1)); // Add the line
              s := Copy(s, i + 1, n - i);        // Remove the line
            end
            else
            begin
              // The break falls inside a word. Find (in "k") the first
              // space after the word
              k := j + 1;
              while (k <= n) and (s[k] <> ' ') do
                inc(k);
              // Divide the word in syllables
              Syllabify(Syllables, Copy(s, j, k - j));
              ApplyRules(Syllables);
              // Count (in "m") how many pieces fit, leaving room for '-'
              m := 0;
              line := Copy(s, 1, j - 1);
              while (m < Syllables.Count)
                and (Canvas.TextWidth(line + Syllables[m] + '-')
                <= MaxLineWidth) do
              begin
                line := line + Syllables[m];
                inc(m);
              end;
              if m > 0 then
              begin
                // "line" is a prefix of "s": continue right after it
                j := Length(line) + 1;
                // Add a hyphen only when the word really is split and
                // the line does not already end with the word's own one
                if (m < Syllables.Count) and (line <> '')
                  and (line[Length(line)] <> '-') then
                begin
                  line := line + '-';
                  // If the rest of the word starts with a '-', the one
                  // just added stands for it: skip it in the source
                  if Copy(Syllables[m], 1, 1) = '-' then
                    inc(j);
                end;
              end
              else if j = 1 then
              begin
                // Not even the first piece fits and there is nothing
                // before the word to emit: cut at the last character
                // that fits (at least one) so that "s" always shrinks
                j := i;
                line := Copy(s, 1, j - 1);
              end;
              // else: the whole word goes to the next line and "line"
              // holds the text that precedes it
              Memo.Lines.Add(line);       // Add the line
              s := Copy(s, j, n - j + 1); // Remove the line
            end;
          end;
          Memo.Lines.Add(s); // Add the last line (it fits)
        end;
      finally
        Memo.Lines.EndUpdate;
      end;
    finally
      Bitmap.Free;
    end;
  finally
    Syllables.Free;
  end;
end;
To test the procedure, drop a Memo component on a form, align it for example to the
top of the form (Align = alTop) and write the following code in the OnResize event
of the form:
220
procedure TForm1.FormResize(Sender: TObject);
// Rebuilds the hyphenated sample text in Memo1 each time the form is
// resized.
const
  FirstParagraph = 'Si se ha preguntado cómo hacen los '
    + 'programas procesamiento de textos para dividir palabras '
    + 'con de guiones al final de una línea, he aquí un '
    + 'ejemplo sencillo (en comparación con los que usan las '
    + 'aplicaciones de procesamiento de textos).';
  SecondParagraph = 'Este es un segundo párrafo que se provee '
    + 'con fines de ejemplo.';
var
  Paragraphs: TStringList;
begin
  Paragraphs := TStringList.Create;
  try
    // One list entry per paragraph, as Hyphenate expects
    Paragraphs.Add(FirstParagraph);
    Paragraphs.Add(SecondParagraph);
    Hyphenate(Memo1, Paragraphs);
  finally
    Paragraphs.Free;
  end;
end;
NOTE:
English words are hyphenated phonetically, so the process would have two phases:
produce a phonetic representation of the word using pronunciation rules; and
perform the hyphenation of the phonetic representation using hyphenation rules
(applying the same breaks, in parallel, to the original word).
There are rules for both things, and also exceptions, so a small exceptions
dictionary may be needed. Of course, it's all easier said than done. I realize it
is somewhat complex, but I still believe it is possible to syllabicate English
words algorithmically.
Copyright (c) 2001 Ernesto De Spirito - mailto:edspirito@latiumsoftware.com
Visit: http://www.latiumsoftware.com/delphi-newsletter.php
|