Articles   Members Online: 3
-Article/Tip Search
-News Group Search over 21 Million news group articles.
-Delphi/Pascal
-CBuilder/C++
-C#Builder/C#
-JBuilder/Java
-Kylix
Member Area
-Home
-Account Center
-Top 10 NEW!!
-Submit Article/Tip
-Forums Upgraded!!
-My Articles
-Edit Information
-Login/Logout
-Become a Member
-Why sign up!
-Newsletter
-Chat Online!
-Indexes NEW!!
Employment
-Build your resume
-Find a job
-Post a job
-Resume Search
Contacts
-Contacts
-Feedbacks
-Link to us
-Privacy/Disclaimer
Embarcadero
Visit Embarcadero
Embarcadero Community
JEDI
Links
How to use Hyphenation - Dividing Spanish words into syllables Turn on/off line numbers in source code. Switch to Original background IDE or DSP color Comment or reply to this article/tip for discussion. Bookmark this article to my favorite article(s). Print this article
23-May-03
Category
Reporting /Printing
Language
Delphi 2.x
Views
76
User Rating
No Votes
# Votes
0
Replies
0
Publisher:
DSP, Administrator
Reference URL:
DKB
			Author: Ernesto De Spirito

A simple hyphenation algorithm to syllabicate Spanish words.

Answer:

Sometimes we need to display or print a text, and we'd like to hyphenate long words 
that don't fit at the end of a line, to prevent them from falling entirely into the 
next line leaving too much space unused.

The main problem that arises is how to divide a Spanish word into syllables. If you 
are interested in syllabication for English words, read the note at the end of this 
article.
1   
procedure Syllabify(Syllables: TStringList; s: string);
// Splits the Spanish word "s" into syllables using a simple rule-based scan.
//
// Parameters:
//   Syllables - receives the result; it is cleared first. Each entry is one
//               syllable, in order. A literal '-' found inside the word is
//               returned as its own '-' entry so that callers can tell an
//               existing hyphen from a syllable break.
//   s         - the word to divide (passed by value; only a local copy is
//               modified).
//
// No empty entries are ever added: an empty word yields an empty list, and
// a leading or trailing '-' yields only the '-' entry itself.
//
// NOTE(review): the character sets below rely on accented letters being
// single-byte characters (set of Char) - confirm for the target compiler
// and source-file encoding.
const
  Consonants = ['b', 'B', 'c', 'C', 'd', 'D', 'f', 'F', 'g', 'G',
    'h', 'H', 'j', 'J', 'k', 'K', 'l', 'L', 'm', 'M', 'n', 'N',
    'ñ', 'Ñ', 'p', 'P', 'q', 'Q', 'r', 'R', 's', 'S', 't', 'T',
    'v', 'V', 'w', 'W', 'x', 'X', 'y', 'Y', 'z', 'Z'];
  // Accented i/u are listed as strong so that they break a vowel pair
  StrongVowels = ['a', 'A', 'á', 'Á', 'e', 'E', 'é', 'É',
    'í', 'Í', 'o', 'ó', 'O', 'Ó', 'ú', 'Ú'];
  WeakVowels = ['i', 'I', 'u', 'U', 'ü', 'Ü'];
  Vowels = StrongVowels + WeakVowels;
  // Consonants that stay attached to a following 'r', 'l' or 'h'
  // (the pair is kept together and the break goes before it)
  JoinsWithR = ['b', 'B', 'c', 'C', 'd', 'D', 'f', 'F', 'g',
    'G', 'k', 'K', 'p', 'P', 'r', 'R', 't', 'T', 'v', 'V'];
  JoinsWithL = ['b', 'B', 'c', 'C', 'd', 'D', 'f', 'F', 'g',
    'G', 'k', 'K', 'l', 'L', 'p', 'P', 't', 'T', 'v', 'V'];
  JoinsWithH = ['c', 'C', 's', 'S', 'p', 'P'];
var
  i, j, n, m, hyphen: integer;
begin
  Syllables.Clear;
  // Sentinels on both sides make s[i - 1] and s[i + 1] always valid
  s := #0 + s + #0;
  n := Length(s) - 1; // index of the last real character
  j := 2;             // start of the syllable being collected
  i := 2;             // current character
  while i <= n do
  begin
    hyphen := 0; // 0 = no break, 1 = break before s[i], 2 = break after s[i]
    if s[i] in Consonants then
    begin
      // A consonant can only start a break when it follows a vowel
      if s[i - 1] in Vowels then
      begin
        if s[i + 1] in Vowels then
          hyphen := 1 // vowel-consonant-vowel: consonant opens the syllable
        else if s[i + 1] in Consonants then
        begin
          // Two consonants: keep inseparable pairs together, else split them
          if s[i + 1] in ['r', 'R'] then
          begin
            if s[i] in JoinsWithR then
              hyphen := 1
            else
              hyphen := 2;
          end
          else if s[i + 1] in ['l', 'L'] then
          begin
            if s[i] in JoinsWithL then
              hyphen := 1
            else
              hyphen := 2;
          end
          else if s[i + 1] in ['h', 'H'] then
          begin
            if s[i] in JoinsWithH then
              hyphen := 1
            else
              hyphen := 2;
          end
          else
            hyphen := 2;
        end;
      end;
    end
    else if s[i] in StrongVowels then
    begin
      // Two strong vowels in a row belong to different syllables
      if s[i - 1] in StrongVowels then
        hyphen := 1;
    end
    else if s[i] = '-' then
    begin
      // Literal hyphen: flush what precedes it (if anything) and emit the
      // hyphen as a separate marker entry
      if i > j then
        Syllables.Add(Copy(s, j, i - j));
      Syllables.Add('-');
      inc(i);
      j := i;
      // The inc(i) at the bottom of the loop then steps over the first
      // character after the hyphen; it stays part of the next syllable
    end;
    if hyphen = 1 then
    begin // Break before the current character
      Syllables.Add(Copy(s, j, i - j));
      j := i;
    end
    else if hyphen = 2 then
    begin // Break after the current character
      inc(i);
      Syllables.Add(Copy(s, j, i - j));
      j := i;
    end;
    inc(i);
  end;
  m := Syllables.Count - 1;
  if (j = n) and (m >= 0) and (s[n] in Consonants)
    and (Syllables[m] <> '-') then
    // A lone trailing consonant is not a syllable: attach it to the
    // previous one (but never to a '-' marker)
    Syllables[m] := Syllables[m] + s[n]
  else if j <= n then
    Syllables.Add(Copy(s, j, n - j + 1)); // Last syllable
end;


To test the procedure you can drop an Edit box and a Label on a form and, in the 
OnChange event of the Edit box, write: 
92  
procedure TForm1.Edit1Change(Sender: TObject);
// Shows the text of Edit1 divided into syllables in Label1, with the
// syllables joined by '-'.
var
  Parts: TStringList;
  Joined: string;
begin
  Parts := TStringList.Create;
  try
    Syllabify(Parts, Edit1.Text);
    // Parts.Text holds one syllable per line; drop the surrounding
    // whitespace and turn every remaining line break into a hyphen
    Joined := Trim(Parts.Text);
    Label1.Caption := StringReplace(Joined, #13#10, '-', [rfReplaceAll]);
  finally
    Parts.Free;
  end;
end;


Now that we have a syllabication procedure, we have to note that we can't hyphenate 
a word at just any syllable break. It is usually correct and/or desirable to join 
small syllables at the left and/or right ends of a word to guarantee, for example, 
that there are at least two syllables on either side of the break when the word 
gets hyphenated, or -as in the following example- to make sure that there are at 
least four characters on either side: 
106 
procedure ApplyRules(Syllables: TStringList);
// Merges short syllables at both ends of the list, in place, so that the
// first and the last entry each hold at least four characters (or until
// only one entry is left).
const
  MinChars = 4;
var
  Last: integer;
begin
  // Nothing to merge with fewer than two entries
  if Syllables.Count < 2 then
    exit;
  // Left end: keep absorbing the second entry into the first
  while (Syllables.Count > 1) and (Length(Syllables[0]) < MinChars) do
  begin
    Syllables[0] := Syllables[0] + Syllables[1];
    Syllables.Delete(1);
  end;
  // Right end: keep absorbing the last entry into the one before it
  while (Syllables.Count > 1)
    and (Length(Syllables[Syllables.Count - 1]) < MinChars) do
  begin
    Last := Syllables.Count - 1;
    Syllables[Last - 1] := Syllables[Last - 1] + Syllables[Last];
    Syllables.Delete(Last);
  end;
end;


Finally, it is time to parse the text, separating the lines of each paragraph and 
determining which words should be hyphenated. The following example does that with 
a text to be displayed in a Memo: 
132 
procedure Hyphenate(Memo: TMemo; OriginalText: TStrings);
// Fills Memo with OriginalText, breaking each paragraph into lines that fit
// the memo's width and hyphenating the word at the end of a line when a
// leading part of it still fits.
//
// Parameters:
//   Memo         - destination; its lines are cleared first. Its Font and
//                  ClientWidth determine where the lines are broken.
//   OriginalText - one paragraph per entry; it is only read.
//
// Every pass of the inner loop removes at least one character from the
// pending text, so the procedure terminates even when the memo is narrower
// than a single syllable or character.
var
  paragraph, i, j, k, m, n, MaxLineWidth: integer;
  s, line: string;
  Bitmap: TBitmap;
  Canvas: TCanvas;
  Syllables: TStringList;
begin
  Syllables := TStringList.Create;
  try
    // We need a canvas to use its TextWidth method to get the width
    // of the text to see if it fits in the client area or not. The
    // TMemo class doesn't have a Canvas property, so we have to
    // create one of our own.
    Bitmap := TBitmap.Create;
    try
      Canvas := Bitmap.Canvas;
      Canvas.Font := Memo.Font;
      // Usable width; the 6 pixels are presumably an allowance for the
      // memo's internal margins - TODO confirm
      MaxLineWidth := Memo.ClientWidth - 6;
      Memo.Lines.Clear;
      for paragraph := 0 to OriginalText.Count - 1 do
      begin
        s := OriginalText[paragraph]; // Text of the paragraph still pending
        // Peel off one line per pass while the pending text does not fit
        while (s <> '') and (Canvas.TextWidth(s) > MaxLineWidth) do
        begin
          // Find (in "i") the first character that no longer fits and
          // (in "j") the start of the last word seen up to that point
          j := 1;
          n := Length(s);
          i := 2;
          while i <= n do
          begin
            if (s[i - 1] = ' ') and (s[i] <> ' ') then
              j := i; // last beginning of a word
            if Canvas.TextWidth(Copy(s, 1, i)) > MaxLineWidth then
              break; // reached a width that doesn't fit
            inc(i);
          end;
          // The scan can run off the end (e.g. a one-character text);
          // keep "i" on a valid character
          if i > n then
            i := n;
          if s[i] = ' ' then
          begin
            // The break falls on a space: no hyphenation needed
            Memo.Lines.Add(Copy(s, 1, i - 1)); // Add the line
            s := Copy(s, i + 1, n - i); // Remove the line and the space
          end
          else
          begin
            // The break falls inside a word. Find (in "k") the first
            // space after that word
            k := j + 1;
            while (k <= n) and (s[k] <> ' ') do
              inc(k);
            // Divide the word in syllables
            Syllabify(Syllables, Copy(s, j, k - j));
            ApplyRules(Syllables);
            // Count (in "m") how many syllables fit together with a
            // trailing hyphen
            m := 0;
            line := Copy(s, 1, j - 1);
            while (m < Syllables.Count) and
              (Canvas.TextWidth(line + Syllables[m] + '-')
              <= MaxLineWidth) do
            begin
              line := line + Syllables[m];
              inc(m);
            end;
            if m = 0 then
            begin
              // No part of the word fits. Normally the whole word moves
              // to the next line ("j" already points to it), but when the
              // word starts the line that would consume nothing, so force
              // a break after the last character that fits (at least one)
              if j = 1 then
              begin
                k := i - 1;
                if k < 1 then
                  k := 1;
                line := Copy(s, 1, k);
                j := k + 1;
              end;
            end
            else if (m < Syllables.Count) and (Syllables[m - 1] <> '-') then
            begin
              // Hyphenate. The added '-' makes Length(line) equal to the
              // index of the next unconsumed character of "s"
              line := line + '-';
              j := Length(line);
              // If the word has its own hyphen right here, the one just
              // added stands for it: skip the original
              if Syllables[m] = '-' then
                inc(j);
            end
            else
              // The line already ends on the word's own hyphen (or the
              // whole word fitted): continue right after what was used
              j := Length(line) + 1;
            Memo.Lines.Add(line); // Add the line
            s := Copy(s, j, n - j + 1); // Remove the line
          end;
        end;
        Memo.Lines.Add(s); // Add the last line (it fits)
      end;
    finally
      Bitmap.Free;
    end;
  finally
    Syllables.Free;
  end;
end;


To test the procedure, drop a Memo component on a form, align it for example to the 
top of the form (Align = alTop) and write the following code in the OnResize event 
of the form: 
220 
procedure TForm1.FormResize(Sender: TObject);
// Re-wraps and re-hyphenates the two sample paragraphs into Memo1 every
// time the form is resized.
const
  FirstParagraph = 'Si se ha preguntado cómo hacen los '
    + 'programas procesamiento de textos para dividir palabras '
    + 'con de guiones al final de una línea, he aquí un '
    + 'ejemplo sencillo (en comparación con los que usan las '
    + 'aplicaciones de procesamiento de textos).';
  SecondParagraph = 'Este es un segundo párrafo que se provee '
    + 'con fines de ejemplo.';
var
  Sample: TStringList;
begin
  Sample := TStringList.Create;
  try
    // One entry per paragraph, in display order
    Sample.Add(FirstParagraph);
    Sample.Add(SecondParagraph);
    Hyphenate(Memo1, Sample);
  finally
    Sample.Free;
  end;
end;


NOTE:

English words are hyphenated phonetically, so the process would have two phases:

produce a phonetic representation of the word using pronunciation rules; and 
perform the hyphenation of the phonetic representation using hyphenation rules 
(applying the same breaks, in parallel, to the original word). 

There are rules for both things, and also exceptions, so a small exceptions 
dictionary may be needed. Of course, it's all easier said than done. I realize it 
is somewhat complex, but I still believe it is possible to syllabicate English 
words algorithmically.

Copyright (c) 2001 Ernesto De Spirito mailto:edspirito@latiumsoftware.com
Visit: http://www.latiumsoftware.com/delphi-newsletter.php

			
Vote: How useful do you find this Article/Tip?
Bad Excellent
1 2 3 4 5 6 7 8 9 10

 

Advertisement
Share this page
Advertisement
Download from Google

Copyright © Mendozi Enterprises LLC