OSDN Git Service

* All files: Updated copyright information.
[pf3gnuchains/gcc-fork.git] / libjava / gnu / gcj / text / SentenceBreakIterator.java
1 // Default sentence BreakIterator.
2
3 /* Copyright (C) 1999  Free Software Foundation
4
5    This file is part of libgcj.
6
7 This software is copyrighted work licensed under the terms of the
8 Libgcj License.  Please consult the file "LIBGCJ_LICENSE" for
9 details.  */
10
11 package gnu.gcj.text;
12
13 import java.text.BreakIterator;
14 import java.text.CharacterIterator;
15
16 /**
17  * @author Tom Tromey <tromey@cygnus.com>
18  * @date March 23, 1999
19  * Written using The Unicode Standard, Version 2.0.
20  */
21
22 public class SentenceBreakIterator extends BaseBreakIterator
23 {
24   public Object clone ()
25   {
26     return new SentenceBreakIterator (this);
27   }
28
29   public SentenceBreakIterator ()
30   {
31     iter = null;
32   }
33
34   private SentenceBreakIterator (SentenceBreakIterator other)
35   {
36     iter = (CharacterIterator) other.iter.clone();
37   }
38
39   public int next ()
40   {
41     int end = iter.getEndIndex();
42     if (iter.getIndex() == end)
43       return DONE;
44
45     while (iter.getIndex() < end)
46       {
47         char c = iter.current();
48         if (c == CharacterIterator.DONE)
49           break;
50         int type = Character.getType(c);
51
52         char n = iter.next();
53         if (n == CharacterIterator.DONE)
54           break;
55
56         // Always break after paragraph separator.
57         if (type == Character.PARAGRAPH_SEPARATOR)
58           break;
59
60         if (c == '!' || c == '?')
61           {
62             // Skip close punctuation.
63             while (n != CharacterIterator.DONE
64                    && Character.getType(n) == Character.END_PUNCTUATION)
65               n = iter.next();
66             // Skip spaces.
67             while (n != CharacterIterator.DONE
68                    && Character.getType(n) == Character.SPACE_SEPARATOR)
69               n = iter.next();
70             // Skip optional paragraph separator.
71             if (n != CharacterIterator.DONE
72                 && Character.getType(n) == Character.PARAGRAPH_SEPARATOR)
73               n = iter.next();
74
75             // There's always a break somewhere after `!' or `?'.
76             break;
77           }
78
79         if (c == '.')
80           {
81             int save = iter.getIndex();
82             // Skip close punctuation.
83             while (n != CharacterIterator.DONE
84                    && Character.getType(n) == Character.END_PUNCTUATION)
85               n = iter.next();
86             // Skip spaces.  We keep count because we need at least
87             // one for this period to represent a terminator.
88             int spcount = 0;
89             while (n != CharacterIterator.DONE
90                    && Character.getType(n) == Character.SPACE_SEPARATOR)
91               {
92                 n = iter.next();
93                 ++spcount;
94               }
95             if (spcount > 0)
96               {
97                 int save2 = iter.getIndex();
98                 // Skip over open puncutation.
99                 while (n != CharacterIterator.DONE
100                        && Character.getType(n) == Character.START_PUNCTUATION)
101                   n = iter.next();
102                 // Next character must not be lower case.
103                 if (n == CharacterIterator.DONE
104                     || ! Character.isLowerCase(n))
105                   {
106                     iter.setIndex(save2);
107                     break;
108                   }
109               }
110             iter.setIndex(save);
111           }
112       }
113
114     return iter.getIndex();
115   }
116
117   private final int previous_internal ()
118   {
119     int start = iter.getBeginIndex();
120     if (iter.getIndex() == start)
121       return DONE;
122
123     while (iter.getIndex() >= start)
124       {
125         char c = iter.previous();
126         if (c == CharacterIterator.DONE)
127           break;
128
129         char n = iter.previous();
130         if (n == CharacterIterator.DONE)
131           break;
132         iter.next();
133         int nt = Character.getType(n);
134
135         if (! Character.isLowerCase(c)
136             && (nt == Character.START_PUNCTUATION
137                 || nt == Character.SPACE_SEPARATOR))
138           {
139             int save = iter.getIndex();
140             int save_nt = nt;
141             char save_n = n;
142             // Skip open punctuation.
143             while (n != CharacterIterator.DONE
144                    && Character.getType(n) == Character.START_PUNCTUATION)
145               n = iter.previous();
146             if (n == CharacterIterator.DONE)
147               break;
148             if (Character.getType(n) == Character.SPACE_SEPARATOR)
149               {
150                 // Must have at least once space after the `.'.
151                 int save2 = iter.getIndex();
152                 while (n != CharacterIterator.DONE
153                        && Character.getType(n) == Character.SPACE_SEPARATOR)
154                   n = iter.previous();
155                 // Skip close punctuation.
156                 while (n != CharacterIterator.DONE
157                        && Character.getType(n) == Character.END_PUNCTUATION)
158                   n = iter.previous();
159                 if (n == CharacterIterator.DONE || n == '.')
160                   {
161                     // Communicate location of actual end.
162                     period = iter.getIndex();
163                     iter.setIndex(save2);
164                     break;
165                   }
166               }
167             iter.setIndex(save);
168             nt = save_nt;
169             n = save_n;
170           }
171
172         if (nt == Character.PARAGRAPH_SEPARATOR)
173           {
174             // Communicate location of actual end.
175             period = iter.getIndex();
176             break;
177           }
178         else if (nt == Character.SPACE_SEPARATOR
179                  || nt == Character.END_PUNCTUATION)
180           {
181             int save = iter.getIndex();
182             // Skip spaces.
183             while (n != CharacterIterator.DONE
184                    && Character.getType(n) == Character.SPACE_SEPARATOR)
185               n = iter.previous();
186             // Skip close punctuation.
187             while (n != CharacterIterator.DONE
188                    && Character.getType(n) == Character.END_PUNCTUATION)
189               n = iter.previous();
190             int here = iter.getIndex();
191             iter.setIndex(save);
192             if (n == CharacterIterator.DONE || n == '!' || n == '?')
193               {
194                 // Communicate location of actual end.
195                 period = here;
196                 break;
197               }
198           }
199         else if (n == '!' || n == '?')
200           {
201             // Communicate location of actual end.
202             period = iter.getIndex();
203             break;
204           }
205       }
206
207     return iter.getIndex();
208   }
209
210   public int previous ()
211   {
212     // We want to skip over the first sentence end to the second one.
213     // However, at the end of the string we want the first end.
214     int here = iter.getIndex();
215     period = here;
216     int first = previous_internal ();
217     if (here == iter.getEndIndex() || first == DONE)
218       return first;
219     iter.setIndex(period);
220     return previous_internal ();
221   }
222
223   // This is used for communication between previous and
224   // previous_internal.
225   private int period;
226 }