1 /* CharacterBreakIterator.java - Default character BreakIterator.
2 Copyright (C) 1999, 2001 Free Software Foundation, Inc.
4 This file is part of GNU Classpath.
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING. If not, write to the
18 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
21 As a special exception, if you link this library with other files to
22 produce an executable, this library does not by itself cause the
23 resulting executable to be covered by the GNU General Public License.
24 This exception does not however invalidate any other reasons why the
25 executable file might be covered by the GNU General Public License. */
28 package gnu.java.text;
30 import java.text.BreakIterator;
31 import java.text.CharacterIterator;
34 * @author Tom Tromey <tromey@cygnus.com>
35 * @date March 19, 1999
36 * Written using The Unicode Standard, Version 2.0.
39 public class CharacterBreakIterator extends BaseBreakIterator
41 // Hangul Jamo constants from Unicode book.
// Each base value (LBase, VBase, TBase) is paired with a count (LCount,
// VCount, TCount).  isL, isV and isT accept a char when it lies between
// the base and base + count, with both ends included.
// NOTE(review): L, V and T presumably name the leading-consonant, vowel and
// trailing-consonant jamo groups of the Unicode Hangul algorithm -- confirm
// against the book cited above.
42 private static final int LBase = 0x1100;
43 private static final int VBase = 0x1161;
44 private static final int TBase = 0x11a7;
45 private static final int LCount = 19;
46 private static final int VCount = 21;
47 private static final int TCount = 28;
49 // Information about surrogates.
// Inclusive bounds: isHighSurrogate accepts 0xD800..0xDBFF and
// isLowSurrogate accepts 0xDC00..0xDFFF.
50 private static final int highSurrogateStart = 0xD800;
51 private static final int highSurrogateEnd = 0xDBFF;
52 private static final int lowSurrogateStart = 0xDC00;
53 private static final int lowSurrogateEnd = 0xDFFF;
55 public Object clone ()
57 return new CharacterBreakIterator (this);
/**
 * Creates a character break iterator positioned over empty text.
 *
 * The iterator field is seeded with an empty StringCharacterIterator
 * instead of null, so that the scanning and cloning methods of this class,
 * which dereference iter unconditionally, do not throw
 * NullPointerException when they are called on a freshly built instance.
 */
public CharacterBreakIterator ()
{
  // Fully qualified so that the file's import list does not need to change.
  iter = new java.text.StringCharacterIterator ("");
}
/**
 * Copy constructor used by clone(): gives the new object its own copy of
 * the other object's character iterator, so the two can be advanced
 * independently.
 *
 * @param other the break iterator whose state is copied
 */
private CharacterBreakIterator (CharacterBreakIterator other)
{
  // other.iter may be null (no text attached yet); calling clone() on it
  // unguarded would throw NullPointerException, so carry the null across.
  if (other.iter == null)
    iter = null;
  else
    iter = (CharacterIterator) other.iter.clone();
}
70 // Some methods to tell us different properties of characters.
71 private final boolean isL (char c)
73 return c >= LBase && c <= LBase + LCount;
75 private final boolean isV (char c)
77 return c >= VBase && c <= VBase + VCount;
79 private final boolean isT (char c)
81 return c >= TBase && c <= TBase + TCount;
83 private final boolean isLVT (char c)
85 return isL (c) || isV (c) || isT (c);
87 private final boolean isHighSurrogate (char c)
89 return c >= highSurrogateStart && c <= highSurrogateEnd;
91 private final boolean isLowSurrogate (char c)
93 return c >= lowSurrogateStart && c <= lowSurrogateEnd;
// NOTE(review): this listing omits the declaration line of this method and a
// number of short lines (braces, the statement run by each `if`, the
// assignment that gives `c` its value, and the tail of one condition).
// Presumably this is the forward scan that pairs with previous() -- confirm
// against the complete file.  The comments below describe only what the
// visible lines test.
98 int end = iter.getEndIndex();
// Is the iterator already positioned on the end index?
99 if (iter.getIndex() == end)
// Loop while the iterator index is below `end`.  `prev` starts as DONE and
// takes the value of `c` after every pass.
103 for (char prev = CharacterIterator.DONE; iter.getIndex() < end; prev = c)
// Did the current character come back as DONE?
106 if (c == CharacterIterator.DONE)
// Unicode general category of c, consulted by the separator test below.
108 int type = Character.getType(c);
110 // Break after paragraph separators.
111 if (type == Character.PARAGRAPH_SEPARATOR)
114 // Now we need some lookahead.
// iter.next() moves the iterator forward one position and returns the
// character found there.
115 char ahead = iter.next();
// Did the lookahead run past the last character?
117 if (ahead == CharacterIterator.DONE)
119 int aheadType = Character.getType(ahead);
// Lookahead is neither a non-spacing mark nor a low surrogate (the rest of
// this condition is not visible in this listing).
121 if (aheadType != Character.NON_SPACING_MARK
122 && ! isLowSurrogate (ahead)
// c is outside the L/V/T ranges while the lookahead is inside them.
125 if (! isLVT (c) && isLVT (ahead))
// c passes isL; the lookahead is neither L/V/T nor a non-spacing mark.
127 if (isL (c) && ! isLVT (ahead)
128 && aheadType != Character.NON_SPACING_MARK)
// c passes isV; the lookahead is not V, not T and not a non-spacing mark.
130 if (isV (c) && ! isV (ahead) && !isT (ahead)
131 && aheadType != Character.NON_SPACING_MARK)
// c passes isT; the lookahead is not T and not a non-spacing mark.
133 if (isT (c) && ! isT (ahead)
134 && aheadType != Character.NON_SPACING_MARK)
// Surrogate tests.  First: the lookahead is a low surrogate but c is not a
// high surrogate.
137 if (! isHighSurrogate (c) && isLowSurrogate (ahead))
// c is a high surrogate that is not followed by a low surrogate.
139 if (isHighSurrogate (c) && ! isLowSurrogate (ahead))
// c is a low surrogate whose predecessor `prev` was not a high surrogate.
141 if (! isHighSurrogate (prev) && isLowSurrogate (c))
// The result is the iterator's index once the scan has finished.
145 return iter.getIndex();
// Scans backwards from the iterator's current position; the final visible
// statement returns the iterator index that the scan leaves behind.
// NOTE(review): this listing omits short lines (braces, the statement run by
// each `if`, any repositioning calls, and the tails of several multi-line
// conditions), so the comments below describe only what the visible lines
// test -- confirm the details against the complete file.
148 public int previous ()
// Is the iterator already positioned on the begin index?
150 if (iter.getIndex() == iter.getBeginIndex())
// NOTE(review): `start` is not referenced by any line visible below -- check
// whether it is actually used.
153 int start = iter.getBeginIndex();
154 while (iter.getIndex() >= iter.getBeginIndex())
// iter.previous() moves the iterator back one position and returns the
// character found there.
156 char c = iter.previous();
// Did the step back run past the first character?
157 if (c == CharacterIterator.DONE)
// Unicode general category of c.
159 int type = Character.getType(c);
// c is neither a non-spacing mark nor a low surrogate (the rest of this
// condition is not visible in this listing).
161 if (type != Character.NON_SPACING_MARK
162 && ! isLowSurrogate (c)
166 // Now we need some lookahead.
// Scanning backwards, the "lookahead" is fetched with another step back, so
// `ahead` is the character located before c.
167 char ahead = iter.previous();
168 if (ahead == CharacterIterator.DONE)
// One more iter.previous() call.  Assuming the omitted lines do not move the
// iterator in between, `ahead2` is the character located before `ahead`.
173 char ahead2 = iter.previous();
176 if (ahead2 == CharacterIterator.DONE)
178 int aheadType = Character.getType(ahead);
// The character before c is a paragraph separator.
180 if (aheadType == Character.PARAGRAPH_SEPARATOR)
// c is inside the L/V/T ranges while the character before it is not.
183 if (isLVT (c) && ! isLVT (ahead))
// The next three conditions each continue on a line that is not visible in
// this listing.  Their visible parts require: c outside L/V/T and not a
// non-spacing mark; c neither V nor T and not a non-spacing mark; c not T
// and not a non-spacing mark.
185 if (! isLVT (c) && type != Character.NON_SPACING_MARK
188 if (! isV (c) && ! isT (c) && type != Character.NON_SPACING_MARK
191 if (! isT (c) && type != Character.NON_SPACING_MARK
// Surrogate tests.  First: c is a low surrogate that is not preceded by a
// high surrogate.
195 if (isLowSurrogate (c) && ! isHighSurrogate (ahead))
// The character before c is a high surrogate, but c is not a low surrogate.
197 if (! isLowSurrogate (c) && isHighSurrogate (ahead))
// The character before c is a low surrogate, and `ahead2` is not a high
// surrogate.
199 if (isLowSurrogate (ahead) && ! isHighSurrogate (ahead2))
// The result is the iterator's index once the scan has finished.
203 return iter.getIndex();