/* gnu/java/util/regex/RETokenNamedProperty.java
2 Copyright (C) 2006 Free Software Foundation, Inc.
4 This file is part of GNU Classpath.
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING. If not, write to the
18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library. Thus, the terms and
23 conditions of the GNU General Public License cover the whole
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module. An independent module is a module which is not derived from
33 or based on this library. If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so. If you do not wish to do so, delete this
36 exception statement from your version. */
39 package gnu.java.util.regex;
41 final class RETokenNamedProperty extends REToken {
48 static final byte[] LETTER = new byte[]
49 { Character.LOWERCASE_LETTER,
50 Character.UPPERCASE_LETTER,
51 Character.TITLECASE_LETTER,
52 Character.MODIFIER_LETTER,
53 Character.OTHER_LETTER };
55 static final byte[] MARK = new byte[]
56 { Character.NON_SPACING_MARK,
57 Character.COMBINING_SPACING_MARK,
58 Character.ENCLOSING_MARK };
60 static final byte[] SEPARATOR = new byte[]
61 { Character.SPACE_SEPARATOR,
62 Character.LINE_SEPARATOR,
63 Character.PARAGRAPH_SEPARATOR };
65 static final byte[] SYMBOL = new byte[]
66 { Character.MATH_SYMBOL,
67 Character.CURRENCY_SYMBOL,
68 Character.MODIFIER_SYMBOL,
69 Character.OTHER_SYMBOL };
71 static final byte[] NUMBER = new byte[]
72 { Character.DECIMAL_DIGIT_NUMBER,
73 Character.LETTER_NUMBER,
74 Character.OTHER_NUMBER };
76 static final byte[] PUNCTUATION = new byte[]
77 { Character.DASH_PUNCTUATION,
78 Character.START_PUNCTUATION,
79 Character.END_PUNCTUATION,
80 Character.CONNECTOR_PUNCTUATION,
81 Character.OTHER_PUNCTUATION,
82 Character.INITIAL_QUOTE_PUNCTUATION,
83 Character.FINAL_QUOTE_PUNCTUATION};
85 static final byte[] OTHER = new byte[]
88 Character.PRIVATE_USE,
90 Character.UNASSIGNED };
92 RETokenNamedProperty(int subIndex, String name, boolean insens, boolean negate) throws REException {
97 handler = getHandler(name);
100 int getMinimumLength() {
104 int getMaximumLength() {
108 REMatch matchThis(CharIndexed input, REMatch mymatch) {
109 char ch = input.charAt(mymatch.index);
110 boolean retval = matchOneChar(ch);
118 private boolean matchOneChar(char ch) {
119 if (ch == CharIndexed.OUT_OF_BOUNDS)
122 boolean retval = handler.includes(ch);
125 handler.includes(toUpperCase(ch, unicodeAware)) ||
126 handler.includes(toLowerCase(ch, unicodeAware));
129 if (negate) retval = !retval;
133 boolean returnsFixedLengthMatches() { return true; }
135 int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) {
136 int index = mymatch.index;
139 if (numRepeats >= max) break;
140 char ch = input.charAt(index++);
141 if (! matchOneChar(ch)) break;
147 void dump(StringBuffer os) {
149 .append(negate ? "P" : "p")
150 .append("{" + name + "}");
153 private abstract static class Handler {
154 public abstract boolean includes(char c);
157 private Handler getHandler(String name) throws REException {
158 if (name.equals("Lower") ||
159 name.equals("Upper") ||
160 // name.equals("ASCII") ||
161 name.equals("Alpha") ||
162 name.equals("Digit") ||
163 name.equals("Alnum") ||
164 name.equals("Punct") ||
165 name.equals("Graph") ||
166 name.equals("Print") ||
167 name.equals("Blank") ||
168 name.equals("Cntrl") ||
169 name.equals("XDigit") ||
170 name.equals("Space") ) {
171 return new POSIXHandler(name);
173 if (name.startsWith("In")) {
175 name = name.substring(2);
176 Character.UnicodeBlock block = Character.UnicodeBlock.forName(name);
177 return new UnicodeBlockHandler(block);
179 catch (IllegalArgumentException e) {
180 throw new REException("Invalid Unicode block name: " + name, REException.REG_ESCAPE, 0);
183 if (name.startsWith("Is")) {
184 name = name.substring(2);
187 // "grouped properties"
188 if (name.equals("L"))
189 return new UnicodeCategoriesHandler(LETTER);
190 if (name.equals("M"))
191 return new UnicodeCategoriesHandler(MARK);
192 if (name.equals("Z"))
193 return new UnicodeCategoriesHandler(SEPARATOR);
194 if (name.equals("S"))
195 return new UnicodeCategoriesHandler(SYMBOL);
196 if (name.equals("N"))
197 return new UnicodeCategoriesHandler(NUMBER);
198 if (name.equals("P"))
199 return new UnicodeCategoriesHandler(PUNCTUATION);
200 if (name.equals("C"))
201 return new UnicodeCategoriesHandler(OTHER);
203 if (name.equals("Mc"))
204 return new UnicodeCategoryHandler(Character.COMBINING_SPACING_MARK);
205 if (name.equals("Pc"))
206 return new UnicodeCategoryHandler(Character.CONNECTOR_PUNCTUATION);
207 if (name.equals("Cc"))
208 return new UnicodeCategoryHandler(Character.CONTROL);
209 if (name.equals("Sc"))
210 return new UnicodeCategoryHandler(Character.CURRENCY_SYMBOL);
211 if (name.equals("Pd"))
212 return new UnicodeCategoryHandler(Character.DASH_PUNCTUATION);
213 if (name.equals("Nd"))
214 return new UnicodeCategoryHandler(Character.DECIMAL_DIGIT_NUMBER);
215 if (name.equals("Me"))
216 return new UnicodeCategoryHandler(Character.ENCLOSING_MARK);
217 if (name.equals("Pe"))
218 return new UnicodeCategoryHandler(Character.END_PUNCTUATION);
219 if (name.equals("Pf"))
220 return new UnicodeCategoryHandler(Character.FINAL_QUOTE_PUNCTUATION);
221 if (name.equals("Cf"))
222 return new UnicodeCategoryHandler(Character.FORMAT);
223 if (name.equals("Pi"))
224 return new UnicodeCategoryHandler(Character.INITIAL_QUOTE_PUNCTUATION);
225 if (name.equals("Nl"))
226 return new UnicodeCategoryHandler(Character.LETTER_NUMBER);
227 if (name.equals("Zl"))
228 return new UnicodeCategoryHandler(Character.LINE_SEPARATOR);
229 if (name.equals("Ll"))
230 return new UnicodeCategoryHandler(Character.LOWERCASE_LETTER);
231 if (name.equals("Sm"))
232 return new UnicodeCategoryHandler(Character.MATH_SYMBOL);
233 if (name.equals("Lm"))
234 return new UnicodeCategoryHandler(Character.MODIFIER_LETTER);
235 if (name.equals("Sk"))
236 return new UnicodeCategoryHandler(Character.MODIFIER_SYMBOL);
237 if (name.equals("Mn"))
238 return new UnicodeCategoryHandler(Character.NON_SPACING_MARK);
239 if (name.equals("Lo"))
240 return new UnicodeCategoryHandler(Character.OTHER_LETTER);
241 if (name.equals("No"))
242 return new UnicodeCategoryHandler(Character.OTHER_NUMBER);
243 if (name.equals("Po"))
244 return new UnicodeCategoryHandler(Character.OTHER_PUNCTUATION);
245 if (name.equals("So"))
246 return new UnicodeCategoryHandler(Character.OTHER_SYMBOL);
247 if (name.equals("Zp"))
248 return new UnicodeCategoryHandler(Character.PARAGRAPH_SEPARATOR);
249 if (name.equals("Co"))
250 return new UnicodeCategoryHandler(Character.PRIVATE_USE);
251 if (name.equals("Zs"))
252 return new UnicodeCategoryHandler(Character.SPACE_SEPARATOR);
253 if (name.equals("Ps"))
254 return new UnicodeCategoryHandler(Character.START_PUNCTUATION);
255 if (name.equals("Cs"))
256 return new UnicodeCategoryHandler(Character.SURROGATE);
257 if (name.equals("Lt"))
258 return new UnicodeCategoryHandler(Character.TITLECASE_LETTER);
259 if (name.equals("Cn"))
260 return new UnicodeCategoryHandler(Character.UNASSIGNED);
261 if (name.equals("Lu"))
262 return new UnicodeCategoryHandler(Character.UPPERCASE_LETTER);
263 if (name.equals("all"))
266 public boolean includes(char c)
271 throw new REException("unsupported name " + name, REException.REG_ESCAPE, 0);
274 private static class POSIXHandler extends Handler {
275 private RETokenPOSIX retoken;
276 public POSIXHandler(String name) {
277 int posixId = RETokenPOSIX.intValue(name.toLowerCase());
279 retoken = new RETokenPOSIX(0,posixId,false,false);
281 throw new RuntimeException("Unknown posix ID: " + name);
283 public boolean includes(char c) {
284 return retoken.matchOneChar(c);
288 private static class UnicodeCategoryHandler extends Handler {
289 public UnicodeCategoryHandler(byte category) {
290 this.category = (int)category;
292 private int category;
293 public boolean includes(char c) {
294 return Character.getType(c) == category;
298 private static class UnicodeCategoriesHandler extends Handler {
299 public UnicodeCategoriesHandler(byte[] categories) {
300 this.categories = categories;
302 private byte[] categories;
303 public boolean includes(char c) {
304 int category = Character.getType(c);
305 for (int i = 0; i < categories.length; i++)
306 if (category == categories[i])
312 private static class UnicodeBlockHandler extends Handler {
313 public UnicodeBlockHandler(Character.UnicodeBlock block) {
316 private Character.UnicodeBlock block;
317 public boolean includes(char c) {
318 Character.UnicodeBlock cblock = Character.UnicodeBlock.of(c);
319 return (cblock != null && cblock.equals(block));