1 // Character.java - Character class.
3 /* Copyright (C) 1998, 1999 Cygnus Solutions
5 This file is part of libgcj.
7 This software is copyrighted work licensed under the terms of the
8 Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
13 import java.io.Serializable;
16 * @author Tom Tromey <tromey@cygnus.com>
17 * @date September 10, 1998
20 /* Written using "Java Class Libraries", 2nd edition, ISBN 0-201-31002-3
21 * "The Java Language Specification", ISBN 0-201-63451-1,
22 * online API docs for JDK 1.2 beta from http://www.javasoft.com,
23 * and The Unicode Standard Version 2.0.
24 * Status: Believed complete and correct for JDK 1.1; 1.2 methods
// Declaration of the final class Character, which implements Serializable
// and Comparable, followed by its value-range, radix-range and TYPE constants.
28 public final class Character implements Serializable, Comparable
// Lowest (U+0000) and highest (U+FFFF) char values.
30 public static final char MIN_VALUE = '\u0000';
31 public static final char MAX_VALUE = '\uffff';
// Inclusive radix bounds; digit and forDigit test their radix argument
// against these two constants.
33 public static final int MIN_RADIX = 2;
34 public static final int MAX_RADIX = 36;
36 // This initialization is seemingly circular, but it is accepted
37 // by javac, and is handled specially by gcc.
38 public static final Class TYPE = char.class;
// Character category codes. The predicates in this class (isDefined,
// isLetter, isJavaIdentifierPart, ...) compare the result of getType
// against these constants. The values are not declared in numeric order,
// and no constant in this list has the value 17.
41 public static final byte SPACE_SEPARATOR = 12;
42 public static final byte LINE_SEPARATOR = 13;
43 public static final byte PARAGRAPH_SEPARATOR = 14;
46 public static final byte UPPERCASE_LETTER = 1;
47 public static final byte LOWERCASE_LETTER = 2;
48 public static final byte TITLECASE_LETTER = 3;
49 public static final byte MODIFIER_LETTER = 4;
50 public static final byte OTHER_LETTER = 5;
53 public static final byte DECIMAL_DIGIT_NUMBER = 9;
54 public static final byte LETTER_NUMBER = 10;
55 public static final byte OTHER_NUMBER = 11;
58 public static final byte NON_SPACING_MARK = 6;
59 public static final byte ENCLOSING_MARK = 7;
60 public static final byte COMBINING_SPACING_MARK = 8;
63 public static final byte DASH_PUNCTUATION = 20;
64 public static final byte START_PUNCTUATION = 21;
65 public static final byte END_PUNCTUATION = 22;
66 public static final byte CONNECTOR_PUNCTUATION = 23;
67 public static final byte OTHER_PUNCTUATION = 24;
70 public static final byte MATH_SYMBOL = 25;
71 public static final byte CURRENCY_SYMBOL = 26;
72 public static final byte MODIFIER_SYMBOL = 27;
73 public static final byte OTHER_SYMBOL = 28;
76 public static final byte CONTROL = 15;
77 // Note: The JCL book says that both FORMAT and PRIVATE_USE are 18.
78 // However, FORMAT is actually 16.
79 public static final byte FORMAT = 16;
// UNASSIGNED (0) is the value isDefined tests getType against.
82 public static final byte UNASSIGNED = 0;
83 public static final byte PRIVATE_USE = 18;
84 public static final byte SURROGATE = 19;
// Constructor taking the char to wrap.
// NOTE(review): the constructor body is not present in this listing;
// presumably it stores ch in the `value` field that equals, toString and
// compareTo read -- confirm against the full file.
87 public Character (char ch)
// Accessor declared to return a char.
// NOTE(review): the body is not present in this listing; presumably it
// returns the wrapped `value` field -- confirm against the full file.
92 public char charValue ()
97 // See if a character is a digit. If so, return the corresponding
98 // value. Otherwise return -1.
// Declared native, so the implementation lives outside this file. Used by
// digit and isDigit.
99 private static native int digit_value (char ch);
// Computes the numeric value of ch when read as a digit in the given radix.
// Visible steps: the radix is checked against MIN_RADIX and MAX_RADIX,
// digit_value is consulted, the ASCII ranges 'A'..'Z' and 'a'..'z' are
// tested, and a value that is not below radix is reported as -1.
// NOTE(review): the statements guarded by these conditions are missing from
// this listing, so the result for an out-of-range radix and the letter
// arithmetic cannot be confirmed from here.
101 public static int digit (char ch, int radix)
103 if (radix < MIN_RADIX || radix > MAX_RADIX)
106 int d = digit_value (ch);
109 if (ch >= 'A' && ch <= 'Z')
111 else if (ch >= 'a' && ch <= 'z')
// A digit value that does not fit in the radix is rejected with -1.
116 return d >= radix ? -1 : d;
// Compares this object with obj: when obj is a Character, the result is
// whether the two wrapped values are equal.
// NOTE(review): the return for a non-Character (or null) argument is not
// present in this listing; presumably false -- confirm.
119 public boolean equals (Object obj)
121 // Don't need to compare OBJ to null as instanceof will do this.
122 if (obj instanceof Character)
123 return value == ((Character) obj).value;
// Maps the digit value d in radix rdx to a character.
// The guard rejects a negative d, a d that is not below rdx, and an rdx
// outside MIN_RADIX..MAX_RADIX.
// NOTE(review): the value returned when the guard fails, and the condition
// that selects between the two returns below, are missing from this
// listing; presumably values below 10 take the '0'-based form -- confirm.
127 public static char forDigit (int d, int rdx)
129 if (d < 0 || d >= rdx || rdx < MIN_RADIX || rdx > MAX_RADIX)
// Decimal-digit form: offset from '0'.
132 return (char) ('0' + d);
133 // The Java Language Spec says to use lowercase, while the JCL
134 // says to use uppercase. We go with the former.
// Letter form: a d of 10 maps to 'a'.
135 return (char) ('a' + d - 10);
// Native lookups, implemented outside this file. The int returned by
// getType is compared against the category constants (UNASSIGNED,
// UPPERCASE_LETTER, ...) by the predicates in this class.
138 public static native int getNumericValue (char ch);
139 public static native int getType (char ch);
// Hash code for this object.
// NOTE(review): the body is not present in this listing, so how the hash is
// derived cannot be confirmed from here.
141 public int hashCode ()
// Returns true when getType reports any category other than UNASSIGNED
// for ch.
146 public static boolean isDefined (char ch)
148 return getType (ch) != UNASSIGNED;
// Returns true when the native digit_value helper yields something other
// than -1 for ch (its "not a digit" result, per the comment on digit_value).
151 public static boolean isDigit (char ch)
153 return digit_value (ch) != -1;
// Range test for characters treated as ignorable inside identifiers.
// Visible ranges: U+0000..U+0008, U+000E..U+001B, U+007F..U+009F,
// U+200C..U+200F, U+202A..U+202E and U+206A..U+206F.
// NOTE(review): the comment below is cut off and the end of the boolean
// expression is missing from this listing, so further alternatives may
// exist -- confirm against the full file.
156 // The JCL book says that the argument here is a Character. That is
158 public static boolean isIdentifierIgnorable (char ch)
160 // This information comes from the Unicode Standard. It isn't
161 // auto-generated as it doesn't appear in the unidata table.
162 return ((ch >= '\u0000' && ch <= '\u0008')
163 || (ch >= '\u000e' && ch <= '\u001b')
164 // JDK 1.2 docs say that these are ignorable. The Unicode
165 // Standard is somewhat ambiguous on this issue.
166 || (ch >= '\u007f' && ch <= '\u009f')
167 || (ch >= '\u200c' && ch <= '\u200f')
168 // JCL says 200a through 200e, but that is a typo. The
169 // Unicode standard says the bidi controls are 202a
171 || (ch >= '\u202a' && ch <= '\u202e')
172 || (ch >= '\u206a' && ch <= '\u206f')
// Returns true when c lies in U+0000..U+001F or in U+007F..U+009F
// (both ranges inclusive).
176 public static boolean isISOControl (char c)
178 return ((c >= '\u0000' && c <= '\u001f')
179 || (c >= '\u007f' && c <= '\u009f'));
// Tests whether ch may appear inside a Java identifier.
// Ignorable characters and digits are handled first; otherwise the answer
// depends on the category from getType: combining or non-spacing mark,
// currency symbol, connector punctuation, any of the five letter
// categories, or letter number.
// NOTE(review): the statement guarded by the first `if` is missing from
// this listing; presumably it returns true -- confirm.
182 public static boolean isJavaIdentifierPart (char ch)
184 if (isIdentifierIgnorable (ch) || isDigit (ch))
186 int type = getType (ch);
187 return (type == COMBINING_SPACING_MARK || type == NON_SPACING_MARK
188 || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION
189 || type == UPPERCASE_LETTER || type == LOWERCASE_LETTER
190 || type == TITLECASE_LETTER || type == MODIFIER_LETTER
191 || type == OTHER_LETTER || type == LETTER_NUMBER);
// Tests whether ch may begin a Java identifier: true when getType reports
// a currency symbol, connector punctuation, or one of the five letter
// categories. Unlike isJavaIdentifierPart, digits, marks and LETTER_NUMBER
// are not accepted here.
194 public static boolean isJavaIdentifierStart (char ch)
196 int type = getType (ch);
197 return (type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION
198 || type == UPPERCASE_LETTER || type == LOWERCASE_LETTER
199 || type == TITLECASE_LETTER || type == MODIFIER_LETTER
200 || type == OTHER_LETTER);
203 // Deprecated in 1.2.
// Returns true for '$', for '_', and for any character isLetter accepts.
204 public static boolean isJavaLetter (char ch)
206 return ch == '$' || ch == '_' || isLetter (ch);
209 // Deprecated in 1.2.
// Returns true for '$', for '_', and for any character isLetterOrDigit
// accepts.
210 public static boolean isJavaLetterOrDigit (char ch)
212 return ch == '$' || ch == '_' || isLetterOrDigit (ch);
// Returns true when getType places ch in one of the five letter
// categories: uppercase, lowercase, titlecase, modifier or other letter.
215 public static boolean isLetter (char ch)
217 int type = getType (ch);
218 return (type == UPPERCASE_LETTER || type == LOWERCASE_LETTER
219 || type == TITLECASE_LETTER || type == MODIFIER_LETTER
220 || type == OTHER_LETTER);
// Returns true when ch satisfies isDigit or isLetter; the digit test is
// evaluated first.
223 public static boolean isLetterOrDigit (char ch)
225 return isDigit (ch) || isLetter (ch);
// Native predicate; the implementation lives outside this file.
228 public static native boolean isLowerCase (char ch);
230 // Deprecated in JCL.
// Returns true only for newline, tab, form feed, carriage return and the
// ASCII space character.
231 public static boolean isSpace (char ch)
233 return ch == '\n' || ch == '\t' || ch == '\f' || ch == '\r' || ch == ' ';
// Native predicates; the implementations live outside this file.
// isSpaceChar is consulted by isWhitespace.
236 public static native boolean isSpaceChar (char ch);
237 public static native boolean isTitleCase (char ch);
// Tests whether ch may appear inside a Unicode identifier.
// Ignorable characters and digits are handled first; otherwise the answer
// depends on the category from getType: connector punctuation, letter
// number, combining or non-spacing mark, or any of the five letter
// categories. CURRENCY_SYMBOL, which isJavaIdentifierPart accepts, is not
// in this list.
// NOTE(review): the statement guarded by the first `if` is missing from
// this listing; presumably it returns true -- confirm.
239 public static boolean isUnicodeIdentifierPart (char ch)
241 if (isIdentifierIgnorable (ch) || isDigit (ch))
243 int type = getType (ch);
244 return (type == CONNECTOR_PUNCTUATION || type == LETTER_NUMBER
245 || type == COMBINING_SPACING_MARK || type == NON_SPACING_MARK
246 || type == UPPERCASE_LETTER || type == LOWERCASE_LETTER
247 || type == TITLECASE_LETTER || type == MODIFIER_LETTER
248 || type == OTHER_LETTER);
// A character may begin a Unicode identifier exactly when isLetter
// accepts it; this method simply delegates.
251 public static boolean isUnicodeIdentifierStart (char ch)
253 return isLetter (ch);
// Native predicate; the implementation lives outside this file.
256 public static native boolean isUpperCase (char ch);
// Returns true when ch is in U+0009 through carriage return (U+000D), in
// U+001C..U+001F, or is accepted by isSpaceChar while being neither U+00A0
// nor U+FEFF.
258 public static boolean isWhitespace (char ch)
260 return ((ch >= '\u0009' && ch <= '\r')
261 || (ch >= '\u001c' && ch <= '\u001f')
// The two explicit exclusions are tested before isSpaceChar is called.
262 || (ch != '\u00a0' && ch != '\ufeff' && isSpaceChar (ch)));
// Native case-conversion methods; each takes and returns a char, and the
// implementations live outside this file.
265 public static native char toLowerCase (char ch);
266 public static native char toTitleCase (char ch);
267 public static native char toUpperCase (char ch);
// Returns the result of String.valueOf applied to the wrapped value.
269 public String toString ()
271 return String.valueOf(value);
// Orders two Characters by the difference of their wrapped values: the
// result is negative, zero or positive as this value is below, equal to or
// above the other. A null argument causes a NullPointerException when its
// field is read.
274 public int compareTo (Character anotherCharacter)
276 return value - anotherCharacter.value;
// Comparable entry point: casts o to Character and delegates to
// compareTo (Character). An argument of any other type fails the cast
// with a ClassCastException.
279 public int compareTo (Object o)
281 return compareTo ((Character) o);