* @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
*/
-public final class RESyntax implements Serializable {
- static final String DEFAULT_LINE_SEPARATOR = System.getProperty("line.separator");
+public final class RESyntax implements Serializable
+{
+ static final String DEFAULT_LINE_SEPARATOR =
+ System.getProperty ("line.separator");
- private BitSet bits;
+ private BitSet bits;
- // true for the constant defined syntaxes
- private boolean isFinal = false;
+ // true once makeFinal() has been called, as it is for every predefined constant syntax
+ private boolean isFinal = false;
- private String lineSeparator = DEFAULT_LINE_SEPARATOR;
+ private String lineSeparator = DEFAULT_LINE_SEPARATOR;
// Values for constants are bit indexes
/**
* Syntax bit. Backslash is an escape character in lists.
*/
- public static final int RE_BACKSLASH_ESCAPE_IN_LISTS = 0;
+ public static final int RE_BACKSLASH_ESCAPE_IN_LISTS = 0;
/**
* Syntax bit. Use \? instead of ? and \+ instead of +.
*/
- public static final int RE_BK_PLUS_QM = 1;
+ public static final int RE_BK_PLUS_QM = 1;
/**
* Syntax bit. POSIX character classes ([:...:]) in lists are allowed.
*/
- public static final int RE_CHAR_CLASSES = 2;
+ public static final int RE_CHAR_CLASSES = 2;
/**
* Syntax bit. ^ and $ are special everywhere.
* <B>Not implemented.</B>
*/
- public static final int RE_CONTEXT_INDEP_ANCHORS = 3;
+ public static final int RE_CONTEXT_INDEP_ANCHORS = 3;
/**
* Syntax bit. Repetition operators are only special in valid positions.
* <B>Not implemented.</B>
*/
- public static final int RE_CONTEXT_INDEP_OPS = 4;
+ public static final int RE_CONTEXT_INDEP_OPS = 4;
/**
* Syntax bit. Repetition and alternation operators are invalid
* at start and end of pattern and other places.
* <B>Not implemented</B>.
*/
- public static final int RE_CONTEXT_INVALID_OPS = 5;
+ public static final int RE_CONTEXT_INVALID_OPS = 5;
/**
* Syntax bit. Match-any-character operator (.) matches a newline.
*/
- public static final int RE_DOT_NEWLINE = 6;
+ public static final int RE_DOT_NEWLINE = 6;
/**
* Syntax bit. Match-any-character operator (.) does not match a null.
*/
- public static final int RE_DOT_NOT_NULL = 7;
+ public static final int RE_DOT_NOT_NULL = 7;
/**
* Syntax bit. Intervals ({x}, {x,}, {x,y}) are allowed.
*/
- public static final int RE_INTERVALS = 8;
+ public static final int RE_INTERVALS = 8;
/**
* Syntax bit. No alternation (|), match one-or-more (+), or
* match zero-or-one (?) operators.
*/
- public static final int RE_LIMITED_OPS = 9;
+ public static final int RE_LIMITED_OPS = 9;
/**
* Syntax bit. Newline is an alternation operator.
*/
- public static final int RE_NEWLINE_ALT = 10; // impl.
+ public static final int RE_NEWLINE_ALT = 10; // impl.
/**
* Syntax bit. Intervals use { } instead of \{ \}
*/
- public static final int RE_NO_BK_BRACES = 11;
+ public static final int RE_NO_BK_BRACES = 11;
/**
* Syntax bit. Grouping uses ( ) instead of \( \).
*/
- public static final int RE_NO_BK_PARENS = 12;
+ public static final int RE_NO_BK_PARENS = 12;
/**
* Syntax bit. Backreferences not allowed.
*/
- public static final int RE_NO_BK_REFS = 13;
+ public static final int RE_NO_BK_REFS = 13;
/**
* Syntax bit. Alternation uses | instead of \|
*/
- public static final int RE_NO_BK_VBAR = 14;
+ public static final int RE_NO_BK_VBAR = 14;
/**
* Syntax bit. <B>Not implemented</B>.
*/
- public static final int RE_NO_EMPTY_RANGES = 15;
+ public static final int RE_NO_EMPTY_RANGES = 15;
/**
* Syntax bit. An unmatched right parenthesis (')' or '\)', depending
/**
* Syntax bit. <B>Not implemented.</B>
*/
- public static final int RE_HAT_LISTS_NOT_NEWLINE = 17;
+ public static final int RE_HAT_LISTS_NOT_NEWLINE = 17;
/**
* Syntax bit. Stingy matching is allowed (+?, *?, ??, {x,y}?).
*/
- public static final int RE_STINGY_OPS = 18;
+ public static final int RE_STINGY_OPS = 18;
/**
* Syntax bit. Allow character class escapes (\d, \D, \s, \S, \w, \W).
*/
- public static final int RE_CHAR_CLASS_ESCAPES = 19;
+ public static final int RE_CHAR_CLASS_ESCAPES = 19;
/**
* Syntax bit. Allow use of (?:xxx) grouping (subexpression is not saved).
*/
- public static final int RE_PURE_GROUPING = 20;
+ public static final int RE_PURE_GROUPING = 20;
/**
- * Syntax bit. Allow use of (?=xxx) and (?!xxx) apply the subexpression
+ * Syntax bit. Allow use of (?=xxx) and (?!xxx), which apply the subexpression
* to the text following the current position without consuming that text.
*/
- public static final int RE_LOOKAHEAD = 21;
+ public static final int RE_LOOKAHEAD = 21;
/**
* Syntax bit. Allow beginning- and end-of-string anchors (\A, \Z).
*/
- public static final int RE_STRING_ANCHORS = 22;
+ public static final int RE_STRING_ANCHORS = 22;
/**
* Syntax bit. Allow embedded comments, (?#comment), as in Perl5.
*/
- public static final int RE_COMMENTS = 23;
+ public static final int RE_COMMENTS = 23;
/**
* Syntax bit. Allow character class escapes within lists, as in Perl5.
*/
- public static final int RE_CHAR_CLASS_ESC_IN_LISTS = 24;
+ public static final int RE_CHAR_CLASS_ESC_IN_LISTS = 24;
/**
* Syntax bit. Possessive matching is allowed (++, *+, ?+, {x,y}+).
*/
- public static final int RE_POSSESSIVE_OPS = 25;
+ public static final int RE_POSSESSIVE_OPS = 25;
/**
* Syntax bit. Allow embedded flags, (?is-x), as in Perl5.
*/
- public static final int RE_EMBEDDED_FLAGS = 26;
+ public static final int RE_EMBEDDED_FLAGS = 26;
/**
* Syntax bit. Allow octal char (\0377), as in Perl5.
*/
- public static final int RE_OCTAL_CHAR = 27;
+ public static final int RE_OCTAL_CHAR = 27;
/**
* Syntax bit. Allow hex char (\x1b), as in Perl5.
*/
- public static final int RE_HEX_CHAR = 28;
+ public static final int RE_HEX_CHAR = 28;
/**
* Syntax bit. Allow Unicode char (\u1234), as in Java 1.4.
*/
- public static final int RE_UNICODE_CHAR = 29;
+ public static final int RE_UNICODE_CHAR = 29;
/**
* Syntax bit. Allow named property (\p{P}, \P{p}), as in Perl5.
*/
- public static final int RE_NAMED_PROPERTY = 30;
+ public static final int RE_NAMED_PROPERTY = 30;
/**
- * Syntax bit. Allow nested characterclass ([a-z&&[^p-r]]), as in Java 1.4.
+ * Syntax bit. Allow nested character classes ([a-z&&[^p-r]]), as in Java 1.4.
*/
- public static final int RE_NESTED_CHARCLASS = 31;
+ public static final int RE_NESTED_CHARCLASS = 31;
- private static final int BIT_TOTAL = 32;
+ private static final int BIT_TOTAL = 32;
/**
* Predefined syntax.
* Emulates regular expression support in Larry Wall's perl, version 4,
* using single line mode (/s modifier).
*/
- public static final RESyntax RE_SYNTAX_PERL4_S; // single line mode (/s)
+ public static final RESyntax RE_SYNTAX_PERL4_S; // single line mode (/s)
/**
* Predefined syntax.
* Emulates regular expression support in Larry Wall's perl, version 5.
*/
- public static final RESyntax RE_SYNTAX_PERL5;
+ public static final RESyntax RE_SYNTAX_PERL5;
/**
* Predefined syntax.
* Emulates regular expression support in Java 1.4's java.util.regex
* package.
*/
- public static final RESyntax RE_SYNTAX_JAVA_1_4;
-
- static {
- // Define syntaxes
-
- RE_SYNTAX_EMACS = new RESyntax().makeFinal();
-
- RESyntax RE_SYNTAX_POSIX_COMMON = new RESyntax()
- .set(RE_CHAR_CLASSES)
- .set(RE_DOT_NEWLINE)
- .set(RE_DOT_NOT_NULL)
- .set(RE_INTERVALS)
- .set(RE_NO_EMPTY_RANGES)
- .makeFinal();
-
- RE_SYNTAX_POSIX_BASIC = new RESyntax(RE_SYNTAX_POSIX_COMMON)
- .set(RE_BK_PLUS_QM)
- .makeFinal();
-
- RE_SYNTAX_POSIX_EXTENDED = new RESyntax(RE_SYNTAX_POSIX_COMMON)
- .set(RE_CONTEXT_INDEP_ANCHORS)
- .set(RE_CONTEXT_INDEP_OPS)
- .set(RE_NO_BK_BRACES)
- .set(RE_NO_BK_PARENS)
- .set(RE_NO_BK_VBAR)
- .set(RE_UNMATCHED_RIGHT_PAREN_ORD)
- .makeFinal();
-
- RE_SYNTAX_AWK = new RESyntax()
- .set(RE_BACKSLASH_ESCAPE_IN_LISTS)
- .set(RE_DOT_NOT_NULL)
- .set(RE_NO_BK_PARENS)
- .set(RE_NO_BK_REFS)
- .set(RE_NO_BK_VBAR)
- .set(RE_NO_EMPTY_RANGES)
- .set(RE_UNMATCHED_RIGHT_PAREN_ORD)
- .makeFinal();
-
- RE_SYNTAX_POSIX_AWK = new RESyntax(RE_SYNTAX_POSIX_EXTENDED)
- .set(RE_BACKSLASH_ESCAPE_IN_LISTS)
- .makeFinal();
-
- RE_SYNTAX_GREP = new RESyntax()
- .set(RE_BK_PLUS_QM)
- .set(RE_CHAR_CLASSES)
- .set(RE_HAT_LISTS_NOT_NEWLINE)
- .set(RE_INTERVALS)
- .set(RE_NEWLINE_ALT)
- .makeFinal();
-
- RE_SYNTAX_EGREP = new RESyntax()
- .set(RE_CHAR_CLASSES)
- .set(RE_CONTEXT_INDEP_ANCHORS)
- .set(RE_CONTEXT_INDEP_OPS)
- .set(RE_HAT_LISTS_NOT_NEWLINE)
- .set(RE_NEWLINE_ALT)
- .set(RE_NO_BK_PARENS)
- .set(RE_NO_BK_VBAR)
- .makeFinal();
-
- RE_SYNTAX_POSIX_EGREP = new RESyntax(RE_SYNTAX_EGREP)
- .set(RE_INTERVALS)
- .set(RE_NO_BK_BRACES)
- .makeFinal();
-
- /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
-
- RE_SYNTAX_ED = new RESyntax(RE_SYNTAX_POSIX_BASIC)
- .makeFinal();
-
- RE_SYNTAX_SED = new RESyntax(RE_SYNTAX_POSIX_BASIC)
- .makeFinal();
-
- RE_SYNTAX_POSIX_MINIMAL_BASIC = new RESyntax(RE_SYNTAX_POSIX_COMMON)
- .set(RE_LIMITED_OPS)
- .makeFinal();
-
- /* Differs from RE_SYNTAX_POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
- replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */
-
- RE_SYNTAX_POSIX_MINIMAL_EXTENDED = new RESyntax(RE_SYNTAX_POSIX_COMMON)
- .set(RE_CONTEXT_INDEP_ANCHORS)
- .set(RE_CONTEXT_INVALID_OPS)
- .set(RE_NO_BK_BRACES)
- .set(RE_NO_BK_PARENS)
- .set(RE_NO_BK_REFS)
- .set(RE_NO_BK_VBAR)
- .set(RE_UNMATCHED_RIGHT_PAREN_ORD)
- .makeFinal();
-
- /* There is no official Perl spec, but here's a "best guess" */
-
- RE_SYNTAX_PERL4 = new RESyntax()
- .set(RE_BACKSLASH_ESCAPE_IN_LISTS)
- .set(RE_CONTEXT_INDEP_ANCHORS)
- .set(RE_CONTEXT_INDEP_OPS) // except for '{', apparently
- .set(RE_INTERVALS)
- .set(RE_NO_BK_BRACES)
- .set(RE_NO_BK_PARENS)
- .set(RE_NO_BK_VBAR)
- .set(RE_NO_EMPTY_RANGES)
- .set(RE_CHAR_CLASS_ESCAPES) // \d,\D,\w,\W,\s,\S
- .makeFinal();
-
- RE_SYNTAX_PERL4_S = new RESyntax(RE_SYNTAX_PERL4)
- .set(RE_DOT_NEWLINE)
- .makeFinal();
-
- RE_SYNTAX_PERL5 = new RESyntax(RE_SYNTAX_PERL4)
- .set(RE_PURE_GROUPING) // (?:)
- .set(RE_STINGY_OPS) // *?,??,+?,{}?
- .set(RE_LOOKAHEAD) // (?=)(?!)
- .set(RE_STRING_ANCHORS) // \A,\Z
- .set(RE_CHAR_CLASS_ESC_IN_LISTS)// \d,\D,\w,\W,\s,\S within []
- .set(RE_COMMENTS) // (?#)
- .set(RE_EMBEDDED_FLAGS) // (?imsx-imsx)
- .set(RE_OCTAL_CHAR) // \0377
- .set(RE_HEX_CHAR) // \x1b
- .set(RE_NAMED_PROPERTY) // \p{prop}, \P{prop}
- .makeFinal();
-
- RE_SYNTAX_PERL5_S = new RESyntax(RE_SYNTAX_PERL5)
- .set(RE_DOT_NEWLINE)
- .makeFinal();
-
- RE_SYNTAX_JAVA_1_4 = new RESyntax(RE_SYNTAX_PERL5)
- // XXX
- .set(RE_POSSESSIVE_OPS) // *+,?+,++,{}+
- .set(RE_UNICODE_CHAR) // \u1234
- .set(RE_NESTED_CHARCLASS) // [a-z&&[^p-r]]
- .makeFinal();
+ public static final RESyntax RE_SYNTAX_JAVA_1_4;
+
+ static
+ {
+ // Define syntaxes: each is built by chaining set() on a new or copied RESyntax, then frozen with makeFinal().
+
+ RE_SYNTAX_EMACS = new RESyntax ().makeFinal (); // no bits set
+
+ RESyntax RE_SYNTAX_POSIX_COMMON = // local variable, not a field: base for the POSIX syntaxes below
+ new RESyntax ().set (RE_CHAR_CLASSES).set (RE_DOT_NEWLINE).
+ set (RE_DOT_NOT_NULL).set (RE_INTERVALS).set (RE_NO_EMPTY_RANGES).
+ makeFinal ();
+
+ RE_SYNTAX_POSIX_BASIC =
+ new RESyntax (RE_SYNTAX_POSIX_COMMON).set (RE_BK_PLUS_QM).makeFinal (); // the copy is not final, so set() is allowed
+
+ RE_SYNTAX_POSIX_EXTENDED =
+ new RESyntax (RE_SYNTAX_POSIX_COMMON).set (RE_CONTEXT_INDEP_ANCHORS).
+ set (RE_CONTEXT_INDEP_OPS).set (RE_NO_BK_BRACES).set (RE_NO_BK_PARENS).
+ set (RE_NO_BK_VBAR).set (RE_UNMATCHED_RIGHT_PAREN_ORD).makeFinal ();
+
+ RE_SYNTAX_AWK =
+ new RESyntax ().set (RE_BACKSLASH_ESCAPE_IN_LISTS).
+ set (RE_DOT_NOT_NULL).set (RE_NO_BK_PARENS).set (RE_NO_BK_REFS).
+ set (RE_NO_BK_VBAR).set (RE_NO_EMPTY_RANGES).
+ set (RE_UNMATCHED_RIGHT_PAREN_ORD).makeFinal ();
+
+ RE_SYNTAX_POSIX_AWK =
+ new RESyntax (RE_SYNTAX_POSIX_EXTENDED).
+ set (RE_BACKSLASH_ESCAPE_IN_LISTS).makeFinal ();
+
+ RE_SYNTAX_GREP =
+ new RESyntax ().set (RE_BK_PLUS_QM).set (RE_CHAR_CLASSES).
+ set (RE_HAT_LISTS_NOT_NEWLINE).set (RE_INTERVALS).set (RE_NEWLINE_ALT).
+ makeFinal ();
+
+ RE_SYNTAX_EGREP =
+ new RESyntax ().set (RE_CHAR_CLASSES).set (RE_CONTEXT_INDEP_ANCHORS).
+ set (RE_CONTEXT_INDEP_OPS).set (RE_HAT_LISTS_NOT_NEWLINE).
+ set (RE_NEWLINE_ALT).set (RE_NO_BK_PARENS).set (RE_NO_BK_VBAR).
+ makeFinal ();
+
+ RE_SYNTAX_POSIX_EGREP =
+ new RESyntax (RE_SYNTAX_EGREP).set (RE_INTERVALS).set (RE_NO_BK_BRACES).
+ makeFinal ();
+
+ /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
+
+ RE_SYNTAX_ED = new RESyntax (RE_SYNTAX_POSIX_BASIC).makeFinal (); // same bits as RE_SYNTAX_POSIX_BASIC
+
+ RE_SYNTAX_SED = new RESyntax (RE_SYNTAX_POSIX_BASIC).makeFinal (); // same bits as RE_SYNTAX_POSIX_BASIC
+
+ RE_SYNTAX_POSIX_MINIMAL_BASIC =
+ new RESyntax (RE_SYNTAX_POSIX_COMMON).set (RE_LIMITED_OPS).makeFinal ();
+
+ /* Differs from RE_SYNTAX_POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
+ replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */
+
+ RE_SYNTAX_POSIX_MINIMAL_EXTENDED =
+ new RESyntax (RE_SYNTAX_POSIX_COMMON).set (RE_CONTEXT_INDEP_ANCHORS).
+ set (RE_CONTEXT_INVALID_OPS).set (RE_NO_BK_BRACES).
+ set (RE_NO_BK_PARENS).set (RE_NO_BK_REFS).set (RE_NO_BK_VBAR).
+ set (RE_UNMATCHED_RIGHT_PAREN_ORD).makeFinal ();
+
+ /* There is no official Perl spec, but here's a "best guess" */
+
+ RE_SYNTAX_PERL4 = new RESyntax ().set (RE_BACKSLASH_ESCAPE_IN_LISTS).set (RE_CONTEXT_INDEP_ANCHORS).set (RE_CONTEXT_INDEP_OPS) // except for '{', apparently
+ .set (RE_INTERVALS).set (RE_NO_BK_BRACES).set (RE_NO_BK_PARENS).set (RE_NO_BK_VBAR).set (RE_NO_EMPTY_RANGES).set (RE_CHAR_CLASS_ESCAPES) // \d,\D,\w,\W,\s,\S
+ .makeFinal ();
+
+ RE_SYNTAX_PERL4_S =
+ new RESyntax (RE_SYNTAX_PERL4).set (RE_DOT_NEWLINE).makeFinal ();
+
+ RE_SYNTAX_PERL5 = new RESyntax (RE_SYNTAX_PERL4).set (RE_PURE_GROUPING) // (?:)
+ .set (RE_STINGY_OPS) // *?,??,+?,{}?
+ .set (RE_LOOKAHEAD) // (?=)(?!)
+ .set (RE_STRING_ANCHORS) // \A,\Z
+ .set (RE_CHAR_CLASS_ESC_IN_LISTS) // \d,\D,\w,\W,\s,\S within []
+ .set (RE_COMMENTS) // (?#)
+ .set (RE_EMBEDDED_FLAGS) // (?imsx-imsx)
+ .set (RE_OCTAL_CHAR) // \0377
+ .set (RE_HEX_CHAR) // \x1b
+ .set (RE_NAMED_PROPERTY) // \p{prop}, \P{prop}
+ .makeFinal ();
+
+ RE_SYNTAX_PERL5_S =
+ new RESyntax (RE_SYNTAX_PERL5).set (RE_DOT_NEWLINE).makeFinal ();
+
+ RE_SYNTAX_JAVA_1_4 = new RESyntax (RE_SYNTAX_PERL5)
+ // XXX
+ .set (RE_POSSESSIVE_OPS) // *+,?+,++,{}+
+ .set (RE_UNICODE_CHAR) // \u1234
+ .set (RE_NESTED_CHARCLASS) // [a-z&&[^p-r]]
+ .makeFinal ();
}
/**
* Construct a new syntax object with all bits turned off.
* This is equivalent to RE_SYNTAX_EMACS.
+ * The new object is not final, so it can still be customized with
+ * set(), clear() and setLineSeparator().
*/
- public RESyntax() {
- bits = new BitSet(BIT_TOTAL);
+ public RESyntax ()
+ {
+ bits = new BitSet (BIT_TOTAL); // every bit starts out clear
}
/**
*
* @return this object for convenient chaining
*/
- public RESyntax makeFinal() {
- isFinal = true;
- return this;
- }
+ public RESyntax makeFinal ()
+ {
+ isFinal = true; // set(), clear() and setLineSeparator() throw IllegalAccessError from now on
+ return this;
+ }
/**
* Construct a new syntax object with all bits set the same
* as the other syntax.
+ * Only the bits are copied: the new object is not final, even if the
+ * other syntax is, and it uses the default line separator rather than
+ * the one set on the other syntax.
+ *
+ * @param other the syntax whose bits are copied; must not be null.
*/
- public RESyntax(RESyntax other) {
- bits = (BitSet) other.bits.clone();
+ public RESyntax (RESyntax other)
+ {
+ bits = (BitSet) other.bits.clone (); // private copy: later set()/clear() calls do not affect other
}
/**
* Check if a given bit is set in this syntax.
+ *
+ * @param index the constant (RESyntax.RE_xxx) bit to test.
+ * @return true if the bit is set in this syntax, false otherwise.
*/
- public boolean get(int index) {
- return bits.get(index);
+ public boolean get (int index)
+ {
+ return bits.get (index);
}
/**
* @param index the constant (RESyntax.RE_xxx) bit to set.
* @return a reference to this object for easy chaining.
*/
- public RESyntax set(int index) {
+ public RESyntax set (int index)
+ {
if (isFinal)
- throw new IllegalAccessError(RE.getLocalizedMessage("syntax.final"));
- bits.set(index);
+ throw new IllegalAccessError (RE.getLocalizedMessage ("syntax.final")); // a syntax frozen by makeFinal() must not be modified
+ bits.set (index); // turn the requested syntax bit on
return this;
}
* @param index the constant (RESyntax.RE_xxx) bit to clear.
* @return a reference to this object for easy chaining.
*/
- public RESyntax clear(int index) {
- if (isFinal)
- throw new IllegalAccessError(RE.getLocalizedMessage("syntax.final"));
- bits.clear(index);
- return this;
+ public RESyntax clear (int index)
+ {
+ if (isFinal) // a syntax frozen by makeFinal() must not be modified
+ throw new IllegalAccessError (RE.getLocalizedMessage ("syntax.final"));
+ bits.clear (index); // turn the requested syntax bit off
+ return this;
}
/**
*
* @return this object for convenient chaining
*/
- public RESyntax setLineSeparator(String aSeparator) {
- if (isFinal)
- throw new IllegalAccessError(RE.getLocalizedMessage("syntax.final"));
- lineSeparator = aSeparator;
- return this;
- }
+ public RESyntax setLineSeparator (String aSeparator)
+ {
+ if (isFinal) // a syntax frozen by makeFinal() must not be modified
+ throw new IllegalAccessError (RE.getLocalizedMessage ("syntax.final"));
+ lineSeparator = aSeparator; // stored as given; no null check is performed here
+ return this;
+ }
/**
* Returns the currently active line separator string. The default
* is the platform-dependent system property "line.separator".
+ *
+ * @return the separator passed to setLineSeparator(), or the default
+ * if setLineSeparator() has not been called on this object.
*/
- public String getLineSeparator() {
- return lineSeparator;
- }
+ public String getLineSeparator ()
+ {
+ return lineSeparator;
+ }
}