2 * This grammar is derived from the Java 1.3 Recognizer
3 * (http://www.antlr.org/grammar/java/java.g) by Mitchell, Parr, Lilley,
4 * Stanchfield, Mohnen, Williams, Jacobs, Messick and Pybus, Version
7 * This grammar recognizes simple Java expressions. The following
8 * language elements are NOT supported:
10 * - type casts to non-primitive types
15 * - increment and decrement operators (both prefix/postfix)
16 * - expressions involving constant classes (Abc.class)
20 package gnu.classpath.tools.gjdoc.expr;
23 class JavaRecognizer extends Parser;
25 k = 2; // two token lookahead
26 exportVocab=Java; // Call its vocabulary "Java"
27 codeGenMakeSwitchThreshold = 2; // Some optimizations
28 codeGenBitsetTestThreshold = 3;
29 defaultErrorHandler = false; // Don't generate parser error handlers
34 BLOCK; MODIFIERS; OBJBLOCK; SLIST; CTOR_DEF; METHOD_DEF; VARIABLE_DEF;
35 INSTANCE_INIT; STATIC_INIT; TYPE; CLASS_DEF; INTERFACE_DEF;
36 PACKAGE_DEF; ARRAY_DECLARATOR; EXTENDS_CLAUSE; IMPLEMENTS_CLAUSE;
37 PARAMETERS; PARAMETER_DEF; LABELED_STAT; TYPECAST; INDEX_OP;
38 POST_INC; POST_DEC; METHOD_CALL; EXPR; ARRAY_INIT;
39 IMPORT; UNARY_MINUS; UNARY_PLUS; CASE_GROUP; ELIST; FOR_INIT; FOR_CONDITION;
40 FOR_ITERATOR; EMPTY_STAT; FINAL="final"; ABSTRACT="abstract";
41 STRICTFP="strictfp"; SUPER_CTOR_CALL; CTOR_CALL;
44 // A builtin type specification is a builtin type with possible brackets
45 // afterwards (which would make it an array type).
46 builtInTypeSpec[boolean addImagNode] returns [Type t = null]
47 : t=builtInType (lb:LBRACK^ {#lb.setType(ARRAY_DECLARATOR);} RBRACK!)*
50 #builtInTypeSpec = #(#[TYPE,"TYPE"], #builtInTypeSpec);
55 // A type name, which is either a (possibly qualified) class name or
56 // a primitive (builtin) type
61 // The primitive types.
// Maps each built-in type keyword to the matching Type constant, which
// becomes the rule's return value. Note that "String" is accepted here
// alongside "void" and the Java primitive type names.
62 builtInType returns [Type t = null]
63 : "void" {t=Type.VOID;}
64 | "boolean" {t=Type.BOOLEAN;}
65 | "byte" {t=Type.BYTE;}
66 | "char" {t=Type.CHAR;}
67 | "short" {t=Type.SHORT;}
68 | "int" {t=Type.INTEGER;}
69 | "float"{t=Type.FLOAT;}
70 | "long" {t=Type.LONG;}
71 | "double" {t=Type.DOUBLE;}
72 | "String" {t=Type.STRING;}
75 // A (possibly-qualified) java identifier. We start with the first IDENT
76 // and expand its name by adding dots and following IDENTS
// The returned string s is the text of every matched IDENT, joined with ".".
77 identifier returns [String s = null;]
78 : i:IDENT {s=i.getText();} ( DOT^ i2:IDENT {s+="."+i2.getText();} )*
// A complete expression: one conditionalExpression followed by EOF.
// The Expression built by conditionalExpression is returned unchanged.
81 expression returns [Expression e = null]
82 : e=conditionalExpression EOF!
85 // conditional test (level 12)
// Parses a logicalOrExpression and, when a QUESTION token follows, the two
// branches separated by COLON. In that case the result is a
// ConditionalExpression built from the condition and both branches;
// otherwise the logicalOrExpression result is returned as is.
// Both branches recurse into conditionalExpression.
86 conditionalExpression returns [Expression e = null] { Expression a,b,c; }
87 : e=logicalOrExpression
88 ( QUESTION^ b=conditionalExpression COLON! c=conditionalExpression {e=new ConditionalExpression(e,b,c);} )?
92 // logical or (||) (level 11)
// Each further LOR operand wraps the result so far as the left operand of a
// new LogicalOrExpression, so a chain of operators nests to the left.
93 logicalOrExpression returns [Expression e = null] { Expression a,b; }
94 : e=logicalAndExpression (LOR^ b=logicalAndExpression {e=new LogicalOrExpression(e,b);})*
98 // logical and (&&) (level 10)
// Each further LAND operand wraps the result so far as the left operand of a
// new LogicalAndExpression, so a chain of operators nests to the left.
99 logicalAndExpression returns [Expression e = null] { Expression a,b; }
100 : e=inclusiveOrExpression (LAND^ b=inclusiveOrExpression {e=new LogicalAndExpression(e,b);})*
104 // bitwise or non-short-circuiting or (|) (level 9)
// Each further BOR operand wraps the result so far as the left operand of a
// new InclusiveOrExpression, so a chain of operators nests to the left.
105 inclusiveOrExpression returns [Expression e = null] { Expression a,b; }
106 : e=exclusiveOrExpression (BOR^ b=exclusiveOrExpression {e=new InclusiveOrExpression(e,b);})*
110 // exclusive or (^) (level 8)
// Each further BXOR operand wraps the result so far as the left operand of a
// new ExclusiveOrExpression, so a chain of operators nests to the left.
111 exclusiveOrExpression returns [Expression e = null] { Expression a,b; }
112 : e=andExpression (BXOR^ b=andExpression {e=new ExclusiveOrExpression(e,b);})*
116 // bitwise or non-short-circuiting and (&) (level 7)
// Each further BAND operand wraps the result so far as the left operand of a
// new AndExpression, so a chain of operators nests to the left.
117 andExpression returns [Expression e = null] { Expression a,b; }
118 : e=equalityExpression (BAND^ b=equalityExpression {e=new AndExpression(e,b);})*
122 // equality/inequality (==/!=) (level 6)
// After the first relationalExpression, every NOT_EQUAL or EQUAL operator
// consumes one more relationalExpression and wraps the result so far in a
// NotEqualExpression or EqualExpression respectively (left operand first).
123 equalityExpression returns [Expression e = null] { Expression a,b; }
124 : e=relationalExpression ((NOT_EQUAL^ a=relationalExpression {e=new NotEqualExpression(e,a);} | EQUAL^ a=relationalExpression {e=new EqualExpression(e,a);}))*
128 // boolean relational expressions (level 5)
129 relationalExpression returns [Expression e = null] { Expression a,b; }
131 ( ( ( LT^ a=shiftExpression {e=new LessThanExpression(e,a);}
132 | GT^ a=shiftExpression {e=new GreaterThanExpression(e,a);}
133 | LE^ a=shiftExpression {e=new LessThanOrEqualExpression(e,a);}
134 | GE^ a=shiftExpression {e=new GreaterThanOrEqualExpression(e,a);}
142 // bit shift expressions (level 4)
// After the first additiveExpression, each shift operator consumes one more
// additiveExpression and wraps the result so far: SL builds a
// ShiftLeftExpression, SR a ShiftRightExpression and BSR a
// BitShiftRightExpression (left operand first).
143 shiftExpression returns [Expression e = null] { Expression a,b; }
144 : e=additiveExpression ((SL^ a=additiveExpression {e=new ShiftLeftExpression(e,a);} | SR^ a=additiveExpression {e=new ShiftRightExpression(e,a);} | BSR^ a=additiveExpression {e=new BitShiftRightExpression(e,a);}))*
148 // binary addition/subtraction (level 3)
// After the first multiplicativeExpression, each PLUS or MINUS consumes one
// more multiplicativeExpression and wraps the result so far in an
// AdditionExpression or SubtractionExpression (left operand first).
149 additiveExpression returns [Expression e = null] { Expression a,b; }
150 : e=multiplicativeExpression ((PLUS^ a=multiplicativeExpression {e=new AdditionExpression(e,a);} | MINUS^ a=multiplicativeExpression {e=new SubtractionExpression(e,a);}))*
154 // multiplication/division/modulo (level 2)
// After the first unaryExpression, each STAR, DIV or MOD consumes one more
// unaryExpression and wraps the result so far in a MultiplicationExpression,
// DivisionExpression or ModuloExpression (left operand first).
155 multiplicativeExpression returns [Expression e = null] { Expression a,b; }
156 : e=unaryExpression ((STAR^ a=unaryExpression {e=new MultiplicationExpression(e,a);} | DIV^ a=unaryExpression {e=new DivisionExpression(e,a);} | MOD^ a=unaryExpression {e=new ModuloExpression(e,a);} ))*
// Unary sign operators. A leading MINUS wraps its operand in a
// NegateExpression; a leading PLUS returns the operand's expression
// unchanged. The matched token is retyped to UNARY_MINUS / UNARY_PLUS in
// both cases. Anything else is delegated to unaryExpressionNotPlusMinus.
160 unaryExpression returns [Expression e = null] { Expression a,b; }
161 : MINUS^ {#MINUS.setType(UNARY_MINUS);} a=unaryExpression {e=new NegateExpression(a);}
162 | PLUS^ {#PLUS.setType(UNARY_PLUS);} e=unaryExpression
163 | e=unaryExpressionNotPlusMinus
// Remaining unary forms: bitwise complement (BNOT -> NotExpression),
// logical negation (LNOT -> LogicalNotExpression), a cast to a built-in
// type (TypeCastExpression), or a plain primaryExpression.
166 unaryExpressionNotPlusMinus returns [Expression e = null] { Expression a; Type t; }
167 : BNOT^ a=unaryExpression {e=new NotExpression(a);}
168 | LNOT^ a=unaryExpression {e=new LogicalNotExpression(a);}
170 // use predicate to skip cases like: (int.class)
// The cast alternative is only chosen when LPAREN, a built-in type spec and
// RPAREN are all present; the LPAREN token is then retyped to TYPECAST.
171 | (LPAREN builtInTypeSpec[true] RPAREN) =>
172 lpb:LPAREN^ {#lpb.setType(TYPECAST);} t=builtInTypeSpec[true] RPAREN!
173 a=unaryExpression {e=new TypeCastExpression(t,a);}
175 | e=primaryExpression
178 // the basic element of an expression
179 primaryExpression returns [Expression e = null; String i = null;]
181 | i=identifier {e=new IdentifierExpression(i);}
182 | "true" { e=new ConstantBoolean(true); }
183 | "false" { e=new ConstantBoolean(false); }
184 | "null" { e=new ConstantNull(); }
185 | LPAREN! e=conditionalExpression RPAREN!
188 /** Match a, a.b.c refs
190 identPrimary returns [Expression e = null]
194 // .ident could match here or in postfixExpression.
195 // We do want to match here. Turn off warning.
// Literal constants. Each token's text is handed to the matching Constant*
// class. For string literals the first and last characters of the token
// text are removed first, i.e. the enclosing double quotes matched by the
// lexer's string literal rule.
// NOTE(review): escape sequences in the string text are passed on as
// written; presumably ConstantString decodes them -- confirm.
202 constant returns [Expression e = null]
203 : l1:NUM_INT {e=new ConstantInteger(l1.getText());}
204 | l2:CHAR_LITERAL {e=new ConstantChar(l2.getText());}
205 | l3:STRING_LITERAL {e=new ConstantString(l3.getText().substring(1, l3.getText().length()-1)); }
206 | l4:NUM_FLOAT {e=new ConstantFloat(l4.getText());}
207 | l5:NUM_LONG {e=new ConstantLong(l5.getText());}
208 | l6:NUM_DOUBLE {e=new ConstantDouble(l6.getText());}
212 //----------------------------------------------------------------------------
214 //----------------------------------------------------------------------------
215 class JavaLexer extends Lexer;
218 exportVocab=Java; // call the vocabulary "Java"
219 testLiterals=false; // don't automatically test for literals
220 k=4; // four characters of lookahead
221 charVocabulary='\u0003'..'\uFFFF';
222 // without inlining some bitset tests, couldn't do unicode;
223 // I need to make ANTLR generate smaller bitsets; see
224 // bottom of JavaLexer.java
225 codeGenBitsetTestThreshold=20;
252 MINUS_ASSIGN : "-=" ;
261 BSR_ASSIGN : ">>>=" ;
279 // Whitespace -- ignored
284 | ( options {generateAmbigWarnings=false;}
287 | '\n' // Unix (the right way)
291 { _ttype = Token.SKIP; }
294 // Single-line comments
297 (~('\n'|'\r'))* ('\n'|'\r'('\n')?)
298 {$setType(Token.SKIP); newline();}
301 // multiple-line comments
304 ( /* '\r' '\n' can be matched in one alternative or by matching
305 '\r' in one iteration and '\n' in another. I am trying to
306 handle any flavor of newline that comes in, but the language
307 that allows both "\r\n" and "\r" and "\n" to all be valid
308 newline is ambiguous. Consequently, the resulting grammar
309 must be ambiguous. I'm shutting this warning off.
312 generateAmbigWarnings=false;
316 | '\r' '\n' {newline();}
322 {$setType(Token.SKIP);}
326 // character literals
328 : '\'' ( ESC | ~('\''|'\n'|'\r'|'\\') ) '\''
333 : '"' (ESC|~('"'|'\\'|'\n'|'\r'))* '"'
337 // escape sequence -- note that this is protected; it can only be called
338 // from another lexer rule -- it will not ever directly return a token to
340 // There are various ambiguities hushed in this rule. The optional
341 // '0'...'9' digit matches should be matched here rather than letting
342 // them go back to STRING_LITERAL to be matched. ANTLR does the
343 // right thing by matching immediately; hence, it's ok to shut off
344 // the FOLLOW ambig warnings.
356 | ('u')+ HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
360 warnWhenFollowAmbig = false;
365 warnWhenFollowAmbig = false;
373 warnWhenFollowAmbig = false;
381 // hexadecimal digit (again, note it's protected!)
384 : ('0'..'9'|'A'..'F'|'a'..'f')
388 // a dummy rule to force vocabulary to be all characters (except special
389 // ones that ANTLR uses internally (0 to 2))
396 // an identifier. Note that testLiterals is set to true! This means
397 // that after we match the rule, we look in the literals table to see
398 // if it's a literal or really an identifier
400 options {testLiterals=true;}
401 : ('a'..'z'|'A'..'Z'|'_'|'$') ('a'..'z'|'A'..'Z'|'_'|'0'..'9'|'$')*
407 {boolean isDecimal=false; Token t=null;}
408 : '.' {_ttype = DOT;}
409 ( ('0'..'9')+ (EXPONENT)? (f1:FLOAT_SUFFIX {t=f1;})?
411 if (t != null && t.getText().toUpperCase().indexOf('F')>=0) {
415 _ttype = NUM_DOUBLE; // assume double
420 | ( '0' {isDecimal = true;} // special case for just '0'
423 // the 'e'|'E' and float suffix stuff look
424 // like hex digits, hence the (...)+ doesn't
425 // know when to stop: ambig. ANTLR resolves
426 // it correctly by matching immediately. It
427 // is therefore ok to hush warning.
429 warnWhenFollowAmbig=false;
434 | //float or double with leading zero
435 (('0'..'9')+ ('.'|EXPONENT|FLOAT_SUFFIX)) => ('0'..'9')+
437 | ('0'..'7')+ // octal
439 | ('1'..'9') ('0'..'9')* {isDecimal=true;} // non-zero decimal
441 ( ('l'|'L') { _ttype = NUM_LONG; }
443 // only check to see if it's a float if looks like decimal so far
445 ( '.' ('0'..'9')* (EXPONENT)? (f2:FLOAT_SUFFIX {t=f2;})?
446 | EXPONENT (f3:FLOAT_SUFFIX {t=f3;})?
447 | f4:FLOAT_SUFFIX {t=f4;}
450 if (t != null && t.getText().toUpperCase() .indexOf('F') >= 0) {
454 _ttype = NUM_DOUBLE; // assume double
461 // a couple protected methods to assist in matching floating point numbers
464 : ('e'|'E') ('+'|'-')? ('0'..'9')+