libjava/classpath/tools/gnu/classpath/tools/gjdoc/expr/java-expression.g

   1 /*
   2  * This grammar is derived from the Java 1.3 Recognizer
   3  * (http://www.antlr.org/grammar/java/java.g) by Mitchell, Parr, Lilley,
   4  * Stanchfield, Mohnen, Williams, Jacobs, Messick and Pybus, Version
   5  * 1.21.
   6  *
   7  * This grammar recognizes simple Java expressions. The following
   8  * language elements are NOT supported:
   9  *
  10  * - type casts to non-primitive types (casts to String are accepted)
  11  * - method calls
  12  * - constructor calls
  13  * - array access
  14  * - comma expressions, assignments and instanceof tests
  15  * - increment and decrement operators (both prefix/postfix)
  16  * - expressions involving constant classes (Abc.class)
  17  */
  18
  19 header {
  20    package gnu.classpath.tools.gjdoc.expr;
  21 }
  22
  23 class JavaRecognizer extends Parser;
  24 options {
  25         k = 2;                           // two token lookahead
  26         exportVocab=Java;                // Call its vocabulary "Java"
  27         codeGenMakeSwitchThreshold = 2;  // Some optimizations
  28         codeGenBitsetTestThreshold = 3;  // (code generation tuning as well)
  29         defaultErrorHandler = false;     // Don't generate parser error handlers
  30         buildAST = true;                 // AST construction is on; the rules below use the ^ and ! operators
  31 }
  32 // Token vocabulary: imaginary AST node types plus the keyword literals final, abstract and strictfp.
  33 tokens {
  34         BLOCK; MODIFIERS; OBJBLOCK; SLIST; CTOR_DEF; METHOD_DEF; VARIABLE_DEF;
  35         INSTANCE_INIT; STATIC_INIT; TYPE; CLASS_DEF; INTERFACE_DEF;
  36         PACKAGE_DEF; ARRAY_DECLARATOR; EXTENDS_CLAUSE; IMPLEMENTS_CLAUSE;
  37         PARAMETERS; PARAMETER_DEF; LABELED_STAT; TYPECAST; INDEX_OP;
  38         POST_INC; POST_DEC; METHOD_CALL; EXPR; ARRAY_INIT;
  39         IMPORT; UNARY_MINUS; UNARY_PLUS; CASE_GROUP; ELIST; FOR_INIT; FOR_CONDITION;
  40         FOR_ITERATOR; EMPTY_STAT; FINAL="final"; ABSTRACT="abstract";
  41         STRICTFP="strictfp"; SUPER_CTOR_CALL; CTOR_CALL;
  42 }       // the rules in this file reference only TYPE, ARRAY_DECLARATOR, TYPECAST, UNARY_MINUS and UNARY_PLUS
  43
  44 // A built-in type name optionally followed by "[]" pairs.  Only the element
  45 // type is returned in t; the brackets are recorded in the AST alone.
  46 builtInTypeSpec[boolean addImagNode] returns [Type t = null]
  47         :       t=builtInType
  48                 ( bracket:LBRACK^ {#bracket.setType(ARRAY_DECLARATOR);} RBRACK! )*
  49                 {       if ( addImagNode ) {    // braces required: the AST assignment below expands to several statements
  50                                 #builtInTypeSpec = #(#[TYPE,"TYPE"], #builtInTypeSpec);
  51                         }
  52                 }
  53         ;
  54
  55 // A type name.  Only built-in types are accepted: this grammar has no
  56 //   alternative for (possibly qualified) class names.
  57 type returns [Type t = null]
  58         :       t=builtInType
  59         ;
  60
  61 // The built-in types: the Java primitives plus "void" and "String"; each maps to a Type constant.
  62 builtInType returns [Type t = null]
  63         :       "void" {t=Type.VOID;}
  64         |       "boolean" {t=Type.BOOLEAN;}
  65         |       "byte" {t=Type.BYTE;}
  66         |       "char" {t=Type.CHAR;}
  67         |       "short" {t=Type.SHORT;}
  68         |       "int" {t=Type.INTEGER;}
  69         |       "float"{t=Type.FLOAT;}
  70         |       "long" {t=Type.LONG;}
  71         |       "double" {t=Type.DOUBLE;}
  72         |       "String" {t=Type.STRING;}
  73         ;
  74
  75 // A (possibly-qualified) Java identifier.  Starts from the first IDENT and
  76 //   appends ".name" for every DOT IDENT pair that follows; returns the joined text.
  77 identifier returns [String s = null]
  78         :       head:IDENT {s=head.getText();}  ( DOT^ part:IDENT {s+="."+part.getText();} )*
  79         ;
  80 // A complete input: one conditional expression that must be followed by end of input (EOF).
  81 expression returns [Expression e = null]
  82     :   e=conditionalExpression EOF!
  83     ;
  84
  85 // conditional test "c ? x : y" (level 12); both branches recurse into this rule, so it nests to the right
  86 conditionalExpression returns [Expression e = null] { Expression ifTrue, ifFalse; }
  87         :       e=logicalOrExpression
  88                 ( QUESTION^ ifTrue=conditionalExpression COLON! ifFalse=conditionalExpression {e=new ConditionalExpression(e,ifTrue,ifFalse);} )?
  89         ;
  90
  91
  92 // logical or (||)  (level 11); operands are folded left to right into e
  93 logicalOrExpression returns [Expression e = null] { Expression rhs; }
  94         :       e=logicalAndExpression ( LOR^ rhs=logicalAndExpression {e=new LogicalOrExpression(e,rhs);} )*
  95         ;
  96
  97
  98 // logical and (&&)  (level 10); operands are folded left to right into e
  99 logicalAndExpression returns [Expression e = null] { Expression rhs; }
 100         :       e=inclusiveOrExpression ( LAND^ rhs=inclusiveOrExpression {e=new LogicalAndExpression(e,rhs);} )*
 101         ;
 102
 103
 104 // bitwise or non-short-circuiting or (|)  (level 9); operands are folded left to right into e
 105 inclusiveOrExpression returns [Expression e = null] { Expression rhs; }
 106         :       e=exclusiveOrExpression ( BOR^ rhs=exclusiveOrExpression {e=new InclusiveOrExpression(e,rhs);} )*
 107         ;
 108
 109
 110 // exclusive or (^)  (level 8); operands are folded left to right into e
 111 exclusiveOrExpression returns [Expression e = null] { Expression rhs; }
 112         :       e=andExpression ( BXOR^ rhs=andExpression {e=new ExclusiveOrExpression(e,rhs);} )*
 113         ;
 114
 115
 116 // bitwise or non-short-circuiting and (&)  (level 7); operands are folded left to right into e
 117 andExpression returns [Expression e = null] { Expression rhs; }
 118         :       e=equalityExpression ( BAND^ rhs=equalityExpression {e=new AndExpression(e,rhs);} )*
 119         ;
 120
 121
 122 // equality/inequality (==/!=) (level 6); operands are folded left to right into e
 123 equalityExpression returns [Expression e = null] { Expression rhs; }
 124         :       e=relationalExpression ( NOT_EQUAL^ rhs=relationalExpression {e=new NotEqualExpression(e,rhs);} | EQUAL^ rhs=relationalExpression {e=new EqualExpression(e,rhs);} )*
 125         ;
 126
 127
 128 // boolean relational expressions <, >, <=, >= (level 5); operands are folded left to right into e
 129 relationalExpression returns [Expression e = null] { Expression rhs; }
 130         :       e=shiftExpression
 131                 (       LT^ rhs=shiftExpression {e=new LessThanExpression(e,rhs);}
 132                 |       GT^ rhs=shiftExpression {e=new GreaterThanExpression(e,rhs);}
 133                 |       LE^ rhs=shiftExpression {e=new LessThanOrEqualExpression(e,rhs);}
 134                 |       GE^ rhs=shiftExpression {e=new GreaterThanOrEqualExpression(e,rhs);}
 135                 )*
 136                 // There is no "instanceof" alternative in this rule, so
 137                 // instanceof tests are not recognized by this grammar.
 138
 139         ;
 140
 141
 142 // bit shift expressions <<, >>, >>> (level 4); operands are folded left to right into e
 143 shiftExpression returns [Expression e = null] { Expression rhs; }
 144         :       e=additiveExpression ( SL^ rhs=additiveExpression {e=new ShiftLeftExpression(e,rhs);} | SR^ rhs=additiveExpression {e=new ShiftRightExpression(e,rhs);} | BSR^ rhs=additiveExpression {e=new BitShiftRightExpression(e,rhs);} )*
 145         ;
 146
 147
 148 // binary addition/subtraction (level 3); operands are folded left to right into e
 149 additiveExpression returns [Expression e = null] { Expression rhs; }
 150         :       e=multiplicativeExpression ( PLUS^ rhs=multiplicativeExpression {e=new AdditionExpression(e,rhs);} | MINUS^ rhs=multiplicativeExpression {e=new SubtractionExpression(e,rhs);} )*
 151         ;
 152
 153
 154 // multiplication/division/modulo (level 2); operands are folded left to right into e
 155 multiplicativeExpression returns [Expression e = null] { Expression rhs; }
 156         :       e=unaryExpression ( STAR^ rhs=unaryExpression {e=new MultiplicationExpression(e,rhs);} | DIV^ rhs=unaryExpression {e=new DivisionExpression(e,rhs);} | MOD^ rhs=unaryExpression {e=new ModuloExpression(e,rhs);} )*
 157         ;
 158
 159 // unary minus and plus; minus wraps its operand in NegateExpression, plus returns the operand's Expression unchanged
 160 unaryExpression returns [Expression e = null] { Expression operand; }
 161         :       MINUS^ {#MINUS.setType(UNARY_MINUS);} operand=unaryExpression {e=new NegateExpression(operand);}
 162         |       PLUS^  {#PLUS.setType(UNARY_PLUS);} e=unaryExpression
 163         |       e=unaryExpressionNotPlusMinus
 164         ;
 165
 166 unaryExpressionNotPlusMinus returns [Expression e = null] { Expression operand; Type castType; }
 167         :       BNOT^ operand=unaryExpression {e=new NotExpression(operand);}
 168         |       LNOT^ operand=unaryExpression {e=new LogicalNotExpression(operand);}
 169
 170                 // Cast to a built-in type.  The syntactic predicate selects this
 171                 // alternative only when "(" builtInTypeSpec ")" is what follows.
 172         |       ( LPAREN builtInTypeSpec[true] RPAREN ) =>
 173                 paren:LPAREN^ {#paren.setType(TYPECAST);} castType=builtInTypeSpec[true] RPAREN!
 174                 operand=unaryExpression {e=new TypeCastExpression(castType,operand);}
 175         |       e=primaryExpression
 176         ;
 177
 178 // the basic element of an expression: a literal, an identifier, or a parenthesized conditional expression
 179 primaryExpression returns [Expression e = null] { String name = null; }
 180         :       e=constant
 181         |       name=identifier {e=new IdentifierExpression(name);}
 182         |       "true" { e=new ConstantBoolean(true); }
 183         |       "false" { e=new ConstantBoolean(false); }
 184         |       "null" { e=new ConstantNull(); }
 185         |       LPAREN! e=conditionalExpression RPAREN!
 186         ;
 187
 188 /** Match a, a.b.c refs.  NOTE(review): e is never assigned in this rule, so it
 189  *  returns null; no other rule in this file invokes identPrimary. */
 190 identPrimary returns [Expression e = null]
 191         :       IDENT
 192                 (
 193             options {
 194                                 // .ident could match here or in postfixExpression.
 195                                 // We do want to match here.  Turn off warning.
 196                                 greedy=true;
 197                         }
 198                 :       DOT^ IDENT
 199                 )*
 200     ;
 201 // Literal constants.  Each token's text goes to the matching Constant* class; only string literals have their quotes removed first.
 202 constant returns [Expression e = null]
 203         :       intLit:NUM_INT {e=new ConstantInteger(intLit.getText());}
 204         |       charLit:CHAR_LITERAL {e=new ConstantChar(charLit.getText());}
 205         |       strLit:STRING_LITERAL { String quoted=strLit.getText(); e=new ConstantString(quoted.substring(1, quoted.length()-1)); }
 206         |       floatLit:NUM_FLOAT {e=new ConstantFloat(floatLit.getText());}
 207         |       longLit:NUM_LONG {e=new ConstantLong(longLit.getText());}
 208         |       doubleLit:NUM_DOUBLE {e=new ConstantDouble(doubleLit.getText());}
 209         ;
 210
 211
 212 //----------------------------------------------------------------------------
 213 // The Java scanner: operators, literals, identifiers, whitespace and comments
 214 //----------------------------------------------------------------------------
 215 class JavaLexer extends Lexer;
 216
 217 options {
 218         exportVocab=Java;      // call the vocabulary "Java"
 219         testLiterals=false;    // don't automatically test for literals (the IDENT rule turns it on for itself)
 220         k=4;                   // four characters of lookahead
 221         charVocabulary='\u0003'..'\uFFFF';   // every character from U+0003 upwards
 222         // without inlining some bitset tests, couldn't do unicode;
 223         // I need to make ANTLR generate smaller bitsets; see
 224         // bottom of JavaLexer.java
 225         codeGenBitsetTestThreshold=20;
 226 }
 227
 228
 229
 230 // OPERATORS.  Several of these (assignments, ++/--, braces, comma, semicolon) are not used by the parser rules in this file.
 231 QUESTION                :       '?'             ;
 232 LPAREN                  :       '('             ;
 233 RPAREN                  :       ')'             ;
 234 LBRACK                  :       '['             ;
 235 RBRACK                  :       ']'             ;
 236 LCURLY                  :       '{'             ;
 237 RCURLY                  :       '}'             ;
 238 COLON                   :       ':'             ;
 239 COMMA                   :       ','             ;
 240 //DOT                   :       '.'             ;  -- disabled: NUM_INT emits DOT, since it must tell a lone '.' from a float such as .5
 241 ASSIGN                  :       '='             ;
 242 EQUAL                   :       "=="    ;
 243 LNOT                    :       '!'             ;
 244 BNOT                    :       '~'             ;
 245 NOT_EQUAL               :       "!="    ;
 246 DIV                             :       '/'             ;
 247 DIV_ASSIGN              :       "/="    ;
 248 PLUS                    :       '+'             ;
 249 PLUS_ASSIGN             :       "+="    ;
 250 INC                             :       "++"    ;
 251 MINUS                   :       '-'             ;
 252 MINUS_ASSIGN    :       "-="    ;
 253 DEC                             :       "--"    ;
 254 STAR                    :       '*'             ;
 255 STAR_ASSIGN             :       "*="    ;
 256 MOD                             :       '%'             ;
 257 MOD_ASSIGN              :       "%="    ;
 258 SR                              :       ">>"    ;
 259 SR_ASSIGN               :       ">>="   ;
 260 BSR                             :       ">>>"   ;
 261 BSR_ASSIGN              :       ">>>="  ;
 262 GE                              :       ">="    ;
 263 GT                              :       ">"             ;
 264 SL                              :       "<<"    ;
 265 SL_ASSIGN               :       "<<="   ;
 266 LE                              :       "<="    ;
 267 LT                              :       '<'             ;
 268 BXOR                    :       '^'             ;
 269 BXOR_ASSIGN             :       "^="    ;
 270 BOR                             :       '|'             ;
 271 BOR_ASSIGN              :       "|="    ;
 272 LOR                             :       "||"    ;
 273 BAND                    :       '&'             ;
 274 BAND_ASSIGN             :       "&="    ;
 275 LAND                    :       "&&"    ;
 276 SEMI                    :       ';'             ;
 277
 278
 279 // Whitespace: blanks, tabs, form feeds and line terminators are skipped
 280 WS
 281         :       (       ' '
 282                 |       '\t'
 283                 |       '\f'
 284                 |       // line terminators additionally advance the line counter
 285                         (       options {generateAmbigWarnings=false;}
 286                         :       "\r\n"  // DOS
 287                         |       '\r'    // classic Mac OS
 288                         |       '\n'    // Unix
 289                         )       { newline(); }
 290                 )+
 291                 { $setType(Token.SKIP); }
 292         ;
 293
 294 // Single-line comments (skipped).  The line terminator is optional so a comment may also end at end of input.
 295 SL_COMMIT       // (sic) name kept as is: it is part of the exported token vocabulary
 296         :       "//"
 297                 (~('\n'|'\r'))* ( ('\n'|'\r'('\n')?) {newline();} )?
 298                 {$setType(Token.SKIP);}
 299         ;
 300
 301 // Multi-line comments: skipped; line terminators inside them are counted via newline()
 302 ML_COMMENT
 303         :       "/*"
 304                 (       /*      '\r' '\n' can be matched in one alternative or by matching
 305                                 '\r' in one iteration and '\n' in another.  I am trying to
 306                                 handle any flavor of newline that comes in, but the language
 307                                 that allows both "\r\n" and "\r" and "\n" to all be valid
 308                                 newline is ambiguous.  Consequently, the resulting grammar
 309                                 must be ambiguous.  I'm shutting this warning off.
 310                          */
 311                         options {
 312                                 generateAmbigWarnings=false;
 313                         }
 314                 :
 315                         { LA(2)!='/' }? '*'     // a '*' belongs to the body only when no '/' follows it
 316                 |       '\r' '\n'               {newline();}
 317                 |       '\r'                    {newline();}
 318                 |       '\n'                    {newline();}
 319                 |       ~('*'|'\n'|'\r')
 320                 )*
 321                 "*/"
 322                 {$setType(Token.SKIP);}
 323         ;
 324
 325
 326 // character literals: exactly one escape sequence or one ordinary character between single quotes
 327 CHAR_LITERAL
 328         :       '\'' ( ESC | ~('\''|'\n'|'\r'|'\\') ) '\''
 329         ;
 330
 331 // string literals: escape sequences and ordinary characters between double quotes; raw line breaks are excluded
 332 STRING_LITERAL
 333         :       '"' (ESC|~('"'|'\\'|'\n'|'\r'))* '"'
 334         ;
 335
 336
 337 // escape sequence -- note that this is protected; it can only be called
 338 //   from another lexer rule -- it will not ever directly return a token to
 339 //   the parser
 340 // There are various ambiguities hushed in this rule.  The optional
 341 // '0'..'7' digit matches should be matched here rather than letting
 342 // them go back to STRING_LITERAL to be matched.  ANTLR does the
 343 // right thing by matching immediately; hence, it's ok to shut off
 344 // the FOLLOW ambig warnings.
 345 protected
 346 ESC
 347         :       '\\'
 348                 (       'n'
 349                 |       'r'
 350                 |       't'
 351                 |       'b'
 352                 |       'f'
 353                 |       '"'
 354                 |       '\''
 355                 |       '\\'
 356                 |       ('u')+ HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT     // one or more 'u', then exactly four hex digits
 357                 |       '0'..'3'        // octal escape starting 0-3: up to three digits in total
 358                         (
 359                                 options {
 360                                         warnWhenFollowAmbig = false;
 361                                 }
 362                         :       '0'..'7'
 363                                 (
 364                                         options {
 365                                                 warnWhenFollowAmbig = false;
 366                                         }
 367                                 :       '0'..'7'
 368                                 )?
 369                         )?
 370                 |       '4'..'7'        // octal escape starting 4-7: at most two digits in total
 371                         (
 372                                 options {
 373                                         warnWhenFollowAmbig = false;
 374                                 }
 375                         :       '0'..'7'
 376                         )?
 377                 )
 378         ;
 379
 380
 381 // hexadecimal digit, upper or lower case (again, note it's protected!); used by ESC and NUM_INT
 382 protected
 383 HEX_DIGIT
 384         :       ('0'..'9'|'A'..'F'|'a'..'f')
 385         ;
 386
 387
 388 // a dummy rule to force vocabulary to be all characters (except special
 389 //   ones that ANTLR uses internally (0 to 2)); no other rule refers to it
 390 protected
 391 VOCAB
 392         :       '\3'..'\377'
 393         ;
 394
 395
 396 // an identifier.  Note that testLiterals is set to true!  This means
 397 // that after we match the rule, we look in the literals table to see
 398 // if it's a literal or really an identifier.  Only ASCII letters, digits, '_' and '$' are matched.
 399 IDENT
 400         options {testLiterals=true;}
 401         :       ('a'..'z'|'A'..'Z'|'_'|'$') ('a'..'z'|'A'..'Z'|'_'|'0'..'9'|'$')*
 402         ;
 403
 404
 405 // a numeric literal; the token type is adjusted to DOT, NUM_LONG, NUM_FLOAT or NUM_DOUBLE depending on what was matched
 406 NUM_INT
 407         {boolean isDecimal=false; Token t=null;}
 408     :   '.' {_ttype = DOT;}    // a lone '.' is the DOT token; digits after it turn it into a float/double below
 409             (   ('0'..'9')+ (EXPONENT)? (f1:FLOAT_SUFFIX {t=f1;})?
 410                 {
 411                                 if (t != null && t.getText().toUpperCase().indexOf('F')>=0) {
 412                         _ttype = NUM_FLOAT;
 413                                 }
 414                                 else {
 415                         _ttype = NUM_DOUBLE; // assume double
 416                                 }
 417                                 }
 418             )?
 419
 420         |       (       '0' {isDecimal = true;} // special case for just '0'
 421                         (       ('x'|'X')
 422                                 (                                                                                       // hex
 423                                         // the 'e'|'E' and float suffix stuff look
 424                                         // like hex digits, hence the (...)+ doesn't
 425                                         // know when to stop: ambig.  ANTLR resolves
 426                                         // it correctly by matching immediately.  It
 427                                         // is therefore ok to hush warning.
 428                                         options {
 429                                                 warnWhenFollowAmbig=false;
 430                                         }
 431                                 :       HEX_DIGIT
 432                                 )+
 433                                 // NOTE(review): isDecimal stays true after a hex or octal match, so the float tail below can still follow one -- confirm intended
 434                         |       //float or double with leading zero
 435                                 (('0'..'9')+ ('.'|EXPONENT|FLOAT_SUFFIX)) => ('0'..'9')+
 436
 437                         |       ('0'..'7')+                                                                     // octal
 438                         )?
 439                 |       ('1'..'9') ('0'..'9')*  {isDecimal=true;}               // non-zero decimal
 440                 )
 441                 (       ('l'|'L') { _ttype = NUM_LONG; }
 442
 443                 // only check to see if it's a float if looks like decimal so far
 444                 |       {isDecimal}?
 445             (   '.' ('0'..'9')* (EXPONENT)? (f2:FLOAT_SUFFIX {t=f2;})?
 446             |   EXPONENT (f3:FLOAT_SUFFIX {t=f3;})?
 447             |   f4:FLOAT_SUFFIX {t=f4;}
 448             )
 449             {
 450                         if (t != null && t.getText().toUpperCase() .indexOf('F') >= 0) {
 451                 _ttype = NUM_FLOAT;
 452                         }
 453             else {
 454                         _ttype = NUM_DOUBLE; // assume double
 455                         }
 456                         }
 457         )?
 458         ;
 459
 460
 461 // a couple of protected rules to assist in matching floating point numbers; EXPONENT is 'e' or 'E', an optional sign, then one or more digits
 462 protected
 463 EXPONENT
 464         :       ('e'|'E') ('+'|'-')? ('0'..'9')+
 465         ;
 466
 467 // type suffix of a floating point literal; NUM_INT picks NUM_FLOAT when the suffix is 'f'/'F' and NUM_DOUBLE otherwise
 468 protected
 469 FLOAT_SUFFIX
 470         :       'f'|'F'|'d'|'D'
 471         ;