1 /**************************************************************************
3 ** This file is part of Qt Creator
5 ** Copyright (c) 2011 Nokia Corporation and/or its subsidiary(-ies).
7 ** Contact: Nokia Corporation (qt-info@nokia.com)
11 ** This file contains pre-release code and may not be distributed.
12 ** You may use this file in accordance with the terms and conditions
13 ** contained in the Technology Preview License Agreement accompanying
16 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 2.1 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 2.1 requirements
23 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
25 ** In addition, as a special exception, Nokia gives you certain additional
26 ** rights. These rights are described in the Nokia Qt LGPL Exception
27 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
29 ** If you have questions regarding the use of this file, please contact
30 ** Nokia at qt-info@nokia.com.
32 **************************************************************************/
33 // Copyright (c) 2008 Roberto Raggi <roberto.raggi@gmail.com>
35 // Permission is hereby granted, free of charge, to any person obtaining a copy
36 // of this software and associated documentation files (the "Software"), to deal
37 // in the Software without restriction, including without limitation the rights
38 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
39 // copies of the Software, and to permit persons to whom the Software is
40 // furnished to do so, subject to the following conditions:
42 // The above copyright notice and this permission notice shall be included in
43 // all copies or substantial portions of the Software.
45 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
46 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
47 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
48 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
49 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
50 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
55 #include "TranslationUnit.h"
60 using namespace CPlusPlus;
// Constructs a lexer bound to a translation unit: stores `unit`, starts in
// State_Default, turns keyword scanning on, and hands the unit's
// firstSourceChar()/lastSourceChar() range to setSource().
// `unit` is dereferenced below without any visible null check.
// NOTE(review): the embedded line numbers jump (64 -> 68, 70 -> 73), so the
// rest of the initializer list and the braces are not visible in this listing.
62 Lexer::Lexer(TranslationUnit *unit)
63 : _translationUnit(unit),
64 _state(State_Default),
68 f._scanKeywords = true;
69 setSource(_translationUnit->firstSourceChar(),
70 _translationUnit->lastSourceChar());
// Constructs a lexer over a raw character range with no translation unit
// attached (_translationUnit is set to 0): starts in State_Default, turns
// keyword scanning on, and passes firstChar/lastChar straight to setSource().
// NOTE(review): the embedded line numbers jump (75 -> 79, 80 -> 86), so the
// rest of the initializer list and the braces are not visible in this listing.
73 Lexer::Lexer(const char *firstChar, const char *lastChar)
74 : _translationUnit(0),
75 _state(State_Default),
79 f._scanKeywords = true;
80 setSource(firstChar, lastChar);
// Returns the translation unit pointer held by this lexer. It is 0 for a
// lexer constructed from a raw character range.
86 TranslationUnit *Lexer::translationUnit() const
87 { return _translationUnit; }
// Returns the Control object obtained from the attached translation unit.
// NOTE(review): embedded lines 90-91 and 93-96 are missing from this listing,
// so any guard for a lexer without a translation unit (and what is returned
// in that case) is not visible here -- confirm against the full file.
89 Control *Lexer::control() const
92 return _translationUnit->control();
// Points the lexer at a new input buffer delimited by firstChar and lastChar
// and resets the scanning cursor and the token start.
// NOTE(review): embedded lines 98 and 103-105 are missing from this listing,
// so any further state reset done here is not visible.
97 void Lexer::setSource(const char *firstChar, const char *lastChar)
99 _firstChar = firstChar;
100 _lastChar = lastChar;
// The cursor is placed one position *before* the first character.
101 _currentChar = _firstChar - 1;
102 _tokenStart = _currentChar;
// Setter taking a single bool.
// NOTE(review): the embedded numbering jumps from 106 to 114, so the body is
// not part of this listing; what `enabled` controls cannot be confirmed here.
106 void Lexer::setStartWithNewline(bool enabled)
// Reports the lexer state as an int.
// NOTE(review): the body (embedded lines 115-116) is not visible; presumably
// it returns _state, which the constructors initialise to State_Default --
// confirm against the full file.
114 int Lexer::state() const
// Sets the lexer state from an int.
// NOTE(review): the body (embedded lines 118-119) is not visible; presumably
// it assigns _state, which scan_helper compares against
// State_MultiLineComment / State_MultiLineDoxyComment -- confirm.
117 void Lexer::setState(int state)
// Returns the current value of the f._qtMocRunEnabled flag.
120 bool Lexer::qtMocRunEnabled() const
121 { return f._qtMocRunEnabled; }
// Stores `onoff` in f._qtMocRunEnabled. scan_helper forwards this flag to
// classify() when it classifies an identifier-like token.
123 void Lexer::setQtMocRunEnabled(bool onoff)
124 { f._qtMocRunEnabled = onoff; }
// Returns the current value of the f._cxx0xEnabled flag.
126 bool Lexer::cxx0xEnabled() const
127 { return f._cxx0xEnabled; }
// Stores `onoff` in f._cxx0xEnabled. scan_helper forwards this flag to
// classify() when it classifies an identifier-like token.
// NOTE(review): this setter is spelled with a capital letter O ("CxxOx"),
// while the getter cxx0xEnabled() and the flag use the digit 0 ("cxx0x").
// Renaming would break existing callers, so the mismatch is only flagged.
129 void Lexer::setCxxOxEnabled(bool onoff)
130 { f._cxx0xEnabled = onoff; }
// Returns the current value of the f._objCEnabled flag.
132 bool Lexer::objCEnabled() const
133 { return f._objCEnabled; }
// Stores `onoff` in f._objCEnabled. scan_helper checks this flag before it
// handles '@'-prefixed keywords and '@"..."' string literals.
135 void Lexer::setObjCEnabled(bool onoff)
136 { f._objCEnabled = onoff; }
// Returns the current value of the f._isIncremental flag.
138 bool Lexer::isIncremental() const
139 { return f._isIncremental; }
// Stores `isIncremental` in the f._isIncremental flag.
141 void Lexer::setIncremental(bool isIncremental)
142 { f._isIncremental = isIncremental; }
// Returns the current value of the f._scanCommentTokens flag.
144 bool Lexer::scanCommentTokens() const
145 { return f._scanCommentTokens; }
// Stores `onoff` in f._scanCommentTokens. scan_helper tests this flag in each
// of its comment-handling paths before assigning a comment token kind.
147 void Lexer::setScanCommentTokens(bool onoff)
148 { f._scanCommentTokens = onoff; }
// Returns the current value of the f._scanKeywords flag. Both constructors
// set this flag to true.
150 bool Lexer::scanKeywords() const
151 { return f._scanKeywords; }
// Stores `onoff` in f._scanKeywords, overriding the `true` that both
// constructors assign.
153 void Lexer::setScanKeywords(bool onoff)
154 { f._scanKeywords = onoff; }
// Stores `onoff` in f._scanAngleStringLiteralTokens. When the flag is set,
// scan_helper collects characters up to '>' and emits a
// T_ANGLE_STRING_LITERAL token instead of the '<'-based operator tokens.
156 void Lexer::setScanAngleStringLiteralTokens(bool onoff)
157 { f._scanAngleStringLiteralTokens = onoff; }
// When a translation unit is attached, reports the cursor's offset from the
// start of the source (_currentChar - _firstChar) to it via pushLineOffset().
// Without a translation unit the visible code does nothing.
// NOTE(review): embedded lines 160-162 and 165-166 are missing from this
// listing, so any additional bookkeeping done here is not visible.
159 void Lexer::pushLineStartOffset()
163 if (_translationUnit)
164 _translationUnit->pushLineOffset(_currentChar - _firstChar);
// Returns the distance from the start of the source buffer to the start of
// the current token. The pointer difference is implicitly converted to
// unsigned.
167 unsigned Lexer::tokenOffset() const
168 { return _tokenStart - _firstChar; }
// Returns the distance from the current token's start to the cursor. The
// pointer difference is implicitly converted to unsigned.
170 unsigned Lexer::tokenLength() const
171 { return _currentChar - _tokenStart; }
// Returns a pointer to the first character of the current token.
173 const char *Lexer::tokenBegin() const
174 { return _tokenStart; }
// Returns the current cursor position, which serves as the end of the
// current token.
176 const char *Lexer::tokenEnd() const
177 { return _currentChar; }
// Returns the lexer's current line counter (_currentLine).
179 unsigned Lexer::currentLine() const
180 { return _currentLine; }
// Scans one token into `tok`. The only statement visible here records the
// token's length as the distance between the token start and the cursor.
// NOTE(review): embedded lines 183-185 are missing from this listing, so the
// steps that run before the length is stored (presumably resetting `tok` and
// calling scan_helper) are not visible -- confirm against the full file.
182 void Lexer::scan(Token *tok)
186 tok->f.length = _currentChar - _tokenStart;
189 void Lexer::scan_helper(Token *tok)
192 while (_yychar && std::isspace(_yychar)) {
193 if (_yychar == '\n') {
194 tok->f.joined = false;
195 tok->f.newline = true;
197 tok->f.whitespace = true;
202 if (! _translationUnit)
203 tok->lineno = _currentLine;
205 _tokenStart = _currentChar;
206 tok->offset = _currentChar - _firstChar;
208 if (_state == State_MultiLineComment || _state == State_MultiLineDoxyComment) {
209 const int originalState = _state;
212 tok->f.kind = T_EOF_SYMBOL;
221 if (_yychar == '/') {
223 _state = State_Default;
229 if (! f._scanCommentTokens)
232 else if (originalState == State_MultiLineComment)
233 tok->f.kind = T_COMMENT;
235 tok->f.kind = T_DOXY_COMMENT;
240 tok->f.kind = T_EOF_SYMBOL;
244 unsigned char ch = _yychar;
249 while (_yychar != '\n' && std::isspace(_yychar))
251 // ### assert(! _yychar || _yychar == '\n');
252 if (_yychar == '\n') {
253 tok->f.joined = true;
254 tok->f.newline = false;
259 case '"': case '\'': {
260 const char quote = ch;
262 tok->f.kind = quote == '"'
266 const char *yytext = _currentChar;
268 while (_yychar && _yychar != quote) {
271 else if (_yychar != '\\')
274 yyinp(); // skip `\\'
280 // assert(_yychar == quote);
282 int yylen = _currentChar - yytext;
284 if (_yychar == quote)
288 tok->string = control()->stringLiteral(yytext, yylen);
292 tok->f.kind = T_LBRACE;
296 tok->f.kind = T_RBRACE;
300 tok->f.kind = T_LBRACKET;
304 tok->f.kind = T_RBRACKET;
308 if (_yychar == '#') {
309 tok->f.kind = T_POUND_POUND;
312 tok->f.kind = T_POUND;
317 tok->f.kind = T_LPAREN;
321 tok->f.kind = T_RPAREN;
325 tok->f.kind = T_SEMICOLON;
329 if (_yychar == ':') {
331 tok->f.kind = T_COLON_COLON;
333 tok->f.kind = T_COLON;
338 if (_yychar == '*') {
340 tok->f.kind = T_DOT_STAR;
341 } else if (_yychar == '.') {
343 // ### assert(_yychar);
344 if (_yychar == '.') {
346 tok->f.kind = T_DOT_DOT_DOT;
348 tok->f.kind = T_ERROR;
350 } else if (std::isdigit(_yychar)) {
351 const char *yytext = _currentChar - 2;
353 if (_yychar == 'e' || _yychar == 'E') {
355 if (_yychar == '-' || _yychar == '+') {
357 // ### assert(std::isdigit(_yychar));
359 } else if (std::isalnum(_yychar) || _yychar == '.') {
365 int yylen = _currentChar - yytext;
366 tok->f.kind = T_NUMERIC_LITERAL;
368 tok->number = control()->numericLiteral(yytext, yylen);
375 tok->f.kind = T_QUESTION;
379 if (_yychar == '+') {
381 tok->f.kind = T_PLUS_PLUS;
382 } else if (_yychar == '=') {
384 tok->f.kind = T_PLUS_EQUAL;
386 tok->f.kind = T_PLUS;
391 if (_yychar == '-') {
393 tok->f.kind = T_MINUS_MINUS;
394 } else if (_yychar == '=') {
396 tok->f.kind = T_MINUS_EQUAL;
397 } else if (_yychar == '>') {
399 if (_yychar == '*') {
401 tok->f.kind = T_ARROW_STAR;
403 tok->f.kind = T_ARROW;
406 tok->f.kind = T_MINUS;
411 if (_yychar == '=') {
413 tok->f.kind = T_STAR_EQUAL;
415 tok->f.kind = T_STAR;
420 if (_yychar == '/') {
425 if (_yychar == '/' || _yychar == '!') {
431 if (_yychar != '\n' && std::isspace(_yychar))
435 while (_yychar && _yychar != '\n')
438 if (! f._scanCommentTokens)
441 tok->f.kind = doxy ? T_CPP_DOXY_COMMENT : T_CPP_COMMENT;
443 } else if (_yychar == '*') {
448 if (_yychar == '*' || _yychar == '!') {
449 const char ch = _yychar;
453 if (ch == '*' && _yychar == '/')
459 if (! _yychar || std::isspace(_yychar))
464 if (_yychar != '*') {
477 _state = doxy ? State_MultiLineDoxyComment : State_MultiLineComment;
479 if (! f._scanCommentTokens)
482 tok->f.kind = doxy ? T_DOXY_COMMENT : T_COMMENT;
484 } else if (_yychar == '=') {
486 tok->f.kind = T_SLASH_EQUAL;
488 tok->f.kind = T_SLASH;
493 if (_yychar == '=') {
495 tok->f.kind = T_PERCENT_EQUAL;
497 tok->f.kind = T_PERCENT;
502 if (_yychar == '=') {
504 tok->f.kind = T_CARET_EQUAL;
506 tok->f.kind = T_CARET;
511 if (_yychar == '&') {
513 tok->f.kind = T_AMPER_AMPER;
514 } else if (_yychar == '=') {
516 tok->f.kind = T_AMPER_EQUAL;
518 tok->f.kind = T_AMPER;
523 if (_yychar == '|') {
525 tok->f.kind = T_PIPE_PIPE;
526 } else if (_yychar == '=') {
528 tok->f.kind = T_PIPE_EQUAL;
530 tok->f.kind = T_PIPE;
535 if (_yychar == '=') {
537 tok->f.kind = T_TILDE_EQUAL;
539 tok->f.kind = T_TILDE;
544 if (_yychar == '=') {
546 tok->f.kind = T_EXCLAIM_EQUAL;
548 tok->f.kind = T_EXCLAIM;
553 if (_yychar == '=') {
555 tok->f.kind = T_EQUAL_EQUAL;
557 tok->f.kind = T_EQUAL;
562 if (f._scanAngleStringLiteralTokens) {
563 const char *yytext = _currentChar;
564 while (_yychar && _yychar != '>')
566 int yylen = _currentChar - yytext;
567 // ### assert(_yychar == '>');
571 tok->string = control()->stringLiteral(yytext, yylen);
572 tok->f.kind = T_ANGLE_STRING_LITERAL;
573 } else if (_yychar == '<') {
575 if (_yychar == '=') {
577 tok->f.kind = T_LESS_LESS_EQUAL;
579 tok->f.kind = T_LESS_LESS;
580 } else if (_yychar == '=') {
582 tok->f.kind = T_LESS_EQUAL;
584 tok->f.kind = T_LESS;
589 if (_yychar == '>') {
591 if (_yychar == '=') {
593 tok->f.kind = T_GREATER_GREATER_EQUAL;
595 tok->f.kind = T_LESS_LESS;
596 tok->f.kind = T_GREATER_GREATER;
597 } else if (_yychar == '=') {
599 tok->f.kind = T_GREATER_EQUAL;
601 tok->f.kind = T_GREATER;
606 tok->f.kind = T_COMMA;
610 if (f._objCEnabled) {
611 if (ch == '@' && _yychar >= 'a' && _yychar <= 'z') {
612 const char *yytext = _currentChar;
616 if (! (isalnum(_yychar) || _yychar == '_' || _yychar == '$'))
620 const int yylen = _currentChar - yytext;
621 tok->f.kind = classifyObjCAtKeyword(yytext, yylen);
623 } else if (ch == '@' && _yychar == '"') {
624 // objc @string literals
627 tok->f.kind = T_AT_STRING_LITERAL;
629 const char *yytext = _currentChar;
631 while (_yychar && _yychar != '"') {
635 yyinp(); // skip `\\'
641 // assert(_yychar == '"');
643 int yylen = _currentChar - yytext;
649 tok->string = control()->stringLiteral(yytext, yylen);
655 if (ch == 'L' && (_yychar == '"' || _yychar == '\'')) {
656 // wide char/string literals
660 const char quote = ch;
662 tok->f.kind = quote == '"'
663 ? T_WIDE_STRING_LITERAL
664 : T_WIDE_CHAR_LITERAL;
666 const char *yytext = _currentChar;
668 while (_yychar && _yychar != quote) {
672 yyinp(); // skip `\\'
678 // assert(_yychar == quote);
680 int yylen = _currentChar - yytext;
682 if (_yychar == quote)
686 tok->string = control()->stringLiteral(yytext, yylen);
687 } else if (std::isalpha(ch) || ch == '_' || ch == '$') {
688 const char *yytext = _currentChar - 1;
689 while (std::isalnum(_yychar) || _yychar == '_' || _yychar == '$')
691 int yylen = _currentChar - yytext;
693 tok->f.kind = classify(yytext, yylen, f._qtMocRunEnabled, f._cxx0xEnabled);
695 tok->f.kind = T_IDENTIFIER;
697 if (tok->f.kind == T_IDENTIFIER) {
698 tok->f.kind = classifyOperator(yytext, yylen);
701 tok->identifier = control()->identifier(yytext, yylen);
704 } else if (std::isdigit(ch)) {
705 const char *yytext = _currentChar - 1;
707 if (_yychar == 'e' || _yychar == 'E') {
709 if (_yychar == '-' || _yychar == '+') {
711 // ### assert(std::isdigit(_yychar));
713 } else if (std::isalnum(_yychar) || _yychar == '.') {
719 int yylen = _currentChar - yytext;
720 tok->f.kind = T_NUMERIC_LITERAL;
722 tok->number = control()->numericLiteral(yytext, yylen);
725 tok->f.kind = T_ERROR;