1 /**************************************************************************
3 ** This file is part of Qt Creator
5 ** Copyright (c) 2011 Nokia Corporation and/or its subsidiary(-ies).
7 ** Contact: Nokia Corporation (info@qt.nokia.com)
10 ** GNU Lesser General Public License Usage
12 ** This file may be used under the terms of the GNU Lesser General Public
13 ** License version 2.1 as published by the Free Software Foundation and
14 ** appearing in the file LICENSE.LGPL included in the packaging of this file.
15 ** Please review the following information to ensure the GNU Lesser General
16 ** Public License version 2.1 requirements will be met:
17 ** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
19 ** In addition, as a special exception, Nokia gives you certain additional
20 ** rights. These rights are described in the Nokia Qt LGPL Exception
21 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
25 ** Alternatively, this file may be used in accordance with the terms and
26 ** conditions contained in a signed written agreement between you and Nokia.
28 ** If you have questions regarding the use of this file, please contact
29 ** Nokia at qt-info@nokia.com.
31 **************************************************************************/
32 // Copyright (c) 2008 Roberto Raggi <roberto.raggi@gmail.com>
34 // Permission is hereby granted, free of charge, to any person obtaining a copy
35 // of this software and associated documentation files (the "Software"), to deal
36 // in the Software without restriction, including without limitation the rights
37 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
38 // copies of the Software, and to permit persons to whom the Software is
39 // furnished to do so, subject to the following conditions:
41 // The above copyright notice and this permission notice shall be included in
42 // all copies or substantial portions of the Software.
44 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
45 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
46 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
47 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
48 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
49 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
54 #include "TranslationUnit.h"
59 using namespace CPlusPlus;
// Constructs a lexer bound to the translation unit `unit`.
// The lexer starts in State_Default, keyword scanning is switched on, and the
// character range reported by the unit (firstSourceChar()/lastSourceChar())
// is handed to setSource().
// NOTE(review): `unit` is dereferenced below; callers are presumably required
// to pass a non-null pointer -- confirm.
61 Lexer::Lexer(TranslationUnit *unit)
62 : _translationUnit(unit),
63 _state(State_Default),
67 f._scanKeywords = true;
68 setSource(_translationUnit->firstSourceChar(),
69 _translationUnit->lastSourceChar());
// Constructs a lexer over a raw character range, without a translation unit
// (_translationUnit is set to 0).
// The lexer starts in State_Default, keyword scanning is switched on, and
// firstChar/lastChar are passed straight to setSource().
72 Lexer::Lexer(const char *firstChar, const char *lastChar)
73 : _translationUnit(0),
74 _state(State_Default),
78 f._scanKeywords = true;
79 setSource(firstChar, lastChar);
85 TranslationUnit *Lexer::translationUnit() const
86 { return _translationUnit; }
// Returns the Control object obtained from the attached translation unit.
// NOTE(review): _translationUnit is 0 for lexers built from a raw character
// range (see the (const char *, const char *) constructor); confirm that a
// null check guards this dereference.
88 Control *Lexer::control() const
91 return _translationUnit->control();
// Points the lexer at a new source range beginning at `firstChar`.
// The read cursor (_currentChar) and the token start are both placed one
// character *before* the first character -- presumably so that the first
// advance lands on firstChar; verify against yyinp().
// NOTE(review): forming `_firstChar - 1` is only well defined in C++ when
// firstChar does not point at the first element of its array -- confirm what
// callers pass.
96 void Lexer::setSource(const char *firstChar, const char *lastChar)
98 _firstChar = firstChar;
100 _currentChar = _firstChar - 1;
101 _tokenStart = _currentChar;
105 void Lexer::setStartWithNewline(bool enabled)
113 int Lexer::state() const
116 void Lexer::setState(int state)
119 bool Lexer::qtMocRunEnabled() const
120 { return f._qtMocRunEnabled; }
122 void Lexer::setQtMocRunEnabled(bool onoff)
123 { f._qtMocRunEnabled = onoff; }
125 bool Lexer::cxx0xEnabled() const
126 { return f._cxx0xEnabled; }
128 void Lexer::setCxxOxEnabled(bool onoff)
129 { f._cxx0xEnabled = onoff; }
131 bool Lexer::objCEnabled() const
132 { return f._objCEnabled; }
134 void Lexer::setObjCEnabled(bool onoff)
135 { f._objCEnabled = onoff; }
137 bool Lexer::isIncremental() const
138 { return f._isIncremental; }
140 void Lexer::setIncremental(bool isIncremental)
141 { f._isIncremental = isIncremental; }
143 bool Lexer::scanCommentTokens() const
144 { return f._scanCommentTokens; }
146 void Lexer::setScanCommentTokens(bool onoff)
147 { f._scanCommentTokens = onoff; }
149 bool Lexer::scanKeywords() const
150 { return f._scanKeywords; }
152 void Lexer::setScanKeywords(bool onoff)
153 { f._scanKeywords = onoff; }
155 void Lexer::setScanAngleStringLiteralTokens(bool onoff)
156 { f._scanAngleStringLiteralTokens = onoff; }
// Forwards the current character offset (_currentChar relative to _firstChar)
// to TranslationUnit::pushLineOffset(). That call is skipped when no
// translation unit is attached.
158 void Lexer::pushLineStartOffset()
162 if (_translationUnit)
163 _translationUnit->pushLineOffset(_currentChar - _firstChar);
166 unsigned Lexer::tokenOffset() const
167 { return _tokenStart - _firstChar; }
169 unsigned Lexer::tokenLength() const
170 { return _currentChar - _tokenStart; }
172 const char *Lexer::tokenBegin() const
173 { return _tokenStart; }
175 const char *Lexer::tokenEnd() const
176 { return _currentChar; }
178 unsigned Lexer::currentLine() const
179 { return _currentLine; }
// Public scanning entry point: fills in *tok.
// The token length is the distance from the recorded token start to the
// current read position.
181 void Lexer::scan(Token *tok)
185 tok->f.length = _currentChar - _tokenStart;
// Recognises a single token starting at the current read position and stores
// its kind, flags, offset and (for literals and identifiers) the value
// returned by control() in *tok.
// NOTE(review): std::isspace / std::isdigit / std::isalnum / std::isalpha have
// undefined behaviour for negative arguments other than EOF. If _yychar is a
// plain (signed) char, bytes >= 0x80 should be converted to unsigned char
// before these calls -- confirm the declared type of _yychar.
188 void Lexer::scan_helper(Token *tok)
// Skip leading whitespace, recording on the token whether a newline and/or
// other whitespace preceded it.
191 while (_yychar && std::isspace(_yychar)) {
192 if (_yychar == '\n') {
193 tok->f.joined = false;
194 tok->f.newline = true;
196 tok->f.whitespace = true;
// Without a translation unit the line number is stored on the token directly.
201 if (! _translationUnit)
202 tok->lineno = _currentLine;
// Remember where the token begins; the offset is relative to _firstChar.
204 _tokenStart = _currentChar;
205 tok->offset = _currentChar - _firstChar;
// The lexer state says a block comment (plain or doxygen) is still open.
207 if (_state == State_MultiLineComment || _state == State_MultiLineDoxyComment) {
208 const int originalState = _state;
211 tok->f.kind = T_EOF_SYMBOL;
220 if (_yychar == '/') {
222 _state = State_Default;
// The comment kind follows the state that was active on entry.
228 if (! f._scanCommentTokens)
231 else if (originalState == State_MultiLineComment)
232 tok->f.kind = T_COMMENT;
234 tok->f.kind = T_DOXY_COMMENT;
239 tok->f.kind = T_EOF_SYMBOL;
// `ch` holds the character the token is dispatched on.
243 unsigned char ch = _yychar;
248 while (_yychar != '\n' && std::isspace(_yychar))
250 // ### assert(! _yychar || _yychar == '\n');
251 if (_yychar == '\n') {
252 tok->f.joined = true;
253 tok->f.newline = false;
// String and character literals: scan up to the matching quote.
258 case '"': case '\'': {
259 const char quote = ch;
261 tok->f.kind = quote == '"'
265 const char *yytext = _currentChar;
267 while (_yychar && _yychar != quote) {
270 else if (_yychar != '\\')
273 yyinp(); // skip `\\'
279 // assert(_yychar == quote);
281 int yylen = _currentChar - yytext;
283 if (_yychar == quote)
287 tok->string = control()->stringLiteral(yytext, yylen);
291 tok->f.kind = T_LBRACE;
295 tok->f.kind = T_RBRACE;
299 tok->f.kind = T_LBRACKET;
303 tok->f.kind = T_RBRACKET;
// '#' followed by another '#' is T_POUND_POUND, otherwise T_POUND.
307 if (_yychar == '#') {
308 tok->f.kind = T_POUND_POUND;
311 tok->f.kind = T_POUND;
316 tok->f.kind = T_LPAREN;
320 tok->f.kind = T_RPAREN;
324 tok->f.kind = T_SEMICOLON;
328 if (_yychar == ':') {
330 tok->f.kind = T_COLON_COLON;
332 tok->f.kind = T_COLON;
// After a '.': ".*", "...", or a numeric literal that begins with '.'.
337 if (_yychar == '*') {
339 tok->f.kind = T_DOT_STAR;
340 } else if (_yychar == '.') {
342 // ### assert(_yychar);
343 if (_yychar == '.') {
345 tok->f.kind = T_DOT_DOT_DOT;
347 tok->f.kind = T_ERROR;
349 } else if (std::isdigit(_yychar)) {
// The literal text starts two characters behind the read position.
350 const char *yytext = _currentChar - 2;
352 if (_yychar == 'e' || _yychar == 'E') {
354 if (_yychar == '-' || _yychar == '+') {
356 // ### assert(std::isdigit(_yychar));
358 } else if (std::isalnum(_yychar) || _yychar == '.') {
364 int yylen = _currentChar - yytext;
365 tok->f.kind = T_NUMERIC_LITERAL;
367 tok->number = control()->numericLiteral(yytext, yylen);
374 tok->f.kind = T_QUESTION;
378 if (_yychar == '+') {
380 tok->f.kind = T_PLUS_PLUS;
381 } else if (_yychar == '=') {
383 tok->f.kind = T_PLUS_EQUAL;
385 tok->f.kind = T_PLUS;
390 if (_yychar == '-') {
392 tok->f.kind = T_MINUS_MINUS;
393 } else if (_yychar == '=') {
395 tok->f.kind = T_MINUS_EQUAL;
396 } else if (_yychar == '>') {
398 if (_yychar == '*') {
400 tok->f.kind = T_ARROW_STAR;
402 tok->f.kind = T_ARROW;
405 tok->f.kind = T_MINUS;
410 if (_yychar == '=') {
412 tok->f.kind = T_STAR_EQUAL;
414 tok->f.kind = T_STAR;
// After a '/': line comment, block comment, "/=" or a plain '/'.
419 if (_yychar == '/') {
424 if (_yychar == '/' || _yychar == '!') {
430 if (_yychar != '\n' && std::isspace(_yychar))
// A line comment runs to the end of the line (or end of input).
434 while (_yychar && _yychar != '\n')
437 if (! f._scanCommentTokens)
440 tok->f.kind = doxy ? T_CPP_DOXY_COMMENT : T_CPP_COMMENT;
442 } else if (_yychar == '*') {
447 if (_yychar == '*' || _yychar == '!') {
448 const char ch = _yychar;
452 if (ch == '*' && _yychar == '/')
458 if (! _yychar || std::isspace(_yychar))
463 if (_yychar != '*') {
// Record in _state that a block comment is open, so a later call can resume it.
476 _state = doxy ? State_MultiLineDoxyComment : State_MultiLineComment;
478 if (! f._scanCommentTokens)
481 tok->f.kind = doxy ? T_DOXY_COMMENT : T_COMMENT;
483 } else if (_yychar == '=') {
485 tok->f.kind = T_SLASH_EQUAL;
487 tok->f.kind = T_SLASH;
492 if (_yychar == '=') {
494 tok->f.kind = T_PERCENT_EQUAL;
496 tok->f.kind = T_PERCENT;
501 if (_yychar == '=') {
503 tok->f.kind = T_CARET_EQUAL;
505 tok->f.kind = T_CARET;
510 if (_yychar == '&') {
512 tok->f.kind = T_AMPER_AMPER;
513 } else if (_yychar == '=') {
515 tok->f.kind = T_AMPER_EQUAL;
517 tok->f.kind = T_AMPER;
522 if (_yychar == '|') {
524 tok->f.kind = T_PIPE_PIPE;
525 } else if (_yychar == '=') {
527 tok->f.kind = T_PIPE_EQUAL;
529 tok->f.kind = T_PIPE;
534 if (_yychar == '=') {
536 tok->f.kind = T_TILDE_EQUAL;
538 tok->f.kind = T_TILDE;
543 if (_yychar == '=') {
545 tok->f.kind = T_EXCLAIM_EQUAL;
547 tok->f.kind = T_EXCLAIM;
552 if (_yychar == '=') {
554 tok->f.kind = T_EQUAL_EQUAL;
556 tok->f.kind = T_EQUAL;
// In angle-string mode the text up to the next '>' becomes one string literal
// token; otherwise '<' starts "<<", "<<=", "<=" or a plain '<'.
561 if (f._scanAngleStringLiteralTokens) {
562 const char *yytext = _currentChar;
563 while (_yychar && _yychar != '>')
565 int yylen = _currentChar - yytext;
566 // ### assert(_yychar == '>');
570 tok->string = control()->stringLiteral(yytext, yylen);
571 tok->f.kind = T_ANGLE_STRING_LITERAL;
572 } else if (_yychar == '<') {
574 if (_yychar == '=') {
576 tok->f.kind = T_LESS_LESS_EQUAL;
578 tok->f.kind = T_LESS_LESS;
579 } else if (_yychar == '=') {
581 tok->f.kind = T_LESS_EQUAL;
583 tok->f.kind = T_LESS;
588 if (_yychar == '>') {
590 if (_yychar == '=') {
592 tok->f.kind = T_GREATER_GREATER_EQUAL;
// NOTE(review): T_LESS_LESS looks like a copy/paste slip from the '<' branch,
// and the very next statement overwrites it with T_GREATER_GREATER. If that
// next statement is not inside the else arm it also overwrites
// T_GREATER_GREATER_EQUAL, so ">>=" would be reported as T_GREATER_GREATER --
// verify the branch structure and fix.
594 tok->f.kind = T_LESS_LESS;
595 tok->f.kind = T_GREATER_GREATER;
596 } else if (_yychar == '=') {
598 tok->f.kind = T_GREATER_EQUAL;
600 tok->f.kind = T_GREATER;
605 tok->f.kind = T_COMMA;
// Objective-C extensions: '@' keywords and '@"..."' string literals.
609 if (f._objCEnabled) {
610 if (ch == '@' && _yychar >= 'a' && _yychar <= 'z') {
611 const char *yytext = _currentChar;
615 if (! (isalnum(_yychar) || _yychar == '_' || _yychar == '$'))
619 const int yylen = _currentChar - yytext;
620 tok->f.kind = classifyObjCAtKeyword(yytext, yylen);
622 } else if (ch == '@' && _yychar == '"') {
623 // objc @string literals
626 tok->f.kind = T_AT_STRING_LITERAL;
628 const char *yytext = _currentChar;
630 while (_yychar && _yychar != '"') {
634 yyinp(); // skip `\\'
640 // assert(_yychar == '"');
642 int yylen = _currentChar - yytext;
648 tok->string = control()->stringLiteral(yytext, yylen);
654 if (ch == 'L' && (_yychar == '"' || _yychar == '\'')) {
655 // wide char/string literals
659 const char quote = ch;
661 tok->f.kind = quote == '"'
662 ? T_WIDE_STRING_LITERAL
663 : T_WIDE_CHAR_LITERAL;
665 const char *yytext = _currentChar;
667 while (_yychar && _yychar != quote) {
671 yyinp(); // skip `\\'
677 // assert(_yychar == quote);
679 int yylen = _currentChar - yytext;
681 if (_yychar == quote)
685 tok->string = control()->stringLiteral(yytext, yylen);
// Identifiers and keywords: letters, digits, '_' and '$' are accepted.
686 } else if (std::isalpha(ch) || ch == '_' || ch == '$') {
687 const char *yytext = _currentChar - 1;
688 while (std::isalnum(_yychar) || _yychar == '_' || _yychar == '$')
690 int yylen = _currentChar - yytext;
692 tok->f.kind = classify(yytext, yylen, f._qtMocRunEnabled, f._cxx0xEnabled);
694 tok->f.kind = T_IDENTIFIER;
// Anything still classified as a plain identifier is checked against
// classifyOperator() as well.
696 if (tok->f.kind == T_IDENTIFIER) {
697 tok->f.kind = classifyOperator(yytext, yylen);
700 tok->identifier = control()->identifier(yytext, yylen);
// Numeric literal starting with a digit.
703 } else if (std::isdigit(ch)) {
704 const char *yytext = _currentChar - 1;
706 if (_yychar == 'e' || _yychar == 'E') {
708 if (_yychar == '-' || _yychar == '+') {
710 // ### assert(std::isdigit(_yychar));
712 } else if (std::isalnum(_yychar) || _yychar == '.') {
718 int yylen = _currentChar - yytext;
719 tok->f.kind = T_NUMERIC_LITERAL;
721 tok->number = control()->numericLiteral(yytext, yylen);
// Any character not handled above yields an error token.
724 tok->f.kind = T_ERROR;