1 // lex.h -- Go frontend lexer. -*- C++ -*-
3 // Copyright 2009 The Go Authors. All rights reserved.
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file.
14 #include "go-linemap.h"
18 // The keywords. These must be in sorted order, other than
19 // KEYWORD_INVALID. They must match the Keywords::mapping_ array in
24 KEYWORD_INVALID, // Not a keyword.
53 // A token returned from the lexer.
58 // Token classification.
63 // Token indicates end of input.
65 // Token is a keyword.
67 // Token is an identifier.
69 // Token is a string of characters.
71 // Token is an operator.
73 // Token is an integer.
75 // Token is a floating point number.
77 // Token is an imaginary number.
83 Token& operator=(const Token&);
85 // Get token classification.
87 classification() const
88 { return this->classification_; }
90 // Make a token for an invalid value.
92 make_invalid_token(Location location)
93 { return Token(TOKEN_INVALID, location); }
95 // Make a token representing end of file.
97 make_eof_token(Location location)
98 { return Token(TOKEN_EOF, location); }
100 // Make a keyword token.
102 make_keyword_token(Keyword keyword, Location location)
104 Token tok(TOKEN_KEYWORD, location);
105 tok.u_.keyword = keyword;
109 // Make an identifier token.
111 make_identifier_token(const std::string& value, bool is_exported,
114 Token tok(TOKEN_IDENTIFIER, location);
115 tok.u_.identifier_value.name = new std::string(value);
116 tok.u_.identifier_value.is_exported = is_exported;
120 // Make a quoted string token.
122 make_string_token(const std::string& value, Location location)
124 Token tok(TOKEN_STRING, location);
125 tok.u_.string_value = new std::string(value);
129 // Make an operator token.
131 make_operator_token(Operator op, Location location)
133 Token tok(TOKEN_OPERATOR, location);
138 // Make an integer token.
140 make_integer_token(mpz_t val, Location location)
142 Token tok(TOKEN_INTEGER, location);
143 mpz_init(tok.u_.integer_value);
144 mpz_swap(tok.u_.integer_value, val);
148 // Make a float token.
150 make_float_token(mpfr_t val, Location location)
152 Token tok(TOKEN_FLOAT, location);
153 mpfr_init(tok.u_.float_value);
154 mpfr_swap(tok.u_.float_value, val);
158 // Make a token for an imaginary number.
160 make_imaginary_token(mpfr_t val, Location location)
162 Token tok(TOKEN_IMAGINARY, location);
163 mpfr_init(tok.u_.float_value);
164 mpfr_swap(tok.u_.float_value, val);
168 // Get the location of the token.
171 { return this->location_; }
173 // Return whether this is an invalid token.
176 { return this->classification_ == TOKEN_INVALID; }
178 // Return whether this is the EOF token.
181 { return this->classification_ == TOKEN_EOF; }
183 // Return the keyword value for a keyword token.
187 go_assert(this->classification_ == TOKEN_KEYWORD);
188 return this->u_.keyword;
191 // Return whether this is an identifier.
193 is_identifier() const
194 { return this->classification_ == TOKEN_IDENTIFIER; }
196 // Return the identifier.
200 go_assert(this->classification_ == TOKEN_IDENTIFIER);
201 return *this->u_.identifier_value.name;
204 // Return whether the identifier is exported.
206 is_identifier_exported() const
208 go_assert(this->classification_ == TOKEN_IDENTIFIER);
209 return this->u_.identifier_value.is_exported;
212 // Return whether this is a string.
216 return this->classification_ == TOKEN_STRING;
219 // Return the value of a string. The returned value is a string of
224 go_assert(this->classification_ == TOKEN_STRING);
225 return *this->u_.string_value;
228 // Return the value of an integer.
230 integer_value() const
232 go_assert(this->classification_ == TOKEN_INTEGER);
233 return &this->u_.integer_value;
236 // Return the value of a float.
240 go_assert(this->classification_ == TOKEN_FLOAT);
241 return &this->u_.float_value;
244 // Return the value of an imaginary number.
246 imaginary_value() const
248 go_assert(this->classification_ == TOKEN_IMAGINARY);
249 return &this->u_.float_value;
252 // Return the operator value for an operator token.
256 go_assert(this->classification_ == TOKEN_OPERATOR);
260 // Return whether this token is KEYWORD.
262 is_keyword(Keyword keyword) const
264 return (this->classification_ == TOKEN_KEYWORD
265 && this->u_.keyword == keyword);
268 // Return whether this token is OP.
270 is_op(Operator op) const
271 { return this->classification_ == TOKEN_OPERATOR && this->u_.op == op; }
273 // Print the token for debugging.
278 // Private constructor used by make_..._token functions above.
279 Token(Classification, Location);
285 // The token classification.
286 Classification classification_;
289 // The keyword value for TOKEN_KEYWORD.
291 // The token value for TOKEN_IDENTIFIER.
294 // The name of the identifier. This has been mangled to only
295 // include ASCII characters.
297 // Whether this name should be exported. This is true if the
298 // first letter in the name is upper case.
301 // The string value for TOKEN_STRING.
302 std::string* string_value;
303 // The token value for TOKEN_INTEGER.
305 // The token value for TOKEN_FLOAT or TOKEN_IMAGINARY.
307 // The token value for TOKEN_OPERATOR or the keyword value
310 // The source location.
319 Lex(const char* input_file_name, FILE* input_file, Linemap *linemap);
323 // Return the next token.
327 // Return whether the identifier NAME should be exported. NAME is a
328 // mangled name which includes only ASCII characters.
330 is_exported_name(const std::string& name);
332 // A helper function. Append V to STR. IS_CHARACTER is true if V
333 // is a Unicode character which should be converted into UTF-8,
334 // false if it is a byte value to be appended directly. The
335 // location is used to warn about an out-of-range character.
337 append_char(unsigned int v, bool is_charater, std::string* str,
340 // A helper function. Fetch a UTF-8 character from STR and store it
341 // in *VALUE. Return the number of bytes read from STR. Return 0
342 // if STR does not point to a valid UTF-8 character.
344 fetch_char(const char* str, unsigned int *value);
353 // The current location.
357 // A position CHARS column positions before the current location.
359 earlier_location(int chars) const;
370 { return Token::make_invalid_token(this->location()); }
374 { return Token::make_eof_token(this->location()); }
377 make_operator(Operator op, int chars)
378 { return Token::make_operator_token(op, this->earlier_location(chars)); }
384 could_be_exponent(const char*, const char*);
399 advance_one_utf8_char(const char*, unsigned int*, bool*);
402 advance_one_char(const char*, bool, unsigned int*, bool*);
405 is_unicode_digit(unsigned int c);
408 is_unicode_letter(unsigned int c);
411 is_unicode_uppercase(unsigned int c);
414 is_in_unicode_range(unsigned int C, const Unicode_range* ranges,
418 three_character_operator(char, char, char);
421 two_character_operator(char, char);
424 one_character_operator(char);
432 // The input file name.
433 const char* input_file_name_;
436 // The object used to keep track of file names and line numbers.
438 // The line buffer. This holds the current line.
440 // The size of the line buffer.
442 // The number of characters in the current line.
444 // The current offset in linebuf_.
446 // The current line number.
448 // Whether to add a semicolon if we see a newline now.
449 bool add_semi_at_eol_;
452 #endif // !defined(GO_LEX_H)