1 // lex.h -- Go frontend lexer. -*- C++ -*-
3 // Copyright 2009 The Go Authors. All rights reserved.
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file.
17 // The keywords. These must be in sorted order, other than
18 // KEYWORD_INVALID. They must match the Keywords::mapping_ array in
23 KEYWORD_INVALID, // Not a keyword.
52 // A token returned from the lexer.
57 // Token classification.
62 // Token indicates end of input.
64 // Token is a keyword.
66 // Token is an identifier.
68 // Token is a string of characters.
70 // Token is an operator.
72 // Token is an integer.
74 // Token is a floating point number.
76 // Token is an imaginary number.
82 Token& operator=(const Token&);
84 // Get token classification.
86 classification() const
87 { return this->classification_; }
89 // Make a token for an invalid value.
91 make_invalid_token(source_location location)
92 { return Token(TOKEN_INVALID, location); }
94 // Make a token representing end of file.
96 make_eof_token(source_location location)
97 { return Token(TOKEN_EOF, location); }
99 // Make a keyword token.
101 make_keyword_token(Keyword keyword, source_location location)
103 Token tok(TOKEN_KEYWORD, location);
104 tok.u_.keyword = keyword;
108 // Make an identifier token.
110 make_identifier_token(const std::string& value, bool is_exported,
111 source_location location)
113 Token tok(TOKEN_IDENTIFIER, location);
114 tok.u_.identifier_value.name = new std::string(value);
115 tok.u_.identifier_value.is_exported = is_exported;
119 // Make a quoted string token.
121 make_string_token(const std::string& value, source_location location)
123 Token tok(TOKEN_STRING, location);
124 tok.u_.string_value = new std::string(value);
128 // Make an operator token.
130 make_operator_token(Operator op, source_location location)
132 Token tok(TOKEN_OPERATOR, location);
137 // Make an integer token.
139 make_integer_token(mpz_t val, source_location location)
141 Token tok(TOKEN_INTEGER, location);
142 mpz_init(tok.u_.integer_value);
143 mpz_swap(tok.u_.integer_value, val);
147 // Make a float token.
149 make_float_token(mpfr_t val, source_location location)
151 Token tok(TOKEN_FLOAT, location);
152 mpfr_init(tok.u_.float_value);
153 mpfr_swap(tok.u_.float_value, val);
157 // Make a token for an imaginary number.
159 make_imaginary_token(mpfr_t val, source_location location)
161 Token tok(TOKEN_IMAGINARY, location);
162 mpfr_init(tok.u_.float_value);
163 mpfr_swap(tok.u_.float_value, val);
167 // Get the location of the token.
170 { return this->location_; }
172 // Return whether this is an invalid token.
175 { return this->classification_ == TOKEN_INVALID; }
177 // Return whether this is the EOF token.
180 { return this->classification_ == TOKEN_EOF; }
182 // Return the keyword value for a keyword token.
186 gcc_assert(this->classification_ == TOKEN_KEYWORD);
187 return this->u_.keyword;
190 // Return whether this is an identifier.
192 is_identifier() const
193 { return this->classification_ == TOKEN_IDENTIFIER; }
195 // Return the identifier.
199 gcc_assert(this->classification_ == TOKEN_IDENTIFIER);
200 return *this->u_.identifier_value.name;
203 // Return whether the identifier is exported.
205 is_identifier_exported() const
207 gcc_assert(this->classification_ == TOKEN_IDENTIFIER);
208 return this->u_.identifier_value.is_exported;
211 // Return whether this is a string.
215 return this->classification_ == TOKEN_STRING;
218 // Return the value of a string. The returned value is a string of
223 gcc_assert(this->classification_ == TOKEN_STRING);
224 return *this->u_.string_value;
227 // Return the value of an integer.
229 integer_value() const
231 gcc_assert(this->classification_ == TOKEN_INTEGER);
232 return &this->u_.integer_value;
235 // Return the value of a float.
239 gcc_assert(this->classification_ == TOKEN_FLOAT);
240 return &this->u_.float_value;
243 // Return the value of an imaginary number.
245 imaginary_value() const
247 gcc_assert(this->classification_ == TOKEN_IMAGINARY);
248 return &this->u_.float_value;
251 // Return the operator value for an operator token.
255 gcc_assert(this->classification_ == TOKEN_OPERATOR);
259 // Return whether this token is KEYWORD.
261 is_keyword(Keyword keyword) const
263 return (this->classification_ == TOKEN_KEYWORD
264 && this->u_.keyword == keyword);
267 // Return whether this token is OP.
269 is_op(Operator op) const
270 { return this->classification_ == TOKEN_OPERATOR && this->u_.op == op; }
272 // Print the token for debugging.
277 // Private constructor used by make_..._token functions above.
278 Token(Classification, source_location);
284 // The token classification.
285 Classification classification_;
288 // The keyword value for TOKEN_KEYWORD.
290 // The token value for TOKEN_IDENTIFIER.
293 // The name of the identifier. This has been mangled to only
294 // include ASCII characters.
296 // Whether this name should be exported. This is true if the
297 // first letter in the name is upper case.
300 // The string value for TOKEN_STRING.
301 std::string* string_value;
302 // The token value for TOKEN_INTEGER.
304 // The token value for TOKEN_FLOAT or TOKEN_IMAGINARY.
306 // The token value for TOKEN_OPERATOR or the keyword value
309 // The source location.
310 source_location location_;
318 Lex(const char* input_file_name, FILE* input_file);
322 // Return the next token.
326 // Return whether the identifier NAME should be exported. NAME is a
327 // mangled name which includes only ASCII characters.
329 is_exported_name(const std::string& name);
331 // A helper function. Append V to STR. IS_CHARACTER is true if V
332 // is a Unicode character which should be converted into UTF-8,
333 // false if it is a byte value to be appended directly. The
334 // location is used to warn about an out of range character.
336 append_char(unsigned int v, bool is_charater, std::string* str,
339 // A helper function. Fetch a UTF-8 character from STR and store it
340 // in *VALUE. Return the number of bytes read from STR. Return 0
341 // if STR does not point to a valid UTF-8 character.
343 fetch_char(const char* str, unsigned int *value);
352 // The current location.
356 // A position CHARS column positions before the current location.
358 earlier_location(int chars) const;
369 { return Token::make_invalid_token(this->location()); }
373 { return Token::make_eof_token(this->location()); }
376 make_operator(Operator op, int chars)
377 { return Token::make_operator_token(op, this->earlier_location(chars)); }
383 could_be_exponent(const char*, const char*);
398 advance_one_utf8_char(const char*, unsigned int*, bool*);
401 advance_one_char(const char*, bool, unsigned int*, bool*);
404 is_unicode_digit(unsigned int c);
407 is_unicode_letter(unsigned int c);
410 is_unicode_uppercase(unsigned int c);
413 is_in_unicode_range(unsigned int C, const Unicode_range* ranges,
417 three_character_operator(char, char, char);
420 two_character_operator(char, char);
423 one_character_operator(char);
431 // The input file name.
432 const char* input_file_name_;
435 // The line buffer. This holds the current line.
437 // The size of the line buffer.
439 // The number of characters in the current line.
441 // The current offset in linebuf_.
443 // The current line number.
445 // Whether to add a semicolon if we see a newline now.
446 bool add_semi_at_eol_;
449 #endif // !defined(GO_LEX_H)