// Make a general token.  This records only the token's CLASSIFICATION
// and source LOCATION; the constructor body is empty, so nothing else
// is initialized here.
-Token::Token(Classification classification, source_location location)
+Token::Token(Classification classification, Location location)
: classification_(classification), location_(location)
{
}
void
Token::clear()
{
- if (this->classification_ == TOKEN_INTEGER)
+ if (this->classification_ == TOKEN_INTEGER
+ || this->classification_ == TOKEN_CHARACTER)
mpz_clear(this->u_.integer_value);
else if (this->classification_ == TOKEN_FLOAT
|| this->classification_ == TOKEN_IMAGINARY)
case TOKEN_OPERATOR:
this->u_.op = tok.u_.op;
break;
+ case TOKEN_CHARACTER:
case TOKEN_INTEGER:
mpz_init_set(this->u_.integer_value, tok.u_.integer_value);
break;
mpfr_init_set(this->u_.float_value, tok.u_.float_value, GMP_RNDN);
break;
default:
- gcc_unreachable();
+ go_unreachable();
}
}
case TOKEN_OPERATOR:
this->u_.op = tok.u_.op;
break;
+ case TOKEN_CHARACTER:
case TOKEN_INTEGER:
mpz_init_set(this->u_.integer_value, tok.u_.integer_value);
break;
mpfr_init_set(this->u_.float_value, tok.u_.float_value, GMP_RNDN);
break;
default:
- gcc_unreachable();
+ go_unreachable();
}
return *this;
}
case TOKEN_STRING:
fprintf(file, "quoted string \"%s\"", this->u_.string_value->c_str());
break;
+ case TOKEN_CHARACTER:
+ fprintf(file, "character ");
+ mpz_out_str(file, 10, this->u_.integer_value);
+ break;
case TOKEN_INTEGER:
fprintf(file, "integer ");
mpz_out_str(file, 10, this->u_.integer_value);
fprintf(file, "]");
break;
default:
- gcc_unreachable();
+ go_unreachable();
}
break;
default:
- gcc_unreachable();
+ go_unreachable();
}
}
// Class Lex.

// Set up a lexer for INPUT_FILE, known as INPUT_FILE_NAME.  This
// allocates the initial line buffer (linebufsize_ characters) and
// records the start of the file in the line map.
-Lex::Lex(const char* input_file_name, FILE* input_file)
+Lex::Lex(const char* input_file_name, FILE* input_file, Linemap* linemap)
: input_file_name_(input_file_name), input_file_(input_file),
- linebuf_(NULL), linebufsize_(120), linesize_(0), lineoff_(0),
- lineno_(0), add_semi_at_eol_(false)
+ linemap_(linemap), linebuf_(NULL), linebufsize_(120), linesize_(0),
+ lineoff_(0), lineno_(0), add_semi_at_eol_(false), extern_()
{
this->linebuf_ = new char[this->linebufsize_];
- linemap_add(line_table, LC_ENTER, 0, input_file_name, 1);
+ this->linemap_->start_file(input_file_name, 0);
}
// Release the line buffer allocated by the constructor.
Lex::~Lex()
{
delete[] this->linebuf_;
- linemap_add(line_table, LC_LEAVE, 0, NULL, 0);
}
// Read a new line from the file.
this->linesize_= got;
this->lineoff_ = 0;
- linemap_line_start(line_table, this->lineno_, this->linesize_);
+ this->linemap_->start_line(this->lineno_, this->linesize_);
return true;
}
// Get the current location.  The line map is asked for the position
// at column lineoff_ + 1.
-source_location
+Location
Lex::location() const
{
- source_location location;
- LINEMAP_POSITION_FOR_COLUMN(location, line_table, this->lineoff_ + 1);
- return location;
+ return this->linemap_->get_location(this->lineoff_ + 1);
}
// Get a location slightly before the current one.  This is used for
// slightly more efficient handling of operator tokens.  CHARS is the
// number of characters to back up from the current offset.
-source_location
+Location
Lex::earlier_location(int chars) const
{
- source_location location;
- LINEMAP_POSITION_FOR_COLUMN(location, line_table, this->lineoff_ + 1 - chars);
- return location;
+ return this->linemap_->get_location(this->lineoff_ + 1 - chars);
}
// Get the next token.
Token
Lex::next_token()
{
+ bool saw_cpp_comment = false;
while (true)
{
if (!this->require_line())
return this->make_eof_token();
}
+ if (!saw_cpp_comment)
+ this->extern_.clear();
+ saw_cpp_comment = false;
+
const char* p = this->linebuf_ + this->lineoff_;
const char* pend = this->linebuf_ + this->linesize_;
p = pend;
if (p[-1] == '\n' && this->add_semi_at_eol_)
--p;
+ saw_cpp_comment = true;
}
else if (p[1] == '*')
{
this->lineoff_ = p - this->linebuf_;
- source_location location = this->location();
+ Location location = this->location();
if (!this->skip_c_comment())
return Token::make_invalid_token(location);
p = this->linebuf_ + this->lineoff_;
unsigned int ci;
bool issued_error;
this->lineoff_ = p - this->linebuf_;
- this->advance_one_utf8_char(p, &ci, &issued_error);
+ const char *pnext = this->advance_one_utf8_char(p, &ci,
+ &issued_error);
+
+ // Ignore byte order mark at start of file.
+ if (ci == 0xfeff)
+ {
+ p = pnext;
+ break;
+ }
+
if (Lex::is_unicode_letter(ci))
return this->gather_identifier();
*issued_error = true;
return p + 1;
}
+
+ // Report an error for a byte order mark, except at start of file.
+ if (*value == 0xfeff && (this->lineno_ != 1 || this->lineoff_ != 0))
+ {
+ error_at(this->location(), "Unicode (UTF-8) BOM in middle of file");
+ *issued_error = true;
+ }
+
return p + adv;
}
this->lineoff_ = p - this->linebuf_;
const char* pnext = this->advance_one_utf8_char(p, &ci,
&issued_error);
+ bool is_invalid = false;
if (!Lex::is_unicode_letter(ci) && !Lex::is_unicode_digit(ci))
{
// There is no valid place for a non-ASCII character
error_at(this->location(),
"invalid character 0x%x in identifier",
ci);
+ is_invalid = true;
}
if (is_first)
{
buf.assign(pstart, p - pstart);
has_non_ascii_char = true;
}
+ if (is_invalid && !Lex::is_invalid_identifier(buf))
+ buf.append("$INVALID$");
p = pnext;
char ubuf[50];
// This assumes that all assemblers can handle an identifier
buf.append(ubuf);
}
}
- source_location location = this->location();
+ Location location = this->location();
this->add_semi_at_eol_ = true;
this->lineoff_ = p - this->linebuf_;
if (has_non_ascii_char)
const char* p = pstart;
const char* pend = this->linebuf_ + this->linesize_;
- source_location location = this->location();
+ Location location = this->location();
bool neg = false;
if (*p == '+')
}
}
- if (*p != '.' && *p != 'i' && !Lex::could_be_exponent(p, pend))
+ // A partial token that looks like an octal literal might actually be the
+ // beginning of a floating-point or imaginary literal.
+ if (base == 16 || (*p != '.' && *p != 'i' && !Lex::could_be_exponent(p, pend)))
{
std::string s(pnum, p - pnum);
mpz_t val;
void
Lex::append_char(unsigned int v, bool is_character, std::string* str,
- source_location location)
+ Location location)
{
char buf[4];
size_t len;
// Turn it into the "replacement character".
v = 0xfffd;
}
+ if (v >= 0xd800 && v < 0xe000)
+ {
+ warning_at(location, 0,
+ "unicode code point 0x%x is invalid surrogate pair", v);
+ v = 0xfffd;
+ }
if (v <= 0xffff)
{
buf[0] = 0xe0 + (v >> 12);
mpz_t val;
mpz_init_set_ui(val, value);
- source_location location = this->location();
+ Location location = this->location();
this->lineoff_ = p + 1 - this->linebuf_;
- Token ret = Token::make_integer_token(val, location);
+ Token ret = Token::make_character_token(val, location);
mpz_clear(val);
return ret;
}
std::string value;
while (*p != '"')
{
- source_location loc = this->location();
+ Location loc = this->location();
unsigned int c;
bool is_character;
this->lineoff_ = p - this->linebuf_;
Lex::append_char(c, is_character, &value, loc);
}
- source_location location = this->location();
+ Location location = this->location();
this->lineoff_ = p + 1 - this->linebuf_;
return Token::make_string_token(value, location);
}
{
const char* p = this->linebuf_ + this->lineoff_ + 1;
const char* pend = this->linebuf_ + this->linesize_;
- source_location location = this->location();
+ Location location = this->location();
std::string value;
while (true)
this->lineoff_ = p + 1 - this->linebuf_;
return Token::make_string_token(value, location);
}
- source_location loc = this->location();
+ Location loc = this->location();
unsigned int c;
bool issued_error;
this->lineoff_ = p - this->linebuf_;
void
Lex::skip_cpp_comment()
{
+ // Ensure that if EXTERN_ is set, it means that we just saw a
+ // //extern comment.
+ this->extern_.clear();
+
const char* p = this->linebuf_ + this->lineoff_;
const char* pend = this->linebuf_ + this->linesize_;
memcpy(file, p, filelen);
file[filelen] = '\0';
- linemap_add(line_table, LC_LEAVE, 0, NULL, 0);
- linemap_add(line_table, LC_ENTER, 0, file, lineno);
+ this->linemap_->start_file(file, lineno);
this->lineno_ = lineno - 1;
p = plend;
}
}
+ // As a special gccgo extension, a C++ comment at the start of the
+ // line of the form
+ // //extern NAME
+ // which immediately precedes a function declaration means that the
+ // external name of the function declaration is NAME. This is
+ // normally used to permit Go code to call a C function.
+ if (this->lineoff_ == 2
+ && pend - p > 7
+ && memcmp(p, "extern ", 7) == 0)
+ {
+ p += 7;
+ while (p < pend && (*p == ' ' || *p == '\t'))
+ ++p;
+ const char* plend = pend;
+ while (plend > p
+ && (plend[-1] == ' ' || plend[-1] == '\t' || plend[-1] == '\n'))
+ --plend;
+ if (plend > p)
+ this->extern_ = std::string(p, plend - p);
+ }
+
while (p < pend)
{
this->lineoff_ = p - this->linebuf_;
unsigned int c;
bool issued_error;
p = this->advance_one_utf8_char(p, &c, &issued_error);
+ if (issued_error)
+ this->extern_.clear();
}
}
unsigned int stride;
};
+// A table of whitespace characters, searched by Lex::is_unicode_space:
+// code points classified as "Space", "C" locale whitespace, the "next
+// line" control character (0085), the line/paragraph separators (2028,
+// 2029), and the "zero-width non-break space" (feff, also the BOM).
+
+static const Unicode_range unicode_space[] =
+{
+ { 0x0009, 0x000d, 1 },
+ { 0x0020, 0x0020, 1 },
+ { 0x0085, 0x0085, 1 },
+ { 0x00a0, 0x00a0, 1 },
+ { 0x1680, 0x1680, 1 },
+ { 0x180e, 0x180e, 1 },
+ { 0x2000, 0x200a, 1 },
+ { 0x2028, 0x2029, 1 },
+ { 0x202f, 0x202f, 1 },
+ { 0x205f, 0x205f, 1 },
+ { 0x3000, 0x3000, 1 },
+ { 0xfeff, 0xfeff, 1 },
+};
+
// A table of Unicode digits--Unicode code points classified as
// "Digit".
}
}
+// Return whether C is a space character, i.e. is covered by the unicode_space table.
+
+bool
+Lex::is_unicode_space(unsigned int c)
+{
+ return Lex::is_in_unicode_range(c, unicode_space,
+ ARRAY_SIZE(unicode_space));
+}
+
// Return whether C is a Unicode digit--a Unicode code point
// classified as "Digit".
return Lex::is_unicode_uppercase(ci);
}
}
+
+// Return whether the identifier NAME contains an invalid character,
+// i.e. whether it contains the "$INVALID$" marker that
+// gather_identifier appends after reporting a bad character.
+
+bool
+Lex::is_invalid_identifier(const std::string& name)
+{
+ return name.find("$INVALID$") != std::string::npos;
+}