For PR java/2812:

[pf3gnuchains/gcc-fork.git] / gcc / cpplex.c
diff --git a/gcc/cpplex.c b/gcc/cpplex.c

index 9bbab0f..7424827 100644 (file)
--- a/gcc/cpplex.c
+++ b/gcc/cpplex.c
@@ -98,11 +98,8 @@ static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
  static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
  static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
  static int name_p PARAMS ((cpp_reader *, const cpp_string *));
-static unsigned int parse_escape PARAMS ((cpp_reader *, const unsigned char **,
-                                         const unsigned char *, HOST_WIDE_INT,
-                                         int));
-static unsigned int read_ucs PARAMS ((cpp_reader *, const unsigned char **,
-                                     const unsigned char *, unsigned int));
+static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
+                                  const unsigned char *, unsigned int *));
  
  static cpp_chunk *new_chunk PARAMS ((unsigned int));
  static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
@@ -121,7 +118,7 @@ cpp_ideq (token, string)
    if (token->type != CPP_NAME)
      return 0;
  
-  return !ustrcmp (token->val.node->name, (const U_CHAR *) string);
+  return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
  }
  
  /* Call when meeting a newline.  Returns the character after the newline
@@ -479,22 +476,14 @@ parse_identifier (pfile, c)
  {
    cpp_hashnode *result;
    cpp_buffer *buffer = pfile->buffer;
-  unsigned char *dest, *limit;
-  unsigned int r = 0, saw_dollar = 0;
-
-  dest = POOL_FRONT (&pfile->ident_pool);
-  limit = POOL_LIMIT (&pfile->ident_pool);
+  unsigned int saw_dollar = 0, len;
+  struct obstack *stack = &pfile->hash_table->stack;
  
    do
      {
        do
         {
-         /* Need room for terminating null.  */
-         if (dest + 1 >= limit)
-           limit = _cpp_next_chunk (&pfile->ident_pool, 0, &dest);
-
-         *dest++ = c;
-         r = HASHSTEP (r, c);
+         obstack_1grow (stack, c);
  
           if (c == '$')
             saw_dollar++;
@@ -524,18 +513,20 @@ parse_identifier (pfile, c)
      cpp_pedwarn (pfile, "'$' character(s) in identifier");
  
    /* Identifiers are null-terminated.  */
-  *dest = '\0';
+  len = obstack_object_size (stack);
+  obstack_1grow (stack, '\0');
  
    /* This routine commits the memory if necessary.  */
-  result = _cpp_lookup_with_hash (pfile,
-                                 dest - POOL_FRONT (&pfile->ident_pool), r);
+  result = (cpp_hashnode *)
+    ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
  
    /* Some identifiers require diagnostics when lexed.  */
    if (result->flags & NODE_DIAGNOSTIC && !pfile->skipping)
      {
        /* It is allowed to poison the same identifier twice.  */
        if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
-       cpp_error (pfile, "attempt to use poisoned \"%s\"", result->name);
+       cpp_error (pfile, "attempt to use poisoned \"%s\"",
+                  NODE_NAME (result));
  
        /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
          replacement list of a variadic macro.  */
@@ -890,9 +881,11 @@ _cpp_lex_token (pfile, result)
    switch (c)
      {
      case EOF:
-      /* Non-empty files should end in a newline.  Ignore for command
-        line and _Pragma buffers.  */
-      if (pfile->lexer_pos.col != 0 && !buffer->from_stage3)
+      /* Non-empty files should end in a newline.  Checking "bol" too
+         prevents multiple warnings when hitting the EOF more than
+         once, like in a directive.  Don't warn for command line and
+         _Pragma buffers.  */
+      if (pfile->lexer_pos.col != 0 && !bol && !buffer->from_stage3)
         cpp_pedwarn (pfile, "no newline at end of file");
        pfile->state.next_bol = 1;
        pfile->skipping = 0;     /* In case missing #endif.  */
@@ -1008,7 +1001,7 @@ _cpp_lex_token (pfile, result)
         ACCEPT_CHAR (CPP_DIV_EQ);
        if (c != '/' && c != '*')
         break;
-
+      
        if (c == '*')
         {
           if (skip_block_comment (pfile))
@@ -1286,9 +1279,9 @@ cpp_token_len (token)
  
    switch (TOKEN_SPELL (token))
      {
-    default:           len = 0;                        break;
-    case SPELL_STRING: len = token->val.str.len;       break;
-    case SPELL_IDENT:  len = token->val.node->length;  break;
+    default:           len = 0;                                break;
+    case SPELL_STRING: len = token->val.str.len;               break;
+    case SPELL_IDENT:  len = NODE_LEN (token->val.node);       break;
      }
    /* 1 for whitespace, 4 for comment delimeters.  */
    return len + 5;
@@ -1326,8 +1319,8 @@ cpp_spell_token (pfile, token, buffer)
  
      case SPELL_IDENT:
        spell_ident:
-      memcpy (buffer, token->val.node->name, token->val.node->length);
-      buffer += token->val.node->length;
+      memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
+      buffer += NODE_LEN (token->val.node);
        break;
  
      case SPELL_STRING:
@@ -1417,7 +1410,7 @@ cpp_output_token (token, fp)
  
      spell_ident:
      case SPELL_IDENT:
-      ufputs (token->val.node->name, fp);
+      ufputs (NODE_NAME (token->val.node), fp);
      break;
  
      case SPELL_STRING:
@@ -1672,7 +1665,10 @@ hex_digit_value (c)
    abort ();
  }
  
-/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence (C++ and C99).
+/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.  Returns 1 to indicate
+   failure if cpplib is not parsing C++ or C99.  Such failure is
+   silent, and no variables are updated.  Otherwise returns 0, and
+   warns if -Wtraditional.
  
     [lex.charset]: The character designated by the universal character
     name \UNNNNNNNN is that character whose character short name in
@@ -1685,75 +1681,92 @@ hex_digit_value (c)
     program is ill-formed.
  
     We assume that wchar_t is Unicode, so we don't need to do any
-   mapping.  Is this ever wrong?  */
+   mapping.  Is this ever wrong?
  
-static unsigned int
-read_ucs (pfile, pstr, limit, length)
+   *PC holds the 'u' or 'U', *PSTR points to the byte after it, and
+   LIMIT is the end of the string or charconst.  On return *PSTR is
+   updated to point after the UCS, and the UCS is written into *PC.  */
+
+static int
+maybe_read_ucs (pfile, pstr, limit, pc)
       cpp_reader *pfile;
       const unsigned char **pstr;
       const unsigned char *limit;
-     unsigned int length;
+     unsigned int *pc;
  {
    const unsigned char *p = *pstr;
-  unsigned int c, code = 0;
+  unsigned int code = 0;
+  unsigned int c = *pc, length;
  
-  for (; length; --length)
-    {
-      if (p >= limit)
-       {
-         cpp_error (pfile, "incomplete universal-character-name");
-         break;
-       }
+  /* Only attempt to interpret a UCS for C++ and C99.  */
+  if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
+    return 1;
  
-      c = *p;
-      if (ISXDIGIT (c))
-       {
-         code = (code << 4) + hex_digit_value (c);
-         p++;
-       }
-      else
+  if (CPP_WTRADITIONAL (pfile))
+    cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
+
+  length = (c == 'u' ? 4: 8);
+
+  if ((size_t) (limit - p) < length)
+    {
+      cpp_error (pfile, "incomplete universal-character-name");
+      /* Skip to the end to avoid more diagnostics.  */
+      p = limit;
+    }
+  else
+    {
+      for (; length; length--, p++)
         {
-         cpp_error (pfile,
-                    "non-hex digit '%c' in universal-character-name", c);
-         break;
+         c = *p;
+         if (ISXDIGIT (c))
+           code = (code << 4) + hex_digit_value (c);
+         else
+           {
+             cpp_error (pfile,
+                        "non-hex digit '%c' in universal-character-name", c);
+             /* We shouldn't skip in case there are multibyte chars.  */
+             break;
+           }
         }
-
      }
  
  #ifdef TARGET_EBCDIC
    cpp_error (pfile, "universal-character-name on EBCDIC target");
    code = 0x3f;  /* EBCDIC invalid character */
  #else
-  if (code > 0x9f && !(code & 0x80000000))
-    ; /* True extended character, OK.  */
-  else if (code >= 0x20 && code < 0x7f)
-    {
-      /* ASCII printable character.  The C character set consists of all of
-        these except $, @ and `.  We use hex escapes so that this also
-        works with EBCDIC hosts.  */
-      if (code != 0x24 && code != 0x40 && code != 0x60)
-       cpp_error (pfile, "universal-character-name used for '%c'", code);
-    }
-  else
-    cpp_error (pfile, "invalid universal-character-name");
+ /* True extended characters are OK.  */
+  if (code >= 0xa0
+      && !(code & 0x80000000)
+      && !(code >= 0xD800 && code <= 0xDFFF))
+    ;
+  /* The standard permits $, @ and ` to be specified as UCNs.  We use
+     hex escapes so that this also works with EBCDIC hosts.  */
+  else if (code == 0x24 || code == 0x40 || code == 0x60)
+    ;
+  /* Don't give another error if one occurred above.  */
+  else if (length == 0)
+    cpp_error (pfile, "universal-character-name out of range");
  #endif
  
    *pstr = p;
-  return code;
+  *pc = code;
+  return 0;
  }
  
  /* Interpret an escape sequence, and return its value.  PSTR points to
     the input pointer, which is just after the backslash.  LIMIT is how
-   much text we have.  MASK is the precision for the target type (char
-   or wchar_t).  TRADITIONAL, if true, does not interpret escapes that
-   did not exist in traditional C.  */
+   much text we have.  MASK is a bitmask for the precision for the
+   destination type (char or wchar_t).  TRADITIONAL, if true, does not
+   interpret escapes that did not exist in traditional C.
  
-static unsigned int
-parse_escape (pfile, pstr, limit, mask, traditional)
+   Handles all relevant diagnostics.  */
+
+unsigned int
+cpp_parse_escape (pfile, pstr, limit, mask, traditional)
       cpp_reader *pfile;
       const unsigned char **pstr;
       const unsigned char *limit;
-     HOST_WIDE_INT mask;
+     unsigned HOST_WIDE_INT mask;
       int traditional;
  {
    int unknown = 0;
@@ -1789,17 +1802,8 @@ parse_escape (pfile, pstr, limit, mask, traditional)
        c = TARGET_ESC;
        break;
        
-      /* Warnings and support checks handled by read_ucs().  */
      case 'u': case 'U':
-      if (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99))
-       {
-         if (CPP_WTRADITIONAL (pfile))
-           cpp_warning (pfile,
-                        "the meaning of '\\%c' varies with -traditional", c);
-         c = read_ucs (pfile, &str, limit, c == 'u' ? 4 : 8);
-       }
-      else
-       unknown = 1;
+      unknown = maybe_read_ucs (pfile, &str, limit, &c);
        break;
  
      case 'x':
@@ -1871,6 +1875,9 @@ parse_escape (pfile, pstr, limit, mask, traditional)
         cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
      }
  
+  if (c > mask)
+    cpp_pedwarn (pfile, "escape sequence out of range for character");
+
    *pstr = str;
    return c;
  }
@@ -1900,7 +1907,8 @@ cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
    const unsigned char *limit = str + token->val.str.len;
    unsigned int chars_seen = 0;
    unsigned int width, max_chars, c;
-  HOST_WIDE_INT result = 0, mask;
+  unsigned HOST_WIDE_INT mask;
+  HOST_WIDE_INT result = 0;
  
  #ifdef MULTIBYTE_CHARS
    (void) local_mbtowc (NULL, NULL, 0);
@@ -1940,11 +1948,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
  #endif
  
        if (c == '\\')
-       {
-         c = parse_escape (pfile, &str, limit, mask, traditional);
-         if (width < HOST_BITS_PER_WIDE_INT && c > mask)
-           cpp_pedwarn (pfile, "escape sequence out of range for character");
-       }
+       c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
  
  #ifdef MAP_CHARACTER
        if (ISPRINT (c))
@@ -1966,7 +1970,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
    else if (chars_seen > max_chars)
      {
        chars_seen = max_chars;
-      cpp_error (pfile, "character constant too long");
+      cpp_warning (pfile, "character constant too long");
      }
    else if (chars_seen > 1 && !traditional && warn_multi)
      cpp_warning (pfile, "multi-character character constant");