* calls.c (expand_call): Convert structure_value_addr to Pmode if

[pf3gnuchains/gcc-fork.git] / gcc / cpplex.c
diff --git a/gcc/cpplex.c b/gcc/cpplex.c

index c8caa39..a79bedd 100644 (file)
--- a/gcc/cpplex.c
+++ b/gcc/cpplex.c
@@ -26,15 +26,11 @@ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  #include "cpplib.h"
  #include "cpphash.h"
  
-/* Tokens with SPELL_STRING store their spelling in the token list,
-   and it's length in the token->val.name.len.  */
  enum spell_type
  {
    SPELL_OPERATOR = 0,
-  SPELL_CHAR,
    SPELL_IDENT,
-  SPELL_NUMBER,
-  SPELL_STRING,
+  SPELL_LITERAL,
    SPELL_NONE
  };
  
@@ -59,15 +55,17 @@ static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
  static void add_line_note PARAMS ((cpp_buffer *, const uchar *, unsigned int));
  static int skip_line_comment PARAMS ((cpp_reader *));
  static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
-static cpp_hashnode *lex_identifier PARAMS ((cpp_reader *));
+static cpp_hashnode *lex_identifier PARAMS ((cpp_reader *, const uchar *));
  static void lex_number PARAMS ((cpp_reader *, cpp_string *));
-static bool continues_identifier_p PARAMS ((cpp_reader *));
-static void lex_string PARAMS ((cpp_reader *, cpp_token *));
+static bool forms_identifier_p PARAMS ((cpp_reader *, int));
+static void lex_string PARAMS ((cpp_reader *, cpp_token *, const uchar *));
  static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
                                   cppchar_t));
+static void create_literal PARAMS ((cpp_reader *, cpp_token *, const uchar *,
+                                   unsigned int, enum cpp_ttype));
+static bool warn_in_comment PARAMS ((cpp_reader *, _cpp_line_note *));
  static int name_p PARAMS ((cpp_reader *, const cpp_string *));
-static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
-                                  const unsigned char *, cppchar_t *));
+static cppchar_t maybe_read_ucn PARAMS ((cpp_reader *, const uchar **));
  static tokenrun *next_tokenrun PARAMS ((tokenrun *));
  
  static unsigned int hex_digit_value PARAMS ((unsigned int));
@@ -149,15 +147,14 @@ _cpp_clean_line (pfile)
               if (p == buffer->next_line || p[-1] != '\\')
                 break;
  
-             add_line_note (buffer, p - 1,
-                            p != d ? NOTE_ESC_SPACE_NL: NOTE_ESC_NL);
+             add_line_note (buffer, p - 1, p != d ? ' ': '\\');
               d = p - 2;
               buffer->next_line = p - 1;
             }
           else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
             {
               /* Add a note regardless, for the benefit of -Wtrigraphs.  */
-             add_line_note (buffer, d, NOTE_TRIGRAPH);
+             add_line_note (buffer, d, s[2]);
               if (CPP_OPTION (pfile, trigraphs))
                 {
                   *d = _cpp_trigraph_map[s[2]];
@@ -179,10 +176,41 @@ _cpp_clean_line (pfile)
      }
  
    *d = '\n';
-  add_line_note (buffer, d + 1, NOTE_NEWLINE);
+  /* A sentinel note that should never be processed.  */
+  add_line_note (buffer, d + 1, '\n');
    buffer->next_line = s + 1;
  }
  
+/* Return true if the trigraph indicated by NOTE should be warned
+   about in a comment.  */
+static bool
+warn_in_comment (pfile, note)
+     cpp_reader *pfile;
+     _cpp_line_note *note;
+{
+  const uchar *p;
+
+  /* Within comments we don't warn about trigraphs, unless the
+     trigraph forms an escaped newline, as that may change
+     behaviour.  */
+  if (note->type != '/')
+    return false;
+
+  /* If -trigraphs, then this was an escaped newline iff the next note
+     is coincident.  */
+  if (CPP_OPTION (pfile, trigraphs))
+    return note[1].pos == note->pos;
+
+  /* Otherwise, see if this forms an escaped newline.  */
+  p = note->pos + 3;
+  while (is_nvspace (*p))
+    p++;
+
+  /* There might have been escaped newlines between the trigraph and the
+     newline we found.  Hence the position test.  */
+  return (*p == '\n' && p < note[1].pos);
+}
+
  /* Process the notes created by add_line_note as far as the current
     location.  */
  void
@@ -203,32 +231,12 @@ _cpp_process_line_notes (pfile, in_comment)
        buffer->cur_note++;
        col = CPP_BUF_COLUMN (buffer, note->pos + 1);
  
-      switch (note->type)
+      if (note->type == '\\' || note->type == ' ')
         {
-       case NOTE_NEWLINE:
-         /* This note is a kind of sentinel we should never reach.  */
-         abort ();
-
-       case NOTE_TRIGRAPH:
-         if (!in_comment && CPP_OPTION (pfile, warn_trigraphs))
-           {
-             if (CPP_OPTION (pfile, trigraphs))
-               cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
-                                    "trigraph converted to %c",
-                                    (int) note->pos[0]);
-             else
-               cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
-                                    "trigraph ??%c ignored",
-                                    (int) note->pos[2]);
-           }
-         break;
-
-       case NOTE_ESC_SPACE_NL:
-         if (!in_comment)
+         if (note->type == ' ' && !in_comment)
             cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
                                  "backslash and newline separated by space");
-         /* Fall through... */
-       case NOTE_ESC_NL:
+
           if (buffer->next_line > buffer->rlimit)
             {
               cpp_error_with_line (pfile, DL_PEDWARN, pfile->line, col,
@@ -240,6 +248,24 @@ _cpp_process_line_notes (pfile, in_comment)
           buffer->line_base = note->pos;
           pfile->line++;
         }
+      else if (_cpp_trigraph_map[note->type])
+       {
+         if (CPP_OPTION (pfile, warn_trigraphs)
+             && (!in_comment || warn_in_comment (pfile, note)))
+           {
+             if (CPP_OPTION (pfile, trigraphs))
+               cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
+                                    "trigraph ??%c converted to %c",
+                                    note->type,
+                                    (int) _cpp_trigraph_map[note->type]);
+             else
+               cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
+                                    "trigraph ??%c ignored",
+                                    note->type);
+           }
+       }
+      else
+       abort ();
      }
  }
  
@@ -290,6 +316,7 @@ _cpp_skip_block_comment (pfile)
         }
      }
  
+  _cpp_process_line_notes (pfile, true);
    return false;
  }
  
@@ -361,33 +388,51 @@ name_p (pfile, string)
  }
  
  /* Returns TRUE if the sequence starting at buffer->cur is invalid in
-   an identifier.  */
+   an identifier.  FIRST is TRUE if this starts an identifier.  */
  static bool
-continues_identifier_p (pfile)
+forms_identifier_p (pfile, first)
       cpp_reader *pfile;
+     int first;
  {
-  if (*pfile->buffer->cur != '$' || !CPP_OPTION (pfile, dollars_in_ident))
-    return false;
+  cpp_buffer *buffer = pfile->buffer;
  
-  if (CPP_PEDANTIC (pfile) && !pfile->state.skipping && !pfile->warned_dollar)
+  if (*buffer->cur == '$')
      {
-      pfile->warned_dollar = true;
-      cpp_error (pfile, DL_PEDWARN, "'$' in identifier or number");
+      if (!CPP_OPTION (pfile, dollars_in_ident))
+       return false;
+
+      buffer->cur++;
+      if (pfile->warn_dollars && !pfile->state.skipping)
+       {
+         pfile->warn_dollars = false;
+         cpp_error (pfile, DL_PEDWARN, "'$' in identifier or number");
+       }
+
+      return true;
      }
-  pfile->buffer->cur++;
  
-  return true;
+  /* Is this a syntactically valid UCN?  */
+  if (0 && *buffer->cur == '\\'
+      && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
+    {
+      buffer->cur += 2;
+      if (_cpp_valid_ucn (pfile, &buffer->cur, 1 + !first))
+       return true;
+      buffer->cur -= 2;
+    }
+
+  return false;
  }
  
  /* Lex an identifier starting at BUFFER->CUR - 1.  */
  static cpp_hashnode *
-lex_identifier (pfile)
+lex_identifier (pfile, base)
       cpp_reader *pfile;
+     const uchar *base;
  {
    cpp_hashnode *result;
-  const uchar *cur, *base;
+  const uchar *cur;
  
-  base = pfile->buffer->cur - 1;
    do
      {
        cur = pfile->buffer->cur;
@@ -398,7 +443,7 @@ lex_identifier (pfile)
  
        pfile->buffer->cur = cur;
      }
-  while (continues_identifier_p (pfile));
+  while (forms_identifier_p (pfile, false));
  
    result = (cpp_hashnode *)
      ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
@@ -444,7 +489,7 @@ lex_number (pfile, number)
  
        pfile->buffer->cur = cur;
      }
-  while (continues_identifier_p (pfile));
+  while (forms_identifier_p (pfile, false));
  
    number->len = cur - base;
    dest = _cpp_unaligned_alloc (pfile, number->len + 1);
@@ -453,63 +498,77 @@ lex_number (pfile, number)
    number->text = dest;
  }
  
+/* Create a token of type TYPE with a literal spelling.  */
+static void
+create_literal (pfile, token, base, len, type)
+     cpp_reader *pfile;
+     cpp_token *token;
+     const uchar *base;
+     unsigned int len;
+     enum cpp_ttype type;
+{
+  uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
+
+  memcpy (dest, base, len);
+  dest[len] = '\0';
+  token->type = type;
+  token->val.str.len = len;
+  token->val.str.text = dest;
+}
+
  /* Lexes a string, character constant, or angle-bracketed header file
-   name.  The stored string is guaranteed NUL-terminated, but it is
-   not guaranteed that this is the first NUL since embedded NULs are
-   preserved.  */
+   name.  The stored string contains the spelling, including opening
+   quote and any leading 'L'.  TOKEN's type is set to the type of the
+   literal, or CPP_OTHER if it was not properly terminated.
+
+   The spelling is NUL-terminated, but it is not guaranteed that this
+   is the first NUL since embedded NULs are preserved.  */
  static void
-lex_string (pfile, token)
+lex_string (pfile, token, base)
       cpp_reader *pfile;
       cpp_token *token;
+     const uchar *base;
  {
-  cpp_buffer *buffer = pfile->buffer;
-  bool warned_nulls = false;
-  const uchar *base;
-  uchar *dest;
+  bool saw_NUL = false;
+  const uchar *cur;
    cppchar_t terminator;
-
-  base = buffer->cur;
-  terminator = base[-1];
-  if (terminator == '<')
-    terminator = '>';
+  enum cpp_ttype type;
+
+  cur = base;
+  terminator = *cur++;
+  if (terminator == 'L')
+    terminator = *cur++;
+  if (terminator == '\"')
+    type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
+  else if (terminator == '\'')
+    type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
+  else
+    terminator = '>', type = CPP_HEADER_NAME;
  
    for (;;)
      {
-      cppchar_t c = *buffer->cur++;
+      cppchar_t c = *cur++;
  
        /* In #include-style directives, terminators are not escapable.  */
-      if (c == '\\' && !pfile->state.angled_headers && *buffer->cur != '\n')
-       buffer->cur++;
-      else if (c == terminator || c == '\n')
+      if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
+       cur++;
+      else if (c == terminator)
         break;
-      else if (c == '\0')
+      else if (c == '\n')
         {
-         if (!warned_nulls)
-           {
-             warned_nulls = true;
-             cpp_error (pfile, DL_WARNING,
-                        "null character(s) preserved in literal");
-           }
+         cur--;
+         type = CPP_OTHER;
+         break;
         }
+      else if (c == '\0')
+       saw_NUL = true;
      }
  
-  token->val.str.len = buffer->cur - base - 1;
-  dest = _cpp_unaligned_alloc (pfile, token->val.str.len + 1);
-  memcpy (dest, base, token->val.str.len);
-  dest[token->val.str.len] = '\0';
-  token->val.str.text = dest;
+  if (saw_NUL && !pfile->state.skipping)
+    cpp_error (pfile, DL_WARNING, "null character(s) preserved in literal");
  
-  if (buffer->cur[-1] == '\n')
-    {
-      /* No string literal may extend over multiple lines.  In
-        assembly language, suppress the error except for <>
-        includes.  This is a kludge around not knowing where
-        comments are.  */
-      if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
-       cpp_error (pfile, DL_ERROR, "missing terminating %c character",
-                  (int) terminator);
-      buffer->cur--;
-    }
+  pfile->buffer->cur = cur;
+  create_literal (pfile, token, base, cur - base, type);
  }
  
  /* The stored comment includes the comment start and any terminator.  */
@@ -699,15 +758,15 @@ _cpp_get_fresh_line (pfile)
                                "no newline at end of file");
         }
   
+      if (!buffer->prev)
+       return false;
+
        if (buffer->return_at_eof)
         {
-         buffer->return_at_eof = false;
+         _cpp_pop_buffer (pfile);
           return false;
         }
  
-      if (!buffer->prev)
-       return false;
-
        _cpp_pop_buffer (pfile);
      }
  }
@@ -748,6 +807,12 @@ _cpp_lex_direct (pfile)
        if (!_cpp_get_fresh_line (pfile))
         {
           result->type = CPP_EOF;
+         if (!pfile->state.in_directive)
+           {
+             /* Tell the compiler the line number of the EOF token.  */
+             result->line = pfile->line;
+             result->flags = BOL;
+           }
           return result;
         }
        if (!pfile->keep_tokens)
@@ -796,14 +861,11 @@ _cpp_lex_direct (pfile)
        /* 'L' may introduce wide characters or strings.  */
        if (*buffer->cur == '\'' || *buffer->cur == '"')
         {
-         result->type = (*buffer->cur == '"' ? CPP_WSTRING: CPP_WCHAR);
-         buffer->cur++;
-         lex_string (pfile, result);
+         lex_string (pfile, result, buffer->cur - 1);
           break;
         }
        /* Fall through.  */
  
-    start_ident:
      case '_':
      case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
      case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
@@ -816,7 +878,7 @@ _cpp_lex_direct (pfile)
      case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
      case 'Y': case 'Z':
        result->type = CPP_NAME;
-      result->val.node = lex_identifier (pfile);
+      result->val.node = lex_identifier (pfile, buffer->cur - 1);
  
        /* Convert named operators to their proper types.  */
        if (result->val.node->flags & NODE_OPERATOR)
@@ -828,8 +890,7 @@ _cpp_lex_direct (pfile)
  
      case '\'':
      case '"':
-      result->type = c == '"' ? CPP_STRING: CPP_CHAR;
-      lex_string (pfile, result);
+      lex_string (pfile, result, buffer->cur - 1);
        break;
  
      case '/':
@@ -885,8 +946,7 @@ _cpp_lex_direct (pfile)
      case '<':
        if (pfile->state.angled_headers)
         {
-         result->type = CPP_HEADER_NAME;
-         lex_string (pfile, result);
+         lex_string (pfile, result, buffer->cur - 1);
           break;
         }
  
@@ -1044,21 +1104,29 @@ _cpp_lex_direct (pfile)
      case '@': result->type = CPP_ATSIGN; break;
  
      case '$':
-      if (CPP_OPTION (pfile, dollars_in_ident))
-       goto start_ident;
-      /* Fall through...  */
+    case '\\':
+      {
+       const uchar *base = --buffer->cur;
+
+       if (forms_identifier_p (pfile, true))
+         {
+           result->type = CPP_NAME;
+           result->val.node = lex_identifier (pfile, base);
+           break;
+         }
+       buffer->cur++;
+      }
  
      default:
-      result->type = CPP_OTHER;
-      result->val.c = c;
+      create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
        break;
      }
  
    return result;
  }
  
-/* An upper bound on the number of bytes needed to spell TOKEN,
-   including preceding whitespace.  */
+/* An upper bound on the number of bytes needed to spell TOKEN.
+   Does not include preceding whitespace.  */
  unsigned int
  cpp_token_len (token)
       const cpp_token *token;
@@ -1067,13 +1135,12 @@ cpp_token_len (token)
  
    switch (TOKEN_SPELL (token))
      {
-    default:           len = 0;                                break;
-    case SPELL_NUMBER:
-    case SPELL_STRING: len = token->val.str.len;               break;
+    default:           len = 4;                                break;
+    case SPELL_LITERAL:        len = token->val.str.len;               break;
      case SPELL_IDENT:  len = NODE_LEN (token->val.node);       break;
      }
-  /* 1 for whitespace, 4 for comment delimiters.  */
-  return len + 5;
+
+  return len;
  }
  
  /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
@@ -1106,44 +1173,17 @@ cpp_spell_token (pfile, token, buffer)
        }
        break;
  
-    case SPELL_CHAR:
-      *buffer++ = token->val.c;
-      break;
-
      spell_ident:
      case SPELL_IDENT:
        memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
        buffer += NODE_LEN (token->val.node);
        break;
  
-    case SPELL_NUMBER:
+    case SPELL_LITERAL:
        memcpy (buffer, token->val.str.text, token->val.str.len);
        buffer += token->val.str.len;
        break;
  
-    case SPELL_STRING:
-      {
-       int left, right, tag;
-       switch (token->type)
-         {
-         case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
-         case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
-         case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
-         case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
-         case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
-         default:
-           cpp_error (pfile, DL_ICE, "unknown string token %s\n",
-                      TOKEN_NAME (token));
-           return buffer;
-         }
-       if (tag) *buffer++ = tag;
-       *buffer++ = left;
-       memcpy (buffer, token->val.str.text, token->val.str.len);
-       buffer += token->val.str.len;
-       *buffer++ = right;
-      }
-      break;
-
      case SPELL_NONE:
        cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
        break;
@@ -1158,8 +1198,8 @@ unsigned char *
  cpp_token_as_text (pfile, token)
       cpp_reader *pfile;
       const cpp_token *token;
-{
-  unsigned int len = cpp_token_len (token);
+{ 
+  unsigned int len = cpp_token_len (token) + 1;
    unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
  
    end = cpp_spell_token (pfile, token, start);
@@ -1207,40 +1247,15 @@ cpp_output_token (token, fp)
        }
        break;
  
-    case SPELL_CHAR:
-      putc (token->val.c, fp);
-      break;
-
      spell_ident:
      case SPELL_IDENT:
        fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
      break;
  
-    case SPELL_NUMBER:
+    case SPELL_LITERAL:
        fwrite (token->val.str.text, 1, token->val.str.len, fp);
        break;
  
-    case SPELL_STRING:
-      {
-       int left, right, tag;
-       switch (token->type)
-         {
-         case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
-         case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
-         case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
-         case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
-         case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
-         default:
-           fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
-           return;
-         }
-       if (tag) putc (tag, fp);
-       putc (left, fp);
-       fwrite (token->val.str.text, 1, token->val.str.len, fp);
-       putc (right, fp);
-      }
-      break;
-
      case SPELL_NONE:
        /* An error, most probably.  */
        break;
@@ -1258,14 +1273,11 @@ _cpp_equiv_tokens (a, b)
        default:                 /* Keep compiler happy.  */
        case SPELL_OPERATOR:
         return 1;
-      case SPELL_CHAR:
-       return a->val.c == b->val.c; /* Character.  */
        case SPELL_NONE:
         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
        case SPELL_IDENT:
         return a->val.node == b->val.node;
-      case SPELL_NUMBER:
-      case SPELL_STRING:
+      case SPELL_LITERAL:
         return (a->val.str.len == b->val.str.len
                 && !memcmp (a->val.str.text, b->val.str.text,
                             a->val.str.len));
@@ -1321,9 +1333,12 @@ cpp_avoid_paste (pfile, token1, token2)
                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
      case CPP_NUMBER:   return (b == CPP_NUMBER || b == CPP_NAME
                                 || c == '.' || c == '+' || c == '-');
-    case CPP_OTHER:    return (CPP_OPTION (pfile, objc)
-                               && token1->val.c == '@'
-                               && (b == CPP_NAME || b == CPP_STRING));
+                                     /* UCNs */
+    case CPP_OTHER:    return ((token1->val.str.text[0] == '\\'
+                                && b == CPP_NAME)
+                               || (CPP_OPTION (pfile, objc)
+                                   && token1->val.str.text[0] == '@'
+                                   && (b == CPP_NAME || b == CPP_STRING)));
      default:           break;
      }
  
@@ -1363,93 +1378,31 @@ hex_digit_value (c)
      abort ();
  }
  
-/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.  Returns 1 to indicate
-   failure if cpplib is not parsing C++ or C99.  Such failure is
-   silent, and no variables are updated.  Otherwise returns 0, and
-   warns if -Wtraditional.
-
-   [lex.charset]: The character designated by the universal character
-   name \UNNNNNNNN is that character whose character short name in
-   ISO/IEC 10646 is NNNNNNNN; the character designated by the
-   universal character name \uNNNN is that character whose character
-   short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
-   for a universal character name is less than 0x20 or in the range
-   0x7F-0x9F (inclusive), or if the universal character name
-   designates a character in the basic source character set, then the
-   program is ill-formed.
-
-   We assume that wchar_t is Unicode, so we don't need to do any
-   mapping.  Is this ever wrong?
-
-   PC points to the 'u' or 'U', PSTR is points to the byte after PC,
-   LIMIT is the end of the string or charconst.  PSTR is updated to
-   point after the UCS on return, and the UCS is written into PC.  */
-
-static int
-maybe_read_ucs (pfile, pstr, limit, pc)
+/* Read a possible universal character name starting at *PSTR.  */
+static cppchar_t
+maybe_read_ucn (pfile, pstr)
       cpp_reader *pfile;
-     const unsigned char **pstr;
-     const unsigned char *limit;
-     cppchar_t *pc;
+     const uchar **pstr;
  {
-  const unsigned char *p = *pstr;
-  unsigned int code = 0;
-  unsigned int c = *pc, length;
-
-  /* Only attempt to interpret a UCS for C++ and C99.  */
-  if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
-    return 1;
-
-  if (CPP_WTRADITIONAL (pfile))
-    cpp_error (pfile, DL_WARNING,
-              "the meaning of '\\%c' is different in traditional C", c);
-
-  length = (c == 'u' ? 4: 8);
+  cppchar_t result, c = (*pstr)[-1];
  
-  if ((size_t) (limit - p) < length)
+  result = _cpp_valid_ucn (pfile, pstr, false);
+  if (result)
      {
-      cpp_error (pfile, DL_ERROR, "incomplete universal-character-name");
-      /* Skip to the end to avoid more diagnostics.  */
-      p = limit;
-    }
-  else
-    {
-      for (; length; length--, p++)
+      if (CPP_WTRADITIONAL (pfile))
+       cpp_error (pfile, DL_WARNING,
+                  "the meaning of '\\%c' is different in traditional C",
+                  (int) c);
+
+      if (CPP_OPTION (pfile, EBCDIC))
         {
-         c = *p;
-         if (ISXDIGIT (c))
-           code = (code << 4) + hex_digit_value (c);
-         else
-           {
-             cpp_error (pfile, DL_ERROR,
-                        "non-hex digit '%c' in universal-character-name", c);
-             /* We shouldn't skip in case there are multibyte chars.  */
-             break;
-           }
+         cpp_error (pfile, DL_ERROR,
+                    "universal character with an EBCDIC target");
+         result = 0x3f;  /* EBCDIC invalid character */
         }
      }
  
-  if (CPP_OPTION (pfile, EBCDIC))
-    {
-      cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target");
-      code = 0x3f;  /* EBCDIC invalid character */
-    }
-  /* True extended characters are OK.  */
-  else if (code >= 0xa0
-          && !(code & 0x80000000)
-          && !(code >= 0xD800 && code <= 0xDFFF))
-    ;
-  /* The standard permits $, @ and ` to be specified as UCNs.  We use
-     hex escapes so that this also works with EBCDIC hosts.  */
-  else if (code == 0x24 || code == 0x40 || code == 0x60)
-    ;
-  /* Don't give another error if one occurred above.  */
-  else if (length == 0)
-    cpp_error (pfile, DL_ERROR, "universal-character-name out of range");
-
-  *pstr = p;
-  *pc = code;
-  return 0;
+  return result;
  }
  
  /* Returns the value of an escape sequence, truncated to the correct
@@ -1470,7 +1423,7 @@ cpp_parse_escape (pfile, pstr, limit, wide)
  
    int unknown = 0;
    const unsigned char *str = *pstr, *charconsts;
-  cppchar_t c, mask;
+  cppchar_t c, ucn, mask;
    unsigned int width;
  
    if (CPP_OPTION (pfile, EBCDIC))
@@ -1519,7 +1472,11 @@ cpp_parse_escape (pfile, pstr, limit, wide)
        break;
  
      case 'u': case 'U':
-      unknown = maybe_read_ucs (pfile, &str, limit, &c);
+      ucn = maybe_read_ucn (pfile, &str);
+      if (ucn)
+       c = ucn;
+      else
+       unknown = true;
        break;
  
      case 'x':
@@ -1618,14 +1575,15 @@ cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp)
       unsigned int *pchars_seen;
       int *unsignedp;
  {
-  const unsigned char *str = token->val.str.text;
-  const unsigned char *limit = str + token->val.str.len;
+  const unsigned char *str, *limit;
    unsigned int chars_seen = 0;
    size_t width, max_chars;
    cppchar_t c, mask, result = 0;
    bool unsigned_p;
  
-  /* Width in bits.  */
+  str = token->val.str.text + 1 + (token->type == CPP_WCHAR);
+  limit = token->val.str.text + token->val.str.len - 1;
+
    if (token->type == CPP_CHAR)
      {
        width = CPP_OPTION (pfile, char_precision);