2005-06-29 Kelley Cook <kcook@gcc.gnu.org>

[pf3gnuchains/gcc-fork.git] / libcpp / lex.c
diff --git a/libcpp/lex.c b/libcpp/lex.c

index 8398c7c..ee38a55 100644 (file)
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -1,5 +1,5 @@
  /* CPP Library - lexical analysis.
-   Copyright (C) 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
     Contributed by Per Bothner, 1994-95.
     Based on CCCP program by Paul Rubin, June 1986
     Adapted to ANSI C, Richard Stallman, Jan 1987
@@ -17,7 +17,7 @@ GNU General Public License for more details.
  
  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
-Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
+Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
  
  #include "config.h"
  #include "system.h"
@@ -53,9 +53,6 @@ static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
  static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
  static int skip_line_comment (cpp_reader *);
  static void skip_whitespace (cpp_reader *, cppchar_t);
-static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *, bool);
-static void lex_number (cpp_reader *, cpp_string *);
-static bool forms_identifier_p (cpp_reader *, int);
  static void lex_string (cpp_reader *, cpp_token *, const uchar *);
  static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
  static void create_literal (cpp_reader *, cpp_token *, const uchar *,
@@ -88,8 +85,8 @@ add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
    if (buffer->notes_used == buffer->notes_cap)
      {
        buffer->notes_cap = buffer->notes_cap * 2 + 200;
-      buffer->notes = xrealloc (buffer->notes,
-                               buffer->notes_cap * sizeof (_cpp_line_note));
+      buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
+                                  buffer->notes_cap);
      }
  
    buffer->notes[buffer->notes_used].pos = pos;
@@ -430,10 +427,36 @@ name_p (cpp_reader *pfile, const cpp_string *string)
    return 1;
  }
  
+/* After parsing an identifier or other sequence, produce a warning about
+   sequences not in NFC/NFKC.
+
+   PFILE supplies the warn_normalize option and the skipping state,
+   TOKEN is the token just lexed (its spelling and src_loc are used in
+   the diagnostic), and S is the normalization state accumulated while
+   lexing it.  Nothing is reported while skipping, or unless the
+   warn_normalize option is below the result recorded in S.  */
+static void
+warn_about_normalization (cpp_reader *pfile,
+                         const cpp_token *token,
+                         const struct normalize_state *s)
+{
+  if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
+      && !pfile->state.skipping)
+    {
+      /* Make sure that the token is printed using UCNs, even
+        if we'd otherwise happily print UTF-8.  */
+      unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
+      size_t sz;
+
+      /* cpp_spell_token returns a pointer just past the spelling it
+        wrote, so the difference from BUF is the spelling's length.  */
+      sz = cpp_spell_token (pfile, token, buf, false) - buf;
+      if (NORMALIZE_STATE_RESULT (s) == normalized_C)
+       cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
+                            "`%.*s' is not in NFKC", (int) sz, buf);
+      else
+       cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
+                            "`%.*s' is not in NFC", (int) sz, buf);
+
+      /* BUF is local to this block and only feeds the %.*s arguments
+        above; release it so each warning does not leak the buffer.  */
+      free (buf);
+    }
+}
+
  /* Returns TRUE if the sequence starting at buffer->cur is valid in
     an identifier.  FIRST is TRUE if this starts an identifier.  */
  static bool
-forms_identifier_p (cpp_reader *pfile, int first)
+forms_identifier_p (cpp_reader *pfile, int first,
+                   struct normalize_state *state)
  {
    cpp_buffer *buffer = pfile->buffer;
  
@@ -453,11 +476,13 @@ forms_identifier_p (cpp_reader *pfile, int first)
      }
  
    /* Is this a syntactically valid UCN?  */
-  if (*buffer->cur == '\\'
+  if ((CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99))
+      && *buffer->cur == '\\'
        && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
      {
        buffer->cur += 2;
-      if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first))
+      if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
+                         state))
         return true;
        buffer->cur -= 2;
      }
@@ -467,7 +492,8 @@ forms_identifier_p (cpp_reader *pfile, int first)
  
  /* Lex an identifier starting at BUFFER->CUR - 1.  */
  static cpp_hashnode *
-lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn)
+lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
+               struct normalize_state *nst)
  {
    cpp_hashnode *result;
    const uchar *cur;
@@ -482,13 +508,16 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn)
         cur++;
        }
    pfile->buffer->cur = cur;
-  if (starts_ucn || forms_identifier_p (pfile, false))
+  if (starts_ucn || forms_identifier_p (pfile, false, nst))
      {
        /* Slower version for identifiers containing UCNs (or $).  */
        do {
         while (ISIDNUM (*pfile->buffer->cur))
-         pfile->buffer->cur++;
-      } while (forms_identifier_p (pfile, false));
+         {
+           pfile->buffer->cur++;
+           NORMALIZE_STATE_UPDATE_IDNUM (nst);
+         }
+      } while (forms_identifier_p (pfile, false, nst));
        result = _cpp_interpret_identifier (pfile, base,
                                           pfile->buffer->cur - base);
      }
@@ -524,7 +553,8 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn)
  
  /* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
  static void
-lex_number (cpp_reader *pfile, cpp_string *number)
+lex_number (cpp_reader *pfile, cpp_string *number,
+           struct normalize_state *nst)
  {
    const uchar *cur;
    const uchar *base;
@@ -537,11 +567,14 @@ lex_number (cpp_reader *pfile, cpp_string *number)
  
        /* N.B. ISIDNUM does not include $.  */
        while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
-       cur++;
+       {
+         cur++;
+         NORMALIZE_STATE_UPDATE_IDNUM (nst);
+       }
  
        pfile->buffer->cur = cur;
      }
-  while (forms_identifier_p (pfile, false));
+  while (forms_identifier_p (pfile, false, nst));
  
    number->len = cur - base;
    dest = _cpp_unaligned_alloc (pfile, number->len + 1);
@@ -897,9 +930,13 @@ _cpp_lex_direct (cpp_reader *pfile)
  
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7': case '8': case '9':
-      result->type = CPP_NUMBER;
-      lex_number (pfile, &result->val.str);
-      break;
+      {
+       struct normalize_state nst = INITIAL_NORMALIZE_STATE;
+       result->type = CPP_NUMBER;
+       lex_number (pfile, &result->val.str, &nst);
+       warn_about_normalization (pfile, result, &nst);
+       break;
+      }
  
      case 'L':
        /* 'L' may introduce wide characters or strings.  */
@@ -922,13 +959,18 @@ _cpp_lex_direct (cpp_reader *pfile)
      case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
      case 'Y': case 'Z':
        result->type = CPP_NAME;
-      result->val.node = lex_identifier (pfile, buffer->cur - 1, false);
+      {
+       struct normalize_state nst = INITIAL_NORMALIZE_STATE;
+       result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
+                                          &nst);
+       warn_about_normalization (pfile, result, &nst);
+      }
  
        /* Convert named operators to their proper types.  */
        if (result->val.node->flags & NODE_OPERATOR)
         {
           result->flags |= NAMED_OP;
-         result->type = result->val.node->directive_index;
+         result->type = (enum cpp_ttype) result->val.node->directive_index;
         }
        break;
  
@@ -1067,8 +1109,10 @@ _cpp_lex_direct (cpp_reader *pfile)
        result->type = CPP_DOT;
        if (ISDIGIT (*buffer->cur))
         {
+         struct normalize_state nst = INITIAL_NORMALIZE_STATE;
           result->type = CPP_NUMBER;
-         lex_number (pfile, &result->val.str);
+         lex_number (pfile, &result->val.str, &nst);
+         warn_about_normalization (pfile, result, &nst);
         }
        else if (*buffer->cur == '.' && buffer->cur[1] == '.')
         buffer->cur += 2, result->type = CPP_ELLIPSIS;
@@ -1151,11 +1195,13 @@ _cpp_lex_direct (cpp_reader *pfile)
      case '\\':
        {
         const uchar *base = --buffer->cur;
+       struct normalize_state nst = INITIAL_NORMALIZE_STATE;
  
-       if (forms_identifier_p (pfile, true))
+       if (forms_identifier_p (pfile, true, &nst))
           {
             result->type = CPP_NAME;
-           result->val.node = lex_identifier (pfile, base, true);
+           result->val.node = lex_identifier (pfile, base, true, &nst);
+           warn_about_normalization (pfile, result, &nst);
             break;
           }
         buffer->cur++;
@@ -1495,7 +1541,7 @@ new_buff (size_t len)
      len = MIN_BUFF_SIZE;
    len = CPP_ALIGN (len);
  
-  base = xmalloc (len + sizeof (_cpp_buff));
+  base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
    result = (_cpp_buff *) (base + len);
    result->base = base;
    result->cur = base;