X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;f=libcpp%2Fcharset.c;h=cba19a67178c796dcef1c8c70ac5c43dcbc69071;hb=31b4e36ed91066bfc8b6f8658ad52cfa18feacdd;hp=73e55eef03941a66ee303d18b99c7ca35af58860;hpb=bfa57d81a1dd384739bbddb5daa8e5ea4b5433c1;p=pf3gnuchains%2Fgcc-fork.git

diff --git a/libcpp/charset.c b/libcpp/charset.c
index 73e55eef039..cba19a67178 100644
--- a/libcpp/charset.c
+++ b/libcpp/charset.c
@@ -1,12 +1,12 @@
 /* CPP Library - charsets
-   Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2006, 2008
-   Free Software Foundation, Inc.
+   Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2006, 2008, 2009,
+   2010 Free Software Foundation, Inc.
 
    Broken out of c-lex.c Apr 2003, adding valid C99 UCN ranges.
 
 This program is free software; you can redistribute it and/or modify it
 under the terms of the GNU General Public License as published by the
-Free Software Foundation; either version 2, or (at your option) any
+Free Software Foundation; either version 3, or (at your option) any
 later version.
 
 This program is distributed in the hope that it will be useful,
@@ -15,8 +15,8 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 
 You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
+along with this program; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
 
 #include "config.h"
 #include "system.h"
@@ -169,7 +169,7 @@ static inline int
 one_utf8_to_cppchar (const uchar **inbufp, size_t *inbytesleftp,
 		     cppchar_t *cp)
 {
-  static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x02, 0x01 };
+  static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x03, 0x01 };
   static const uchar patns[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
 
   cppchar_t c;
@@ -721,6 +721,8 @@ cpp_init_iconv (cpp_reader *pfile)
 
   pfile->narrow_cset_desc = init_iconv_desc (pfile, ncset, SOURCE_CHARSET);
   pfile->narrow_cset_desc.width = CPP_OPTION (pfile, char_precision);
+  pfile->utf8_cset_desc = init_iconv_desc (pfile, "UTF-8", SOURCE_CHARSET);
+  pfile->utf8_cset_desc.width = CPP_OPTION (pfile, char_precision);
   pfile->char16_cset_desc = init_iconv_desc (pfile,
 					     be ? "UTF-16BE" : "UTF-16LE",
 					     SOURCE_CHARSET);
@@ -741,6 +743,12 @@ _cpp_destroy_iconv (cpp_reader *pfile)
     {
       if (pfile->narrow_cset_desc.func == convert_using_iconv)
 	iconv_close (pfile->narrow_cset_desc.cd);
+      if (pfile->utf8_cset_desc.func == convert_using_iconv)
+	iconv_close (pfile->utf8_cset_desc.cd);
+      if (pfile->char16_cset_desc.func == convert_using_iconv)
+	iconv_close (pfile->char16_cset_desc.cd);
+      if (pfile->char32_cset_desc.func == convert_using_iconv)
+	iconv_close (pfile->char32_cset_desc.cd);
       if (pfile->wide_cset_desc.func == convert_using_iconv)
 	iconv_close (pfile->wide_cset_desc.cd);
     }
@@ -948,10 +956,16 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
    ISO/IEC 10646 is NNNNNNNN; the character designated by the
    universal character name \uNNNN is that character whose character
    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
-   for a universal character name is less than 0x20 or in the range
-   0x7F-0x9F (inclusive), or if the universal character name
-   designates a character in the basic source character set, then the
-   program is ill-formed.
+   for a universal character name corresponds to a surrogate code point
+   (in the range 0xD800-0xDFFF, inclusive), the program is ill-formed.
+   Additionally, if the hexadecimal value for a universal-character-name
+   outside a character or string literal corresponds to a control character
+   (in either of the ranges 0x00-0x1F or 0x7F-0x9F, both inclusive) or to a
+   character in the basic source character set, the program is ill-formed.
+
+   C99 6.4.3: A universal character name shall not specify a character
+   whose short identifier is less than 00A0 other than 0024 ($), 0040 (@),
+   or 0060 (`), nor one in the range D800 through DFFF inclusive.
 
    *PSTR must be preceded by "\u" or "\U"; it is assumed that the
    buffer end is delimited by a non-hex digit.  Returns zero if the
@@ -979,9 +993,9 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
     cpp_error (pfile, CPP_DL_WARNING,
 	       "universal character names are only valid in C++ and C99");
   else if (CPP_WTRADITIONAL (pfile) && identifier_pos == 0)
-    cpp_error (pfile, CPP_DL_WARNING,
-	       "the meaning of '\\%c' is different in traditional C",
-	       (int) str[-1]);
+    cpp_warning (pfile, CPP_W_TRADITIONAL,
+	         "the meaning of '\\%c' is different in traditional C",
+	         (int) str[-1]);
 
   if (str[-1] == 'u')
     length = 4;
@@ -1018,9 +1032,12 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
 		 (int) (str - base), base);
       result = 1;
     }
-  /* The standard permits $, @ and ` to be specified as UCNs.  We use
-     hex escapes so that this also works with EBCDIC hosts.  */
+  /* The C99 standard permits $, @ and ` to be specified as UCNs.  We use
+     hex escapes so that this also works with EBCDIC hosts.
+     C++0x permits everything below 0xa0 within literals;
+     ucn_valid_in_identifier will complain about identifiers.  */
   else if ((result < 0xa0
+	    && !CPP_OPTION (pfile, cplusplus)
 	    && (result != 0x24 && result != 0x40 && result != 0x60))
 	   || (result & 0x80000000)
 	   || (result >= 0xD800 && result <= 0xDFFF))
@@ -1157,8 +1174,8 @@ convert_hex (cpp_reader *pfile, const uchar *from, const uchar *limit,
   size_t mask = width_to_mask (width);
 
   if (CPP_WTRADITIONAL (pfile))
-    cpp_error (pfile, CPP_DL_WARNING,
-	       "the meaning of '\\x' is different in traditional C");
+    cpp_warning (pfile, CPP_W_TRADITIONAL,
+	         "the meaning of '\\x' is different in traditional C");
 
   from++;  /* Skip 'x'.  */
   while (from < limit)
@@ -1285,8 +1302,8 @@ convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit,
 
     case 'a':
       if (CPP_WTRADITIONAL (pfile))
-	cpp_error (pfile, CPP_DL_WARNING,
-		   "the meaning of '\\a' is different in traditional C");
+	cpp_warning (pfile, CPP_W_TRADITIONAL,
+		     "the meaning of '\\a' is different in traditional C");
       c = charconsts[0];
       break;
 
@@ -1301,7 +1318,7 @@ convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit,
     unknown:
       if (ISGRAPH (c))
 	cpp_error (pfile, CPP_DL_PEDWARN,
-		   "unknown escape sequence '\\%c'", (int) c);
+		   "unknown escape sequence: '\\%c'", (int) c);
       else
 	{
 	  /* diagnostic.c does not support "%03o".  When it does, this
@@ -1330,6 +1347,8 @@ converter_for_type (cpp_reader *pfile, enum cpp_ttype type)
     {
     default:
 	return pfile->narrow_cset_desc;
+    case CPP_UTF8STRING:
+	return pfile->utf8_cset_desc;
     case CPP_CHAR16:
     case CPP_STRING16:
 	return pfile->char16_cset_desc;
@@ -1364,7 +1383,34 @@ cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count,
   for (i = 0; i < count; i++)
     {
       p = from[i].text;
-      if (*p == 'L' || *p == 'u' || *p == 'U') p++;
+      if (*p == 'u')
+	{
+	  if (*++p == '8')
+	    p++;
+	}
+      else if (*p == 'L' || *p == 'U') p++;
+      if (*p == 'R')
+	{
+	  const uchar *prefix;
+
+	  /* Skip over 'R"'.  */
+	  p += 2;
+	  prefix = p;
+	  while (*p != '(')
+	    p++;
+	  p++;
+	  limit = from[i].text + from[i].len;
+	  if (limit >= p + (p - prefix) + 1)
+	    limit -= (p - prefix) + 1;
+
+	  /* Raw strings are all normal characters; these can be fed
+	     directly to convert_cset.  */
+	  if (!APPLY_CONVERSION (cvt, p, limit - p, &tbuf))
+	    goto fail;
+
+	  continue;
+	}
+
       p++; /* Skip leading quote.  */
       limit = from[i].text + from[i].len - 1; /* Skip trailing quote.  */
 
@@ -1463,7 +1509,7 @@ narrow_str_to_charconst (cpp_reader *pfile, cpp_string str,
 		 "character constant too long for its type");
     }
   else if (i > 1 && CPP_OPTION (pfile, warn_multichar))
-    cpp_error (pfile, CPP_DL_WARNING, "multi-character character constant");
+    cpp_warning (pfile, CPP_W_MULTICHAR, "multi-character character constant");
 
   /* Multichar constants are of type int and therefore signed.  */
   if (i > 1)