1 /* Shared functions related to mangling names for the GNU compiler
2 for the Java(TM) language.
3 Copyright (C) 2001, 2002, 2003, 2007 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>.
21 Java and all Java-based marks are trademarks or registered trademarks
22 of Sun Microsystems, Inc. in the United States and other countries.
23 The Free Software Foundation is independent of Sun Microsystems, Inc. */
25 /* Written by Alexandre Petit-Bianco <apbianco@cygnus.com> */
29 #include "coretypes.h"
32 #include "java-tree.h"
34 #include "diagnostic-core.h"
37 static void append_unicode_mangled_name (const char *, int);
39 static int unicode_mangling_length (const char *, int);
42 extern struct obstack *mangle_obstack;
45 utf8_cmp (const unsigned char *str, int length, const char *name)
47 const unsigned char *limit = str + length;
50 for (i = 0; name[i]; ++i)
52 int ch = UTF8_GET (str, limit);
57 return str == limit ? 0 : 1;
60 /* A sorted list of all C++ keywords. If you change this, be sure
61 also to change the list in
62 libjava/classpath/tools/gnu/classpath/tools/javah/Keywords.java. */
63 static const char *const cxx_keywords[] =
171 /* Return true if NAME is a C++ keyword. */
173 cxx_keyword_p (const char *name, int length)
175 int last = ARRAY_SIZE (cxx_keywords);
177 int mid = (last + first) / 2;
180 for (mid = (last + first) / 2;
182 old = mid, mid = (last + first) / 2)
184 int kwl = strlen (cxx_keywords[mid]);
185 int min_length = kwl > length ? length : kwl;
186 int r = utf8_cmp ((const unsigned char *) name, min_length, cxx_keywords[mid]);
191 /* We've found a match if all the remaining characters are `$'. */
192 for (i = min_length; i < length && name[i] == '$'; ++i)
207 /* If NAME happens to be a C++ keyword, add `$'. */
208 #define MANGLE_CXX_KEYWORDS(NAME, LEN) \
211 if (cxx_keyword_p ((NAME), (LEN))) \
213 char *tmp_buf = (char *)alloca ((LEN)+1); \
214 memcpy (tmp_buf, (NAME), (LEN)); \
223 /* If the assembler doesn't support UTF8 in symbol names, some
224 characters might need to be escaped. */
228 /* Assuming (NAME, LEN) is a Utf8-encoded string, emit the string
229 appropriately mangled (with Unicode escapes if needed) to
230 MANGLE_OBSTACK. Note that `java', `lang' and `Object' are used so
231 frequently that they could be cached. */
234 append_gpp_mangled_name (const char *name, int len)
236 int encoded_len, needs_escapes;
239 MANGLE_CXX_KEYWORDS (name, len);
241 encoded_len = unicode_mangling_length (name, len);
242 needs_escapes = encoded_len > 0;
244 sprintf (buf, "%d", (needs_escapes ? encoded_len : len));
245 obstack_grow (mangle_obstack, buf, strlen (buf));
248 append_unicode_mangled_name (name, len);
250 obstack_grow (mangle_obstack, name, len);
253 /* Assuming (NAME, LEN) is a Utf8-encoded string, emit the string
254 appropriately mangled (with Unicode escapes) to MANGLE_OBSTACK.
255 Characters needing an escape are encoded as `__UNN_' to `__UNNNN_'
256 (hexadecimal code point); a literal `__U' is itself mangled as `__U_'. */
259 append_unicode_mangled_name (const char *name, int len)
261 const unsigned char *ptr;
262 const unsigned char *limit = (const unsigned char *)name + len;
264 for (ptr = (const unsigned char *) name; ptr < limit; )
266 int ch = UTF8_GET(ptr, limit);
268 if ((ISALNUM (ch) && ch != 'U') || ch == '$')
270 obstack_1grow (mangle_obstack, ch);
273 /* Everything else needs encoding */
277 if (ch == '_' || ch == 'U')
279 /* Prepare to recognize __U */
280 if (ch == '_' && (uuU < 3))
283 obstack_1grow (mangle_obstack, ch);
285 /* We recognize __U that we wish to encode
286 __U_. Finish the encoding. */
287 else if (ch == 'U' && (uuU == 2))
290 obstack_grow (mangle_obstack, "U_", 2);
292 /* Otherwise, just reset uuU and emit the character we
297 obstack_1grow (mangle_obstack, ch);
301 sprintf (buf, "__U%x_", ch);
302 obstack_grow (mangle_obstack, buf, strlen (buf));
308 /* Assuming (NAME, LEN) is a Utf8-encoded string, calculate the
309 length of the string as mangled (a la g++) including Unicode
310 escapes. If no escapes are needed, return 0. */
313 unicode_mangling_length (const char *name, int len)
315 const unsigned char *ptr;
316 const unsigned char *limit = (const unsigned char *)name + len;
317 int need_escapes = 0; /* Whether we need an escape or not */
318 int num_chars = 0; /* Number of characters in the mangled name */
319 int uuU = 0; /* Help us to find __U. 0: '_', 1: '__' */
320 for (ptr = (const unsigned char *) name; ptr < limit; )
322 int ch = UTF8_GET(ptr, limit);
325 error ("internal error - invalid Utf8 name");
326 if ((ISALNUM (ch) && ch != 'U') || ch == '$')
331 /* Everything else needs encoding */
334 int encoding_length = 2;
336 if (ch == '_' || ch == 'U')
338 /* It's always at least one character. */
341 /* Prepare to recognize __U */
342 if (ch == '_' && (uuU < 3))
345 /* We recognize __U that we wish to encode __U_, we
346 count one more character. */
347 else if (ch == 'U' && (uuU == 2))
353 /* Otherwise, just reset uuU */
365 num_chars += (4 + encoding_length);
378 /* The assembler supports UTF8, so we don't use escapes. Mangling is
379 simply <N>NAME. <N> is the number of UTF8-encoded characters that
380 are found in NAME. Note that `java', `lang' and `Object' are used
381 so frequently that they could be cached. */
384 append_gpp_mangled_name (const char *name, int len)
386 const unsigned char *ptr;
387 const unsigned char *limit;
391 MANGLE_CXX_KEYWORDS (name, len);
393 limit = (const unsigned char *)name + len;
395 /* Compute the length of the string we wish to mangle. */
396 for (encoded_len = 0, ptr = (const unsigned char *) name;
397 ptr < limit; encoded_len++)
399 int ch = UTF8_GET(ptr, limit);
402 error ("internal error - invalid Utf8 name");
405 sprintf (buf, "%d", encoded_len);
406 obstack_grow (mangle_obstack, buf, strlen (buf));
407 obstack_grow (mangle_obstack, name, len);
410 #endif /* HAVE_AS_UTF8 */