1 // natString.cc - Implementation of java.lang.String native methods.
3 /* Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation
5 This file is part of libgcj.
7 This software is copyrighted work licensed under the terms of the
8 Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
17 #include <java/lang/Character.h>
18 #include <java/lang/String.h>
19 #include <java/lang/IndexOutOfBoundsException.h>
20 #include <java/lang/ArrayIndexOutOfBoundsException.h>
21 #include <java/lang/StringIndexOutOfBoundsException.h>
22 #include <java/lang/NullPointerException.h>
23 #include <java/io/ByteArrayOutputStream.h>
24 #include <java/io/OutputStreamWriter.h>
25 #include <java/io/ByteArrayInputStream.h>
26 #include <java/io/InputStreamReader.h>
27 #include <java/util/Locale.h>
28 #include <gnu/gcj/convert/UnicodeToBytes.h>
29 #include <gnu/gcj/convert/BytesToUnicode.h>
// Finalizer callback; intern() and _Jv_NewStringUtf8Const register it
// through _Jv_RegisterFinalizer for the strings they place in strhash.
32 static void unintern (jobject);
// Table of interned strings, probed by _Jv_StringFindSlot.
33 static jstring* strhash = NULL;
34 static int strhash_count = 0; /* Number of slots used in strhash. */
35 static int strhash_size = 0; /* Number of slots available in strhash.
36 * Assumed to be a power of 2! */
38 // Some defines used by toUpperCase / toLowerCase.
40 #define CAPITAL_S 0x0053
41 #define SMALL_I 0x0069
42 #define CAPITAL_I_WITH_DOT 0x0130
43 #define SMALL_DOTLESS_I 0x0131
44 #define CAPITAL_I 0x0049
// Marker written into a strhash slot by unintern() when its string is removed.
46 #define DELETED_STRING ((jstring)(~0))
47 #define SET_STRING_IS_INTERNED(STR) /* nothing */
// Helpers for the lowest bit of a slot pointer, used as a tag by
// intern()/unintern(): clear it, set it, and test it respectively.
49 #define UNMASK_PTR(Ptr) (((unsigned long) (Ptr)) & ~0x01)
50 #define MASK_PTR(Ptr) (((unsigned long) (Ptr)) | 0x01)
51 #define PTR_MASKED(Ptr) (((unsigned long) (Ptr)) & 0x01)
53 /* Find a slot where the string with elements DATA, length LEN,
54 and hash HASH should go in the strhash table of interned strings. */
56 _Jv_StringFindSlot (jchar* data, jint len, jint hash)
// Hold the String class lock while the table is examined.
58 JvSynchronize sync (&StringClass);
// strhash_size is assumed to be a power of two, so masking with
// (strhash_size - 1) reduces the hash to a valid table index.
60 int start_index = hash & (strhash_size - 1);
61 int deleted_index = -1;
63 int index = start_index;
64 /* step must be non-zero, and relatively prime with strhash_size. */
// OR-ing with 1 makes the step odd, which satisfies both requirements
// for a power-of-two table size.
65 jint step = (hash ^ (hash >> 16)) | 1;
68 jstring* ptr = &strhash[index];
// Strip the tag bit that intern() may have set on the stored pointer.
69 jstring value = (jstring) UNMASK_PTR (*ptr);
// A deleted slot recorded earlier in the probe is handed back for reuse.
72 if (deleted_index >= 0)
73 return (&strhash[deleted_index]);
77 else if (*ptr == DELETED_STRING)
78 deleted_index = index;
// Match requires equal length and identical character data; the byte
// count 2*len presumably reflects a two-byte jchar -- TODO confirm.
79 else if (value->length() == len
80 && memcmp(JvGetStringChars(value), data, 2*len) == 0)
// Advance to the next probe position, wrapping within the table.
82 index = (index + step) & (strhash_size - 1);
// Returning to the starting index would mean no usable slot was found.
83 JvAssert (index != start_index);
87 /* Calculate a hash code for the string starting at PTR at given LENGTH.
88 This uses the same formula as specified for java.lang.String.hashCode. */
91 hashChars (jchar* ptr, jint length)
// One past the last character to be hashed.
93 jchar* limit = ptr + length;
95 // Updated specification from
96 // http://www.javasoft.com/docs/books/jls/clarify.html.
// Fold the next character into the running value: hash = 31 * hash + c.
98 hash = (31 * hash) + *ptr++;
// Returns hashChars applied to this string's characters and length.
103 java::lang::String::hashCode()
105 return hashChars(JvGetStringChars(this), length());
// Convenience wrapper: computes the hash of STR's characters and asks
// _Jv_StringFindSlot for the matching slot in the intern table.
109 _Jv_StringGetSlot (jstring str)
111 jchar* data = JvGetStringChars(str);
112 int length = str->length();
113 return _Jv_StringFindSlot(data, length, hashChars (data, length));
// Sets up or enlarges the intern table.  One path allocates and zeroes
// strhash itself; the other builds a zeroed table of twice the size and
// re-inserts the entries of the old one.  NOTE(review): presumably the
// first path runs only when no table exists yet -- confirm the guard.
117 java::lang::String::rehash()
119 JvSynchronize sync (&StringClass);
124 strhash = (jstring *) _Jv_AllocBytes (strhash_size * sizeof (jstring));
125 memset (strhash, 0, strhash_size * sizeof (jstring));
// Start one past the last slot of the old table.
129 int i = strhash_size;
130 jstring* ptr = strhash + i;
131 int nsize = strhash_size * 2;
132 jstring *next = (jstring *) _Jv_AllocBytes (nsize * sizeof (jstring));
133 memset (next, 0, nsize * sizeof (jstring));
// Empty and deleted slots carry no string to move.
138 if (*ptr == NULL || *ptr == DELETED_STRING)
141 /* This is a faster equivalent of
142 * *__JvGetInternSlot(*ptr) = *ptr; */
// Hash the untagged pointer's string and probe the new table with the
// same index/step scheme as _Jv_StringFindSlot, using the new size.
143 jstring val = (jstring) UNMASK_PTR (*ptr);
144 jint hash = val->hashCode();
145 jint index = hash & (nsize - 1);
146 jint step = (hash ^ (hash >> 16)) | 1;
149 if (next[index] == NULL)
154 index = (index + step) & (nsize - 1);
158 strhash_size = nsize;
// Looks this string up in the intern table.  An existing equal entry is
// tagged and returned; otherwise a string is chosen for the table and a
// finalizer is registered for it.
164 java::lang::String::intern()
166 JvSynchronize sync (&StringClass);
// True once the table is at least two-thirds full.
167 if (3 * strhash_count >= 2 * strhash_size)
169 jstring* ptr = _Jv_StringGetSlot(this);
170 if (*ptr != NULL && *ptr != DELETED_STRING)
172 // See description in unintern() to understand this.
// Set the tag bit in the slot, but hand the caller the untagged pointer.
173 *ptr = (jstring) MASK_PTR (*ptr);
174 return (jstring) UNMASK_PTR (*ptr);
// Reuse this object when its data field refers to itself; otherwise
// copy the characters into a new string.
176 jstring str = this->data == this ? this
177 : _Jv_NewString(JvGetStringChars(this), this->length());
178 SET_STRING_IS_INTERNED(str);
181 // When string is GC'd, clear the slot in the hash table.
182 _Jv_RegisterFinalizer ((void *) str, unintern);
186 /* Called by String fake finalizer. */
188 unintern (jobject obj)
190 JvSynchronize sync (&StringClass);
191 jstring str = reinterpret_cast<jstring> (obj);
192 jstring* ptr = _Jv_StringGetSlot(str);
193 if (*ptr == NULL || *ptr == DELETED_STRING)
196 // We assume the lowest bit of the pointer is free for our nefarious
197 // manipulations. What we do is set it to `0' (implicitly) when
198 // interning the String. If we subsequently re-intern the same
199 // String, then we set the bit. When finalizing, if the bit is set
200 // then we clear it and re-register the finalizer. We know this is
201 // a safe approach because both the intern() and unintern() acquire
202 // the class lock; this bit can't be manipulated when the lock is
203 // not held. So if we are finalizing and the bit is clear then we
204 // know all references are gone and we can clear the entry in the
205 // hash table. The naive approach of simply clearing the pointer
206 // here fails in the case where a request to intern a new string
207 // with the same contents is made between the time the intern()d
208 // string is found to be unreachable and when the finalizer is
209 // actually run. In this case we could clear a pointer to a valid
210 // string, and future intern() calls for that particular value would
212 if (PTR_MASKED (*ptr))
// Tag set: clear it and register the finalizer again for this object.
214 *ptr = (jstring) UNMASK_PTR (*ptr);
215 _Jv_RegisterFinalizer ((void *) obj, unintern);
// Tag clear: retire the slot with the deleted marker, which
// _Jv_StringFindSlot recognises and may hand out for reuse.
219 *ptr = DELETED_STRING;
// Builds a string from the NUL-terminated UTF-8 text in BYTES.
225 _Jv_NewStringUTF (const char *bytes)
// Byte length of the input, excluding the terminator.
227 int size = strlen (bytes);
228 unsigned char *p = (unsigned char *) bytes;
// Length of the text as reported by _Jv_strLengthUtf8; it sizes the
// string allocated below.
230 int length = _Jv_strLengthUtf8 ((char *) p, size);
234 jstring jstr = JvAllocString (length);
235 jchar *chrs = JvGetStringChars (jstr);
// Rewind to the start of the input for the decoding pass.
237 p = (unsigned char *) bytes;
238 unsigned char *limit = p + size;
// UTF8_GET yields the next decoded character; presumably it also
// advances p toward limit -- confirm against its definition.
240 *chrs++ = UTF8_GET (p, limit);
// Decodes a Utf8Const and looks the result up in the intern table: an
// existing equal entry is returned; otherwise a finalizer is registered
// for the newly built string.
246 _Jv_NewStringUtf8Const (Utf8Const* str)
251 unsigned char* data = (unsigned char*) str->data;
252 unsigned char* limit = data + str->length;
253 int length = _Jv_strLengthUtf8(str->data, str->length);
// Strings that fit into the local buffer take this branch; presumably
// they are decoded there so that no string object needs allocating
// when the lookup finds an existing entry -- TODO confirm.
255 if (length <= (int) (sizeof(buffer) / sizeof(jchar)))
262 jstr = JvAllocString(length);
263 chrs = JvGetStringChars(jstr);
// Decode a character and fold it into the hash with the same
// 31 * hash + c recurrence that hashChars uses.
269 jchar ch = UTF8_GET(data, limit);
270 hash = (31 * hash) + ch;
275 JvSynchronize sync (&StringClass);
// Grow the table once it is at least two-thirds full.
276 if (3 * strhash_count >= 2 * strhash_size)
277 java::lang::String::rehash();
278 jstring* ptr = _Jv_StringFindSlot (chrs, length, hash);
// An equal string is already present: return it without the tag bit.
279 if (*ptr != NULL && *ptr != DELETED_STRING)
280 return (jstring) UNMASK_PTR (*ptr);
// Allocate a string and copy the decoded characters out of the local
// buffer.  NOTE(review): presumably guarded so it runs only when the
// buffer path was taken above -- confirm.
284 jstr = JvAllocString(length);
285 chrs = JvGetStringChars(jstr);
286 memcpy (chrs, buffer, sizeof(jchar)*length);
289 SET_STRING_IS_INTERNED(jstr);
290 // When string is GC'd, clear the slot in the hash table.
291 _Jv_RegisterFinalizer ((void *) jstr, unintern);
// Walks the characters of STRING and classifies each by value range.
// Presumably each class adds the byte count of its UTF-8 form (the
// same ranges select the encodings in _Jv_GetStringUTFRegion) -- TODO
// confirm the per-branch increments.
296 _Jv_GetStringUTFLength (jstring string)
299 jchar *ptr = JvGetStringChars (string);
300 jsize i = string->length();
// 0x0001..0x007F.
304 if (ch > 0 && ch <= 0x7F)
// Up to 0x07FF; a zero character also lands here because it fails the
// ch > 0 test above.
306 else if (ch <= 0x7FF)
314 // Not sure this quite matches GetStringUTFRegion.
315 // null-termination of result? len? throw exception?
// Encodes characters of STR, beginning at index START, as UTF-8 bytes
// written through a destination pointer.
317 _Jv_GetStringUTFRegion (jstring str, jsize start, jsize len, char *buf)
319 jchar *sptr = JvGetStringChars (str) + start;
// 0x0001..0x007F.
325 if (ch > 0 && ch <= 0x7F)
// Up to 0x07FF, plus the zero character (it fails ch > 0 above), which
// therefore comes out as the two bytes 0xC0 0x80.
327 else if (ch <= 0x7FF)
// Two-byte form: 110xxxxx 10xxxxxx.
329 *dptr++ = (char) (0xC0 + ((ch >> 6) & 0x1F));
330 *dptr++ = (char) (0x80 + (ch & 0x3F));
// Three-byte form: 1110xxxx 10xxxxxx 10xxxxxx.
334 *dptr++ = (char) (0xE0 + ((ch >> 12) & 0xF));
335 *dptr++ = (char) (0x80 + ((ch >> 6) & 0x3F));
336 *dptr++ = (char) (0x80 + (ch & 0x3F));
342 /* Put printed (decimal) representation of NUM in a buffer.
343 BUFEND marks the end of the buffer, which must be at least 11 jchars long.
344 Returns the COUNT of jchars written. The result is in
345 (BUFEND - COUNT) (inclusive) up to (BUFEND) (exclusive). */
348 _Jv_FormatInt (jchar* bufend, jint num)
// Output is produced backwards, starting at the end of the buffer.
350 register jchar* ptr = bufend;
358 // Must be MIN_VALUE, so handle this special case.
359 // FIXME use 'unsigned jint' for num.
// Store the lowest remaining decimal digit just before the previous one.
369 *--ptr = (jchar) ((int) '0' + (num % 10));
// Returns the decimal representation of NUM as a new string.
380 java::lang::String::valueOf (jint num)
382 // Use an array large enough for "-2147483648"; i.e. 11 chars.
// _Jv_FormatInt fills the buffer from its end and returns the number
// of characters written, so the text starts at buffer+11-i.
384 int i = _Jv_FormatInt (buffer+11, num);
385 return _Jv_NewString (buffer+11-i, i);
// Allocates a String object with room for LEN characters in the same
// allocation, directly after the object header.
389 _Jv_AllocString(jsize len)
// Header plus LEN characters, in bytes.
391 jsize sz = sizeof(java::lang::String) + len * sizeof(jchar);
393 // We assert that for strings allocated this way, the data field
394 // will always point to the object itself. Thus there is no reason
395 // for the garbage collector to scan any of it.
396 // Furthermore, we're about to overwrite the string data, so
397 // initialization of the object is not an issue.
// NOTE(review): obj is declared twice below with different allocator
// entry points; presumably the two declarations belong to alternative
// preprocessor branches -- confirm.
399 jstring obj = (jstring) _Jv_AllocPtrFreeObject(&StringClass, sz);
401 // Class needs no initialization, and there is no finalizer, so
402 // we can go directly to the collector's allocator interface.
403 jstring obj = (jstring) _Jv_AllocPtrFreeObj(&StringClass, sz);
// Character data begins immediately after the String header.
406 obj->boffset = sizeof(java::lang::String);
// Allocates a string of LEN characters and obtains its character
// storage; presumably CHARS is then copied into it -- the copy itself
// should be confirmed against the full body.
412 _Jv_NewString(const jchar *chars, jsize len)
414 jstring str = _Jv_AllocString(len);
415 jchar* data = JvGetStringChars (str);
// Builds a string of LEN characters from single-byte input, widening
// each byte to one character.
422 _Jv_NewStringLatin1(const char *bytes, jsize len)
424 jstring str = JvAllocString(len);
425 jchar* data = JvGetStringChars (str);
// Read through unsigned char so bytes >= 0x80 are not sign-extended.
427 *data++ = *(unsigned char*)bytes++;
// Initializer taking no arguments.  Sets boffset to the size of the
// String header, the same value _Jv_AllocString stores.
432 java::lang::String::init ()
435 boffset = sizeof(java::lang::String);
// Initializes this string from COUNT characters of CHARS starting at
// OFFSET.  Throws NullPointerException and
// ArrayIndexOutOfBoundsException on the checks shown below.
440 java::lang::String::init(jcharArray chars, jint offset, jint count,
444 throw new NullPointerException;
445 jsize data_size = JvGetArrayLength (chars);
// NOTE(review): offset + count is evaluated in signed arithmetic; the
// "< 0" test relies on wraparound, which C++ does not guarantee for
// signed overflow.
446 if (offset < 0 || count < 0 || offset + count < 0
447 || offset + count > data_size)
448 throw new ArrayIndexOutOfBoundsException;
// Copying path: a fresh array receives the requested range.
453 array = JvNewCharArray(count);
454 pdst = elements (array);
455 memcpy (pdst, elements (chars) + offset, count * sizeof (jchar));
// Alternative path: asserts a zero offset; presumably the caller's
// array is adopted without copying -- TODO confirm.
459 JvAssert (offset == 0);
461 pdst = elements (array);
// boffset is the byte distance from the array object to its elements.
465 boffset = (char *) pdst - (char *) array;
// Initializes this string from COUNT bytes of ASCII starting at OFFSET,
// combining each byte with HIBYTE to form one character.
470 java::lang::String::init(jbyteArray ascii, jint hibyte, jint offset,
474 throw new NullPointerException;
475 jsize data_size = JvGetArrayLength (ascii);
// NOTE(review): offset + count is evaluated in signed arithmetic; the
// "< 0" test relies on wraparound, which C++ does not guarantee for
// signed overflow.
476 if (offset < 0 || count < 0 || offset + count < 0
477 || offset + count > data_size)
478 throw new ArrayIndexOutOfBoundsException;
479 jcharArray array = JvNewCharArray(count);
480 jbyte *psrc = elements (ascii) + offset;
481 jchar *pdst = elements (array);
// boffset is the byte distance from the array object to its elements.
483 boffset = (char *) pdst - (char *) array;
// Keep only the low eight bits of hibyte and move them to bits 8..15.
485 hibyte = (hibyte & 0xff) << 8;
486 while (-- count >= 0)
// Mask the source byte so a negative jbyte does not sign-extend into
// the high byte.
488 *pdst++ = hibyte | (*psrc++ & 0xff);
// Initializes this string by decoding COUNT bytes of BYTES, starting at
// OFFSET, with a BytesToUnicode decoder obtained for the given encoding.
493 java::lang::String::init (jbyteArray bytes, jint offset, jint count,
497 throw new NullPointerException;
498 jsize data_size = JvGetArrayLength (bytes);
// NOTE(review): offset + count is evaluated in signed arithmetic; the
// "< 0" test relies on wraparound, which C++ does not guarantee for
// signed overflow.
499 if (offset < 0 || count < 0 || offset + count < 0
500 || offset + count > data_size)
501 throw new ArrayIndexOutOfBoundsException;
// Initial output capacity: one character per input byte.
502 jcharArray array = JvNewCharArray (count);
503 gnu::gcj::convert::BytesToUnicode *converter
504 = gnu::gcj::convert::BytesToUnicode::getDecoder(encoding);
507 converter->setInput(bytes, offset, offset+count);
// Keep converting until the decoder has consumed all of its input.
508 while (converter->inpos < converter->inlength)
510 int done = converter->read(array, outpos, avail);
// Growth path: allocate a larger array and carry over the characters
// produced so far.  Presumably taken when the decoder makes no
// progress for lack of output space -- TODO confirm the condition.
513 jint new_size = 2 * (outpos + avail);
514 jcharArray new_array = JvNewCharArray (new_size);
515 memcpy (elements (new_array), elements (array),
516 outpos * sizeof(jchar));
518 avail = new_size - outpos;
// The string length is the number of characters actually produced.
527 this->boffset = (char *) elements (array) - (char *) array;
528 this->count = outpos;
// Compares this string with ANOBJECT.  The checks below cover a null
// argument, identity, a class other than String, and a length mismatch
// before any character data is compared.
532 java::lang::String::equals(jobject anObject)
534 if (anObject == NULL)
536 if (anObject == this)
// Exact class comparison against String.
538 if (anObject->getClass() != &StringClass)
540 jstring other = (jstring) anObject;
541 if (count != other->count)
543 /* if both are interned, return false. */
545 jchar *xptr = JvGetStringChars (this);
546 jchar *yptr = JvGetStringChars (other);
// Character-by-character comparison of the two strings.
549 if (*xptr++ != *yptr++)
// Returns the character at index I.  Throws
// StringIndexOutOfBoundsException unless 0 <= I < count.
556 java::lang::String::charAt(jint i)
558 if (i < 0 || i >= count)
559 throw new java::lang::StringIndexOutOfBoundsException;
560 return JvGetStringChars(this)[i];
// Copies the characters in [srcBegin, srcEnd) into DST starting at
// dstBegin.  Throws StringIndexOutOfBoundsException for a bad source
// range and ArrayIndexOutOfBoundsException for a bad destination range.
564 java::lang::String::getChars(jint srcBegin, jint srcEnd,
565 jcharArray dst, jint dstBegin)
567 jint dst_length = JvGetArrayLength (dst);
568 if (srcBegin < 0 || srcBegin > srcEnd || srcEnd > count)
569 throw new java::lang::StringIndexOutOfBoundsException;
// NOTE(review): dstBegin + (srcEnd-srcBegin) can overflow for a large
// dstBegin; comparing dstBegin against dst_length - (srcEnd-srcBegin)
// would avoid the addition.
570 if (dstBegin < 0 || dstBegin + (srcEnd-srcBegin) > dst_length)
571 throw new ArrayIndexOutOfBoundsException;
572 jchar *dPtr = elements (dst) + dstBegin;
573 jchar *sPtr = JvGetStringChars (this) + srcBegin;
// Number of characters to copy.
574 jint i = srcEnd-srcBegin;
// Encodes this string into bytes with a UnicodeToBytes encoder obtained
// for ENC.
580 java::lang::String::getBytes (jstring enc)
// Characters still to be converted.
582 jint todo = length();
// Initial output capacity: one byte per character.
584 jbyteArray buffer = JvNewByteArray(todo);
587 gnu::gcj::convert::UnicodeToBytes *converter
588 = gnu::gcj::convert::UnicodeToBytes::getEncoder(enc);
// Point the encoder at the current buffer position, convert, and read
// back how far the output has advanced.
591 converter->setOutput(buffer, bufpos);
592 int converted = converter->write(this, offset, todo, NULL);
593 bufpos = converter->count;
// Growth path: allocate a buffer of buflen bytes and carry over what
// has been written.  Presumably buflen was enlarged just before --
// TODO confirm.
597 jbyteArray newbuffer = JvNewByteArray(buflen);
598 memcpy (elements (newbuffer), elements (buffer), bufpos);
// Buffer exactly full: presumably returned as is; otherwise the bytes
// are copied into an array of exactly bufpos bytes.
607 if (bufpos == buflen)
609 jbyteArray result = JvNewByteArray(bufpos);
610 memcpy (elements (result), elements (buffer), bufpos);
// Copies the characters in [srcBegin, srcEnd) into DST starting at
// dstBegin, narrowing each character to a jbyte.  Throws
// StringIndexOutOfBoundsException for a bad source range and
// ArrayIndexOutOfBoundsException for a bad destination range.
615 java::lang::String::getBytes(jint srcBegin, jint srcEnd,
616 jbyteArray dst, jint dstBegin)
618 jint dst_length = JvGetArrayLength (dst);
619 if (srcBegin < 0 || srcBegin > srcEnd || srcEnd > count)
620 throw new java::lang::StringIndexOutOfBoundsException;
// NOTE(review): dstBegin + (srcEnd-srcBegin) can overflow for a large
// dstBegin; comparing dstBegin against dst_length - (srcEnd-srcBegin)
// would avoid the addition.
621 if (dstBegin < 0 || dstBegin + (srcEnd-srcBegin) > dst_length)
622 throw new ArrayIndexOutOfBoundsException;
623 jbyte *dPtr = elements (dst) + dstBegin;
624 jchar *sPtr = JvGetStringChars (this) + srcBegin;
// Number of characters to copy.
625 jint i = srcEnd-srcBegin;
// The cast discards whatever does not fit in a jbyte.
627 *dPtr++ = (jbyte) *sPtr++;
// Allocates a char array of this string's length and sets up source and
// destination pointers for filling it from the string's characters.
631 java::lang::String::toCharArray()
633 jcharArray array = JvNewCharArray(count);
634 jchar *dPtr = elements (array);
635 jchar *sPtr = JvGetStringChars (this);
// Case-insensitive comparison with ANOTHERSTRING.  A null argument or a
// length mismatch is decided before any characters are examined.  A
// character pair counts as a mismatch only when the lower-case forms
// differ and the upper-case forms differ too.
643 java::lang::String::equalsIgnoreCase (jstring anotherString)
645 if (anotherString == NULL || count != anotherString->count)
647 jchar *tptr = JvGetStringChars (this);
648 jchar *optr = JvGetStringChars (anotherString);
655 && (java::lang::Character::toLowerCase (tch)
656 != java::lang::Character::toLowerCase (och))
657 && (java::lang::Character::toUpperCase (tch)
658 != java::lang::Character::toUpperCase (och)))
// Compares LEN characters of this string from TOFFSET with LEN
// characters of OTHER from OOFFSET, character by character.
665 java::lang::String::regionMatches (jint toffset,
666 jstring other, jint ooffset, jint len)
// Range check on both regions.  NOTE(review): toffset + len and
// ooffset + len can overflow for large arguments, and no test on the
// sign of len is visible -- confirm how those inputs are handled.
668 if (toffset < 0 || ooffset < 0
669 || toffset + len > count
670 || ooffset + len > other->count)
672 jchar *tptr = JvGetStringChars (this) + toffset;
673 jchar *optr = JvGetStringChars (other) + ooffset;
677 if (*tptr++ != *optr++)
// Compares this string with ANOTHERSTRING character by character over
// their common prefix.
684 java::lang::String::compareTo (jstring anotherString)
686 jchar *tptr = JvGetStringChars (this);
687 jchar *optr = JvGetStringChars (anotherString);
688 jint tlen = this->count;
689 jint olen = anotherString->count;
// Length of the shorter string: the number of positions both share.
690 jint i = tlen > olen ? olen : tlen;
// Difference of the two character values, computed as jint.
696 return (jint) tch - (jint) och;
// Region comparison with optional case folding: compares LEN characters
// of this string from TOFFSET with LEN characters of OTHER from OOFFSET.
702 java::lang::String::regionMatches (jboolean ignoreCase, jint toffset,
703 jstring other, jint ooffset, jint len)
// Range check on both regions.  NOTE(review): toffset + len and
// ooffset + len can overflow for large arguments, and no test on the
// sign of len is visible -- confirm how those inputs are handled.
705 if (toffset < 0 || ooffset < 0
706 || toffset + len > count
707 || ooffset + len > other->count)
709 jchar *tptr = JvGetStringChars (this) + toffset;
710 jchar *optr = JvGetStringChars (other) + ooffset;
// A pair is a mismatch only when the lower-case forms differ and the
// upper-case forms differ too.
717 if ((java::lang::Character::toLowerCase (tch)
718 != java::lang::Character::toLowerCase (och))
719 && (java::lang::Character::toUpperCase (tch)
720 != java::lang::Character::toUpperCase (och)))
// Tests whether the characters of this string starting at TOFFSET begin
// with PREFIX.
735 java::lang::String::startsWith (jstring prefix, jint toffset)
// Number of characters to compare: the prefix length.
737 jint i = prefix->count;
// The prefix must fit inside this string at the given offset.
738 if (toffset < 0 || toffset + i > count)
740 jchar *xptr = JvGetStringChars (this) + toffset;
741 jchar *yptr = JvGetStringChars (prefix);
744 if (*xptr++ != *yptr++)
// Searches this string for the character CH, examining positions from
// FROMINDEX onward.
751 java::lang::String::indexOf (jint ch, jint fromIndex)
755 jchar *ptr = JvGetStringChars(this);
// Past the end of the string: nothing left to examine.
758 if (fromIndex >= count)
760 if (ptr[fromIndex] == ch)
// Searches this string for the substring S, starting at FROMINDEX.
// NOTE(review): fromIndex is added to the character pointer and
// subtracted from the length below with no visible validation --
// confirm negative or too-large values are handled.
766 java::lang::String::indexOf (jstring s, jint fromIndex)
// x* refers to the pattern S, y* to the searched tail of this string.
768 const jchar *const xchars = JvGetStringChars(s);
769 const jchar *const ychars = JvGetStringChars(this) + fromIndex;
771 const int xlength = s->length ();
772 const int ylength = length () - fromIndex;
// i indexes the searched text, j the pattern.
777 while (i < ylength && j < xlength)
779 if (xchars[j] != ychars[i])
// i has advanced past the match, so step back by the pattern length
// and translate into an index of the whole string.
789 return fromIndex + i - xlength;
// Searches this string for the character CH, examining positions at or
// before FROMINDEX.
795 java::lang::String::lastIndexOf (jint ch, jint fromIndex)
// Clamp the starting position to the last valid index.
797 if (fromIndex >= count)
798 fromIndex = count - 1;
799 jchar *ptr = JvGetStringChars(this);
804 if (ptr[fromIndex] == ch)
// Returns the substring [beginIndex, endIndex).  Throws
// StringIndexOutOfBoundsException when the range is not within the
// string or is reversed.
810 java::lang::String::substring (jint beginIndex, jint endIndex)
812 if (beginIndex < 0 || endIndex > count || beginIndex > endIndex)
813 throw new StringIndexOutOfBoundsException;
// The requested range covers the entire string.
814 if (beginIndex == 0 && endIndex == count)
816 jint newCount = endIndex - beginIndex;
// Results of at most eight characters are copied into a new string.
817 if (newCount <= 8) // Optimization, mainly for GC.
818 return JvNewString(JvGetStringChars(this) + beginIndex, newCount);
// Longer results get a new String object whose boffset is this string's
// boffset advanced by beginIndex characters; presumably it shares this
// string's character storage -- TODO confirm the data assignment.
819 jstring s = new String();
822 s->boffset = boffset + sizeof(jchar) * beginIndex;
// Builds a new string holding this string's characters followed by
// those of STR.
827 java::lang::String::concat(jstring str)
829 jint str_count = str->count;
// Result has room for both strings.
832 jstring result = JvAllocString(count + str_count);
833 jchar *dstPtr = JvGetStringChars(result);
// First source: this string.
834 jchar *srcPtr = JvGetStringChars(this);
837 *dstPtr++ = *srcPtr++;
// Second source: the argument, appended where the first copy stopped.
838 srcPtr = JvGetStringChars(str);
841 *dstPtr++ = *srcPtr++;
// Produces a string in which occurrences of OLDCHAR are replaced by
// NEWCHAR.
846 java::lang::String::replace (jchar oldChar, jchar newChar)
849 jchar* chrs = JvGetStringChars (this);
// Scan for an occurrence of oldChar; i is left at that position.
854 if (chrs[i] == oldChar)
857 jstring result = JvAllocString (count);
858 jchar *dPtr = JvGetStringChars (result);
// Characters before index i are handled by the first loop, the
// remainder (from i to the end) by the second.
859 for (int j = 0; j < i; j++)
861 for (; i < count; i++)
// Lower-cases this string, with special handling of CAPITAL_I and
// CAPITAL_I_WITH_DOT when the locale's language is "tr".
872 java::lang::String::toLowerCase (java::util::Locale *locale)
875 jchar* chrs = JvGetStringChars(this);
878 bool handle_tr = false;
// Detect the two-letter language code "tr".
881 String *lang = locale->getLanguage ();
882 if (lang->length () == 2
883 && lang->charAt (0) == 't'
884 && lang->charAt (1) == 'r')
// Scan pass over the original characters; i is later reused as the
// index from which conversion starts.
892 jchar origChar = chrs[i];
894 if (handle_tr && (origChar == CAPITAL_I
895 || origChar == CAPITAL_I_WITH_DOT))
898 ch = java::lang::Character::toLowerCase(origChar);
// Build the result: same length as the original.
902 jstring result = JvAllocString(count);
903 jchar *dPtr = JvGetStringChars (result);
// Characters before index i are handled by the first loop, the
// remainder by the second.
904 for (int j = 0; j < i; j++)
907 for (; i < count; i++)
// With "tr" handling, CAPITAL_I becomes SMALL_DOTLESS_I.
909 if (handle_tr && chrs[i] == CAPITAL_I)
910 *dPtr++ = SMALL_DOTLESS_I;
911 else if (handle_tr && chrs[i] == CAPITAL_I_WITH_DOT)
// Everything else goes through Character::toLowerCase.
914 *dPtr++ = java::lang::Character::toLowerCase(chrs[i]);
// Upper-cases this string, with special handling of ESSET when a locale
// is supplied and of SMALL_I / SMALL_DOTLESS_I when the locale's
// language is "tr".
920 java::lang::String::toUpperCase (java::util::Locale *locale)
923 jchar* chrs = JvGetStringChars(this);
926 // When handling a specific locale there might be special rules.
927 // Currently all existing rules are simply handled inline, as there
928 // are only two and they are documented in the online 1.2 docs.
929 bool handle_esset = locale != NULL;
930 bool handle_tr = false;
// Detect the two-letter language code "tr".
933 String *lang = locale->getLanguage ();
934 if (lang->length () == 2
935 && lang->charAt (0) == 't'
936 && lang->charAt (1) == 'r')
// The result may differ in length from the original, so its size and
// the need for a new string are tracked separately.
940 int new_count = count;
941 bool new_string = false;
// Scan pass over the original characters.
946 jchar origChar = chrs[i];
948 if (handle_esset && origChar == ESSET)
953 else if (handle_tr && (origChar == SMALL_I
954 || origChar == SMALL_DOTLESS_I))
958 ch = java::lang::Character::toUpperCase(origChar);
963 if (new_string && ! handle_esset)
// Build the result with the size computed above.
968 jstring result = JvAllocString(new_count);
969 jchar *dPtr = JvGetStringChars (result);
970 for (i = 0; i < count; i++)
972 if (handle_esset && chrs[i] == ESSET)
// With "tr" handling, SMALL_I becomes CAPITAL_I_WITH_DOT.
977 else if (handle_tr && chrs[i] == SMALL_I)
978 *dPtr++ = CAPITAL_I_WITH_DOT;
979 else if (handle_tr && chrs[i] == SMALL_DOTLESS_I)
// Everything else goes through Character::toUpperCase.
982 *dPtr++ = java::lang::Character::toUpperCase(chrs[i]);
// Removes leading and trailing characters whose value is <= ' ' and
// returns the remaining range via substring.
988 java::lang::String::trim ()
990 jchar* chrs = JvGetStringChars(this);
// Empty string, or neither end needs trimming.
991 if (count == 0 || (chrs[0] > ' ' && chrs[count-1] > ' '))
// Reaching the end means every character was <= ' '.
996 if (preTrim == count)
// First character that is kept.
998 if (chrs[preTrim] > ' ')
// Move the end backwards over trailing characters <= ' '.
1001 jint endTrim = count;
1002 while (chrs[endTrim-1] <= ' ')
1004 return substring(preTrim, endTrim);
// Builds a new string from COUNT characters of DATA starting at OFFSET.
// Throws ArrayIndexOutOfBoundsException when the range is not within
// the array.
1008 java::lang::String::valueOf(jcharArray data, jint offset, jint count)
1010 jint data_length = JvGetArrayLength (data);
// NOTE(review): offset+count can overflow for large arguments;
// comparing offset against data_length - count would avoid the
// addition.
1011 if (offset < 0 || count < 0 || offset+count > data_length)
1012 throw new ArrayIndexOutOfBoundsException;
1013 jstring result = JvAllocString(count);
1014 jchar *sPtr = elements (data) + offset;
1015 jchar *dPtr = JvGetStringChars(result);
// Loop runs once per character of the requested range.
1016 while (--count >= 0)
1022 java::lang::String::valueOf(jchar c)
1024 jstring result = JvAllocString(1);
1025 JvGetStringChars (result)[0] = c;