* java/lang/Character.java (SIZE, MAX_CACHE, charCache,

author tromey <tromey@138bc75d-0d04-0410-961f-82ee72b054a4>

Sat, 7 Jan 2006 00:46:28 +0000 (00:46 +0000)

committer tromey <tromey@138bc75d-0d04-0410-961f-82ee72b054a4>

Sat, 7 Jan 2006 00:46:28 +0000 (00:46 +0000)
author tromey <tromey@138bc75d-0d04-0410-961f-82ee72b054a4>
Sat, 7 Jan 2006 00:46:28 +0000 (00:46 +0000)
committer tromey <tromey@138bc75d-0d04-0410-961f-82ee72b054a4>
Sat, 7 Jan 2006 00:46:28 +0000 (00:46 +0000)
diff --git a/libjava/ChangeLog b/libjava/ChangeLog

index 76142c5..a5c5c40 100644 (file)
--- a/libjava/ChangeLog
+++ b/libjava/ChangeLog
@@ -1,3 +1,17 @@
+2006-01-06  Tom Tromey  <tromey@redhat.com>
+
+       * java/lang/Character.java (SIZE, MAX_CACHE, charCache,
+       MIN_SURROGATE, MAX_SURROGATE): New fields from Classpath.
+       (MIN_HIGH_SURROGATE, MAX_HIGH_SURROGATE, MIN_LOW_SURROGATE,
+       MAX_LOW_SURROGATE): Javadoc fixes.
+       (valueOf, reverseBytes, isHighSurrogate, isLowSurrogate,
+       isSurrogatePair, toCodePoint, codePointAt, codePointBefore): New
+       methods from Classpath.
+       * java/lang/String.java (codePointAt, codePointBefore,
+       codePointCount, contains, replace): New methods from Classpath.
+       (contentEquals): Declare.
+       * java/lang/natString.cc (contentEquals): New method.
+
  2005-12-26  Anthony Green  <green@redhat.com>
  
         * gnu/java/nio/SocketChannelImpl.java (read): Compute the right amount
diff --git a/libjava/java/lang/Character.java b/libjava/java/lang/Character.java

index aa29e0b..3cb73d0 100644 (file)
--- a/libjava/java/lang/Character.java
+++ b/libjava/java/lang/Character.java
@@ -1,5 +1,5 @@
  /* java.lang.Character -- Wrapper class for char, and Unicode subsets
-   Copyright (C) 1998, 1999, 2001, 2002, 2005 Free Software Foundation, Inc.
+   Copyright (C) 1998, 1999, 2001, 2002, 2005, 2006 Free Software Foundation, Inc.
  
  This file is part of GNU Classpath.
  
@@ -1040,6 +1040,18 @@ public final class Character implements Serializable, Comparable
    public static final Class TYPE = VMClassLoader.getPrimitiveClass('C');
  
    /**
+   * The number of bits needed to represent a <code>char</code>.
+   * @since 1.5
+   */
+  public static final int SIZE = 16;
+
+  // This caches some Character values, and is used by boxing
+  // conversions via valueOf().  We must cache at least 0..127;
+  // this constant controls how much we actually cache.
+  private static final int MAX_CACHE = 127;
+  private static Character[] charCache = new Character[MAX_CACHE + 1];
+
+  /**
     * Lu = Letter, Uppercase (Informative).
     *
     * @since 1.1
@@ -1434,34 +1446,48 @@ public final class Character implements Serializable, Comparable
  
  
    /**
-   * Minimum high surrrogate code in UTF-16 encoding.
+   * Minimum high surrogate code in UTF-16 encoding.
     *
     * @since 1.5
     */
    public static final char MIN_HIGH_SURROGATE = '\ud800';
  
    /**
-   * Maximum high surrrogate code in UTF-16 encoding.
+   * Maximum high surrogate code in UTF-16 encoding.
     *
     * @since 1.5
     */
    public static final char MAX_HIGH_SURROGATE = '\udbff';
   
    /**
-   * Minimum low surrrogate code in UTF-16 encoding.
+   * Minimum low surrogate code in UTF-16 encoding.
     *
     * @since 1.5
     */
    public static final char MIN_LOW_SURROGATE = '\udc00';
  
    /**
-   * Maximum low surrrogate code in UTF-16 encoding.
+   * Maximum low surrogate code in UTF-16 encoding.
     *
     * @since 1.5
     */
    public static final char MAX_LOW_SURROGATE = '\udfff';
  
    /**
+   * Minimum surrogate code in UTF-16 encoding.
+   *
+   * @since 1.5
+   */
+  public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
+
+  /**
+   * Maximum surrogate code in UTF-16 encoding.
+   *
+   * @since 1.5
+   */
+  public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
+
+  /**
     * Grabs an attribute offset from the Unicode attribute database. The lower
     * 5 bits are the character type, the next 2 bits are flags, and the top
     * 9 bits are the offset into the attribute tables. Note that the top 9
@@ -2213,6 +2239,37 @@ public final class Character implements Serializable, Comparable
    }
  
    /**
+   * Returns a <code>Character</code> object wrapping the value.
+   * In contrast to the <code>Character</code> constructor, this method
+   * will cache some values.  It is used by boxing conversion.
+   *
+   * @param val the value to wrap
+   * @return the <code>Character</code>
+   * 
+   * @since 1.5
+   */
+  public static Character valueOf(char val)
+  {
+    if (val > MAX_CACHE)
+      return new Character(val);
+    synchronized (charCache)
+      {
+    if (charCache[val - MIN_VALUE] == null)
+      charCache[val - MIN_VALUE] = new Character(val);
+    return charCache[val - MIN_VALUE];
+      }
+  }
+
+  /**
+   * Reverse the bytes in val.
+   * @since 1.5
+   */
+  public static char reverseBytes(char val)
+  {
+    return (char) (((val >> 8) & 0xff) | ((val << 8) & 0xff00));
+  }
+
+  /**
     * Converts a unicode code point to a UTF-16 representation of that
     * code point.
     * 
@@ -2280,7 +2337,7 @@ public final class Character implements Serializable, Comparable
     * Return number of 16-bit characters required to represent the given
     * code point.
     *
-   * @param codePoint a uncode code point
+   * @param codePoint a unicode code point
     *
     * @return 2 if codePoint >= 0x10000, 1 otherwise.
     *
@@ -2325,4 +2382,210 @@ public final class Character implements Serializable, Comparable
    {
      return codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT;
    }
+
+  /**
+   * Return true if the given character is a high surrogate.
+   * @param ch the character
+   * @return true if the character is a high surrogate character
+   *
+   * @since 1.5
+   */
+  public static boolean isHighSurrogate(char ch)
+  {
+    return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE;
+  }
+
+  /**
+   * Return true if the given character is a low surrogate.
+   * @param ch the character
+   * @return true if the character is a low surrogate character
+   *
+   * @since 1.5
+   */
+  public static boolean isLowSurrogate(char ch)
+  {
+    return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE;
+  }
+
+  /**
+   * Return true if the given characters compose a surrogate pair.
+   * This is true if the first character is a high surrogate and the
+   * second character is a low surrogate.
+   * @param ch1 the first character
+   * @param ch2 the second character
+   * @return true if the characters compose a surrogate pair
+   *
+   * @since 1.5
+   */
+  public static boolean isSurrogatePair(char ch1, char ch2)
+  {
+    return isHighSurrogate(ch1) && isLowSurrogate(ch2);
+  }
+
+  /**
+   * Given a valid surrogate pair, this returns the corresponding
+   * supplementary code point (in the range 0x10000 to 0x10ffff).
+   * The arguments are not validated; passing characters that do not
+   * form a surrogate pair yields a meaningless value.
+   * @param high the high character of the pair
+   * @param low the low character of the pair
+   * @return the corresponding code point
+   *
+   * @since 1.5
+   */
+  public static int toCodePoint(char high, char low)
+  {
+    // A surrogate pair encodes (code point - 0x10000) in 20 bits, ten
+    // per surrogate, so the offset has to be added back when decoding.
+    return ((high - MIN_HIGH_SURROGATE) << 10) + (low - MIN_LOW_SURROGATE)
+      + 0x10000;
+  }
+
+  /**
+   * Get the code point at the specified index in the CharSequence.
+   * This is like CharSequence#charAt(int), but if the character is
+   * the start of a surrogate pair, and there is a following
+   * character, and this character completes the pair, then the
+   * corresponding supplementary code point is returned.  Otherwise,
+   * the character at the index is returned.
+   *
+   * @param sequence the CharSequence
+   * @param index the index of the codepoint to get, starting at 0
+   * @return the codepoint at the specified index
+   * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
+   * @since 1.5
+   */
+  public static int codePointAt(CharSequence sequence, int index)
+  {
+    int len = sequence.length();
+    if (index < 0 || index >= len)
+      throw new IndexOutOfBoundsException();
+    char high = sequence.charAt(index);
+    if (! isHighSurrogate(high) || ++index >= len)
+      return high;
+    char low = sequence.charAt(index);
+    if (! isLowSurrogate(low))
+      return high;
+    return toCodePoint(high, low);
+  }
+
+  /**
+   * Get the code point at the specified index in the character array.
+   * If the character is the start of a surrogate pair, and there is a
+   * following character, and this character completes the pair, then
+   * the corresponding supplementary code point is returned.
+   * Otherwise, the character at the index is returned.
+   *
+   * @param chars the character array in which to look
+   * @param index the index of the codepoint to get, starting at 0
+   * @return the codepoint at the specified index
+   * @throws IndexOutOfBoundsException if index is negative or &gt;= the length of the array
+   * @since 1.5
+   */
+  public static int codePointAt(char[] chars, int index)
+  {
+    return codePointAt(chars, index, chars.length);
+  }
+
+  /**
+   * Get the code point at the specified index in the character array.
+   * If the character is the start of a surrogate pair, and there is a
+   * following character within the specified range, and this
+   * character completes the pair, then the corresponding
+   * supplementary code point is returned.  Otherwise, the character
+   * at the index is returned.
+   *
+   * @param chars the character array in which to look
+   * @param index the index of the codepoint to get, starting at 0
+   * @param limit the limit past which characters should not be examined
+   * @return the codepoint at the specified index
+   * @throws IndexOutOfBoundsException if index is negative or &gt;=
+   * limit, or if limit is negative or &gt; the length of the array
+   * @since 1.5
+   */
+  public static int codePointAt(char[] chars, int index, int limit)
+  {
+    // limit is exclusive (chars[limit] is never read below), so it may
+    // legitimately equal chars.length.
+    if (index < 0 || index >= limit || limit < 0 || limit > chars.length)
+      throw new IndexOutOfBoundsException();
+    char high = chars[index];
+    // A lone high surrogate, or one that is the last character inside
+    // the range, is returned as-is.
+    if (! isHighSurrogate(high) || ++index >= limit)
+      return high;
+    char low = chars[index];
+    if (! isLowSurrogate(low))
+      return high;
+    return toCodePoint(high, low);
+  }
+
+  /**
+   * Get the code point before the specified index.  This is like
+   * #codePointAt(char[], int), but checks the characters at
+   * <code>index-1</code> and <code>index-2</code> to see if they form
+   * a supplementary code point.  If they do not, the character at
+   * <code>index-1</code> is returned.
+   *
+   * @param chars the character array
+   * @param index the index just past the codepoint to get, starting at 0
+   * @return the codepoint before the specified index
+   * @throws IndexOutOfBoundsException if index is less than 1 or &gt;
+   * the length of the array
+   * @since 1.5
+   */
+  public static int codePointBefore(char[] chars, int index)
+  {
+    // The whole array may be examined, so the lower bound is 0; a
+    // bound of 1 would hide a surrogate pair stored at indices 0 and 1.
+    return codePointBefore(chars, index, 0);
+  }
+
+  /**
+   * Get the code point before the specified index.  This is like
+   * #codePointAt(char[], int), but checks the characters at
+   * <code>index-1</code> and <code>index-2</code> to see if they form
+   * a supplementary code point.  If they do not, the character at
+   * <code>index-1</code> is returned.  The start parameter is used to
+   * limit the range of the array which may be examined.
+   *
+   * @param chars the character array
+   * @param index the index just past the codepoint to get, starting at 0
+   * @param start the index before which characters should not be examined
+   * @return the codepoint before the specified index
+   * @throws IndexOutOfBoundsException if index is &lt;= start or &gt;
+   * the length of the array, or if start is negative or &gt;= the
+   * length of the array
+   * @since 1.5
+   */
+  public static int codePointBefore(char[] chars, int index, int start)
+  {
+    // index must be strictly greater than start: the character read
+    // first is chars[index - 1], which has to lie at or after start.
+    if (index <= start || index > chars.length
+       || start < 0 || start >= chars.length)
+      throw new IndexOutOfBoundsException();
+    --index;
+    char low = chars[index];
+    // A lone low surrogate, or one sitting exactly at start, is
+    // returned as-is without looking further back.
+    if (! isLowSurrogate(low) || --index < start)
+      return low;
+    char high = chars[index];
+    if (! isHighSurrogate(high))
+      return low;
+    return toCodePoint(high, low);
+  }
+
+  /**
+   * Get the code point before the specified index.  This is like
+   * #codePointAt(CharSequence, int), but checks the characters at
+   * <code>index-1</code> and <code>index-2</code> to see if they form
+   * a supplementary code point.  If they do not, the character at
+   * <code>index-1</code> is returned.
+   *
+   * @param sequence the CharSequence
+   * @param index the index just past the codepoint to get, starting at 0
+   * @return the codepoint before the specified index
+   * @throws IndexOutOfBoundsException if index is less than 1 or &gt; length()
+   * @since 1.5
+   */
+  public static int codePointBefore(CharSequence sequence, int index)
+  {
+    int len = sequence.length();
+    if (index < 1 || index > len)
+      throw new IndexOutOfBoundsException();
+    --index;
+    char low = sequence.charAt(index);
+    if (! isLowSurrogate(low) || --index < 0)
+      return low;
+    char high = sequence.charAt(index);
+    if (! isHighSurrogate(high))
+      return low;
+    return toCodePoint(high, low);
+  }
  } // class Character
diff --git a/libjava/java/lang/String.java b/libjava/java/lang/String.java

index 95ad1fe..3e0bfbe 100644 (file)
--- a/libjava/java/lang/String.java
+++ b/libjava/java/lang/String.java
@@ -1,5 +1,5 @@
  /* String.java -- immutable character sequences; the object of string literals
-   Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005
+   Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
     Free Software Foundation, Inc.
  
  This file is part of GNU Classpath.
@@ -455,6 +455,40 @@ public final class String implements Serializable, Comparable, CharSequence
    public native char charAt(int index);
  
    /**
+   * Get the code point at the specified index.  This is like #charAt(int),
+   * but if the character is the start of a surrogate pair, and the
+   * following character completes the pair, then the corresponding
+   * supplementary code point is returned.
+   * @param index the index of the codepoint to get, starting at 0
+   * @return the codepoint at the specified index
+   * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
+   * @since 1.5
+   */
+  public synchronized int codePointAt(int index)
+  {
+    // Use the CharSequence overload as we get better range checking
+    // this way.
+    return Character.codePointAt(this, index);
+  }
+
+  /**
+   * Get the code point before the specified index.  This is like
+   * #codePointAt(int), but checks the characters at <code>index-1</code> and
+   * <code>index-2</code> to see if they form a supplementary code point.
+   * @param index the index just past the codepoint to get, starting at 0
+   * @return the codepoint before the specified index
+   * @throws IndexOutOfBoundsException if index is less than 1 or &gt; length()
+   *         (while unspecified, this is a StringIndexOutOfBoundsException)
+   * @since 1.5
+   */
+  public synchronized int codePointBefore(int index)
+  {
+    // Use the CharSequence overload as we get better range checking
+    // this way.
+    return Character.codePointBefore(this, index);
+  }
+
+  /**
     * Copies characters from this String starting at a specified start index,
     * ending at a specified stop index, to a character array starting at
     * a specified destination begin index.
@@ -566,6 +600,18 @@ public final class String implements Serializable, Comparable, CharSequence
    public native boolean contentEquals(StringBuffer buffer);
  
    /**
+   * Compares the given CharSequence to this String. This is true if
+   * the CharSequence has the same content as this String at this
+   * moment.
+   *
+   * @param seq the CharSequence to compare to
+   * @return true if CharSequence has the same character sequence
+   * @throws NullPointerException if the given CharSequence is null
+   * @since 1.5
+   */
+  public native boolean contentEquals(CharSequence seq);
+
+  /**
     * Compares a String to this String, ignoring case. This does not handle
     * multi-character capitalization exceptions; instead the comparison is
     * made on a character-by-character basis, and is true if:<br><ul>
@@ -1259,6 +1305,88 @@ public final class String implements Serializable, Comparable, CharSequence
     */
    public native String intern();
  
+  /**
+   * Return the number of code points between two indices in the
+   * <code>String</code>.  An unpaired surrogate counts as a
+   * code point for this purpose.  Characters outside the indicated
+   * range are not examined, even if the range ends in the middle of a
+   * surrogate pair.
+   *
+   * @param start the starting index
+   * @param end one past the ending index
+   * @return the number of code points
+   * @throws StringIndexOutOfBoundsException if start is negative,
+   *         end is &gt; length(), or start is &gt; end
+   * @since 1.5
+   */
+  public synchronized int codePointCount(int start, int end)
+  {
+    // Here count is the length field of this String; end is exclusive,
+    // so it is allowed to equal the length.
+    if (start < 0 || end > count || start > end)
+      throw new StringIndexOutOfBoundsException();
+
+    // Named so that it does not shadow the count field.
+    int codePoints = 0;
+    while (start < end)
+      {
+       char base = charAt(start);
+       ++start;
+       // Only look at the next character when it is inside the range,
+       // so a pair split by the end of the range counts as one unpaired
+       // surrogate.
+       if (base >= Character.MIN_HIGH_SURROGATE
+           && base <= Character.MAX_HIGH_SURROGATE
+           && start < end
+           && charAt(start) >= Character.MIN_LOW_SURROGATE
+           && charAt(start) <= Character.MAX_LOW_SURROGATE)
+         // Surrogate pair: consume the low half as well.
+         ++start;
+       ++codePoints;
+      }
+    return codePoints;
+  }
+
+  /**
+   * Returns true iff this String contains the sequence of Characters
+   * described in s.
+   * @param s the CharSequence
+   * @return true iff this String contains s
+   *
+   * @since 1.5
+   */
+  public boolean contains (CharSequence s)
+  {
+    return this.indexOf(s.toString()) != -1;
+  }
+
+  /**
+   * Returns a string that is this string with all instances of the sequence
+   * represented by <code>target</code> replaced by the sequence in 
+   * <code>replacement</code>.  Matching proceeds from the start of the
+   * string, and replaced text is not searched again.  An empty
+   * <code>target</code> matches before every character and at the end.
+   * @param target the sequence to be replaced
+   * @param replacement the sequence used as the replacement
+   * @return the string constructed as above
+   * @throws NullPointerException if either argument is null
+   * @since 1.5
+   */
+  public String replace (CharSequence target, CharSequence replacement)
+  {
+    String targetString = target.toString();
+    String replaceString = replacement.toString();
+    int targetLength = targetString.length();
+    int replaceLength = replaceString.length();
+    // An empty target matches at every position, so step one extra
+    // character past each insertion or the search never advances.
+    int step = replaceLength + (targetLength == 0 ? 1 : 0);
+
+    int startPos = this.indexOf(targetString);
+    StringBuilder result = new StringBuilder(this);    
+    while (startPos != -1)
+      {
+        // Replace the target with the replacement
+        result.replace(startPos, startPos + targetLength, replaceString);
+
+        // Search for a new occurrence of the target, stopping once the
+        // next search position lies beyond the end of the buffer.
+        int next = startPos + step;
+        startPos = next > result.length()
+          ? -1 : result.indexOf(targetString, next);
+      }
+    return result.toString();
+  }
+
  
    private native void init(char[] chars, int offset, int count,
                            boolean dont_copy);
diff --git a/libjava/java/lang/natString.cc b/libjava/java/lang/natString.cc

index c8f3129..3f63081 100644 (file)
--- a/libjava/java/lang/natString.cc
+++ b/libjava/java/lang/natString.cc
@@ -1,6 +1,6 @@
  // natString.cc - Implementation of java.lang.String native methods.
  
-/* Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005  Free Software Foundation
+/* Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006  Free Software Foundation
  
     This file is part of libgcj.
  
@@ -15,6 +15,7 @@ details.  */
  
  #include <gcj/cni.h>
  #include <java/lang/Character.h>
+#include <java/lang/CharSequence.h>
  #include <java/lang/String.h>
  #include <java/lang/IndexOutOfBoundsException.h>
  #include <java/lang/ArrayIndexOutOfBoundsException.h>
@@ -564,6 +565,18 @@ java::lang::String::contentEquals(java::lang::StringBuffer* buffer)
    return true;
  }
  
+jboolean
+java::lang::String::contentEquals(java::lang::CharSequence *seq)
+{
+  if (seq->length() != count)
+    return false;
+  jchar *value = JvGetStringChars(this);
+  for (int i = 0; i < count; ++i)
+    if (value[i] != seq->charAt(i))
+      return false;
+  return true;
+}
+
  jchar
  java::lang::String::charAt(jint i)
  {
author	tromey <tromey@138bc75d-0d04-0410-961f-82ee72b054a4>
	Sat, 7 Jan 2006 00:46:28 +0000 (00:46 +0000)
committer	tromey <tromey@138bc75d-0d04-0410-961f-82ee72b054a4>
	Sat, 7 Jan 2006 00:46:28 +0000 (00:46 +0000)
libjava/ChangeLog		patch \| blob \| history
libjava/java/lang/Character.java		patch \| blob \| history
libjava/java/lang/String.java		patch \| blob \| history
libjava/java/lang/natString.cc		patch \| blob \| history