2002-11-11 Jesse Rosenstock <jmr@ugcs.caltech.edu>

author mkoch <mkoch@138bc75d-0d04-0410-961f-82ee72b054a4>

Mon, 11 Nov 2002 07:36:41 +0000 (07:36 +0000)

committer mkoch <mkoch@138bc75d-0d04-0410-961f-82ee72b054a4>

Mon, 11 Nov 2002 07:36:41 +0000 (07:36 +0000)
author mkoch <mkoch@138bc75d-0d04-0410-961f-82ee72b054a4>
Mon, 11 Nov 2002 07:36:41 +0000 (07:36 +0000)
committer mkoch <mkoch@138bc75d-0d04-0410-961f-82ee72b054a4>
Mon, 11 Nov 2002 07:36:41 +0000 (07:36 +0000)
diff --git a/libjava/ChangeLog b/libjava/ChangeLog

index 4ddee78..0732a56 100644 (file)
--- a/libjava/ChangeLog
+++ b/libjava/ChangeLog
@@ -1,3 +1,18 @@
+2002-11-11  Jesse Rosenstock  <jmr@ugcs.caltech.edu>
+
+       * gnu/java/nio/charset/ISO_8859_1.java,
+       gnu/java/nio/charset/Provider.java,
+       gnu/java/nio/charset/US_ASCII.java,
+       gnu/java/nio/charset/UTF_16.java,
+       gnu/java/nio/charset/UTF_16BE.java,
+       gnu/java/nio/charset/UTF_16Decoder.java,
+       gnu/java/nio/charset/UTF_16Encoder.java,
+       gnu/java/nio/charset/UTF_16LE.java,
+       gnu/java/nio/charset/UTF_8.java: New files.
+       * Makefile.am ():
+       Added new files.
+       * Makefile.in: Regenerated.
+       
  2002-11-11  Michael Koch <konqueror@gmx.de>
  
         * java/nio/charset/CharacterCodingException.java:
diff --git a/libjava/gnu/java/nio/charset/ISO_8859_1.java b/libjava/gnu/java/nio/charset/ISO_8859_1.java

new file mode 100644 (file)

index 0000000..f29fa26
--- /dev/null
+++ b/libjava/gnu/java/nio/charset/ISO_8859_1.java
@@ -0,0 +1,132 @@
+/* ISO_8859_1.java -- 
+   Copyright (C) 2002 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.java.nio.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+/**
+ * Charset implementation for ISO-8859-1, registered with the alias
+ * ISO-LATIN-1.  Each byte corresponds to the character with the same
+ * numeric value (0x00 through 0xFF).
+ *
+ * @author Jesse Rosenstock
+ */
+final class ISO_8859_1 extends Charset
+{
+  ISO_8859_1 ()
+  {
+    super ("ISO-8859-1", new String[]{"ISO-LATIN-1"});
+  }
+
+  /**
+   * Reports whether <code>cs</code> is US-ASCII or ISO-8859-1 itself.
+   */
+  public boolean contains (Charset cs)
+  {
+    if (cs instanceof US_ASCII)
+      return true;
+    return cs instanceof ISO_8859_1;
+  }
+
+  public CharsetDecoder newDecoder ()
+  {
+    return new Decoder (this);
+  }
+
+  public CharsetEncoder newEncoder ()
+  {
+    return new Encoder (this);
+  }
+
+  /**
+   * Decoder producing exactly one character per input byte.
+   */
+  private static final class Decoder extends CharsetDecoder
+  {
+    private Decoder (Charset cs)
+    {
+      super (cs, 1.0f, 1.0f);
+    }
+
+    /**
+     * Copies bytes to <code>out</code> as characters until the input is
+     * exhausted (UNDERFLOW) or the output is full (OVERFLOW).  A byte
+     * that does not fit is left unread.
+     */
+    protected CoderResult decodeLoop (ByteBuffer in, CharBuffer out)
+    {
+      // TODO: Optimize this in the case in.hasArray() / out.hasArray()
+      for (;;)
+        {
+          if (!in.hasRemaining ())
+            return CoderResult.UNDERFLOW;
+
+          // Check for room first so the pending byte is never consumed.
+          if (!out.hasRemaining ())
+            return CoderResult.OVERFLOW;
+
+          // Mask to treat the byte as unsigned.
+          int unsigned = in.get () & 0xFF;
+          out.put ((char) unsigned);
+        }
+    }
+  }
+
+  /**
+   * Encoder producing exactly one byte per input character.
+   */
+  private static final class Encoder extends CharsetEncoder
+  {
+    private Encoder (Charset cs)
+    {
+      super (cs, 1.0f, 1.0f);
+    }
+
+    /**
+     * Copies characters to <code>out</code> as bytes.  Stops with an
+     * unmappable result at the first character above 0xFF, or with
+     * OVERFLOW when the output is full; in both cases the offending
+     * character is left unread.
+     */
+    protected CoderResult encodeLoop (CharBuffer in, ByteBuffer out)
+    {
+      // TODO: Optimize this in the case in.hasArray() / out.hasArray()
+      while (in.hasRemaining ())
+        {
+          // Peek at the next character without consuming it.
+          int pos = in.position ();
+          char c = in.get (pos);
+
+          // The unmappable check takes precedence over the overflow check.
+          if (c > 0xFF)
+            return CoderResult.unmappableForLength (1);
+          if (!out.hasRemaining ())
+            return CoderResult.OVERFLOW;
+
+          in.position (pos + 1);
+          out.put ((byte) c);
+        }
+
+      return CoderResult.UNDERFLOW;
+    }
+  }
+}
diff --git a/libjava/gnu/java/nio/charset/Provider.java b/libjava/gnu/java/nio/charset/Provider.java

new file mode 100644 (file)

index 0000000..13f6371
--- /dev/null
+++ b/libjava/gnu/java/nio/charset/Provider.java
@@ -0,0 +1,135 @@
+/* Provider.java -- 
+   Copyright (C) 2002 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.java.nio.charset;
+
+import java.nio.charset.Charset;
+import java.nio.charset.spi.CharsetProvider;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+
+/**
+ * Charset provider for the required charsets: US-ASCII, ISO-8859-1,
+ * UTF-8, UTF-16BE, UTF-16LE and UTF-16.  Intended to back by-name
+ * charset lookup and {@link Charset#availableCharsets}.
+ *
+ * @author Jesse Rosenstock
+ * @see Charset
+ */
+public final class Provider extends CharsetProvider
+{
+  /** Sole instance, created on the first call to provider(). */
+  private static Provider singleton;
+
+  static
+  {
+    // Start out with no instance.
+    synchronized (Provider.class)
+      {
+        singleton = null;
+      }
+  }
+
+  /**
+   * Map from charset alias to charset canonical name.
+   */
+  private final HashMap canonicalNames;
+
+  /**
+   * Map from canonical name to Charset.
+   * TODO: We may want to use soft references.  We would then need to keep
+   * track of the class name to regenerate the object.
+   */
+  private final HashMap charsets;
+
+  private Provider ()
+  {
+    // FIXME: We might need to make the name comparison case insensitive.
+    // Verify this with the Sun JDK.
+    charsets = new HashMap ();
+    canonicalNames = new HashMap ();
+
+    Charset[] required =
+      {
+        new US_ASCII (),      // US-ASCII aka ISO646-US
+        new ISO_8859_1 (),    // ISO-8859-1 aka ISO-LATIN-1
+        new UTF_8 (),         // UTF-8
+        new UTF_16BE (),      // UTF-16BE
+        new UTF_16LE (),      // UTF-16LE
+        new UTF_16 ()         // UTF-16
+      };
+
+    for (int i = 0; i < required.length; i++)
+      addCharset (required[i]);
+  }
+
+  /**
+   * Returns a read-only iterator over every registered charset.
+   */
+  public Iterator charsets ()
+  {
+    Collection registered = charsets.values ();
+    return Collections.unmodifiableCollection (registered).iterator ();
+  }
+
+  /**
+   * Looks up a charset by canonical name or alias.
+   *
+   * @return the charset, or <code>null</code> if none is registered
+   * under <code>charsetName</code>
+   */
+  public Charset charsetForName (String charsetName)
+  {
+    Object key = canonicalize (charsetName);
+    return (Charset) charsets.get (key);
+  }
+
+  /**
+   * Maps an alias to its canonical name; any other name is returned
+   * unchanged.
+   */
+  private Object canonicalize (String charsetName)
+  {
+    Object canonical = canonicalNames.get (charsetName);
+    if (canonical != null)
+      return canonical;
+    return charsetName;
+  }
+
+  /**
+   * Registers <code>cs</code> under its canonical name and records each
+   * of its aliases.
+   */
+  private void addCharset (Charset cs)
+  {
+    String canonicalName = cs.name ();
+    charsets.put (canonicalName, cs);
+
+    Iterator aliases = cs.aliases ().iterator ();
+    while (aliases.hasNext ())
+      canonicalNames.put (aliases.next (), canonicalName);
+  }
+
+  /**
+   * Returns the shared provider, creating it on first use.
+   */
+  public static synchronized Provider provider ()
+  {
+    if (singleton != null)
+      return singleton;
+
+    singleton = new Provider ();
+    return singleton;
+  }
+}
diff --git a/libjava/gnu/java/nio/charset/US_ASCII.java b/libjava/gnu/java/nio/charset/US_ASCII.java

new file mode 100644 (file)

index 0000000..a1ff251
--- /dev/null
+++ b/libjava/gnu/java/nio/charset/US_ASCII.java
@@ -0,0 +1,137 @@
+/* US_ASCII.java -- 
+   Copyright (C) 2002 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.java.nio.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+/**
+ * Charset implementation for US-ASCII, registered with the alias
+ * ISO646-US.  Only the values 0x00 through 0x7F are valid.
+ *
+ * @author Jesse Rosenstock
+ */
+final class US_ASCII extends Charset
+{
+  US_ASCII ()
+  {
+    super ("US-ASCII", new String[]{"ISO646-US"});
+  }
+
+  /**
+   * Reports whether <code>cs</code> is US-ASCII itself.
+   */
+  public boolean contains (Charset cs)
+  {
+    return cs instanceof US_ASCII;
+  }
+
+  public CharsetDecoder newDecoder ()
+  {
+    return new Decoder (this);
+  }
+
+  public CharsetEncoder newEncoder ()
+  {
+    return new Encoder (this);
+  }
+
+  /**
+   * Decoder producing exactly one character per input byte.
+   */
+  private static final class Decoder extends CharsetDecoder
+  {
+    private Decoder (Charset cs)
+    {
+      super (cs, 1.0f, 1.0f);
+    }
+
+    /**
+     * Copies bytes to <code>out</code> as characters.  Stops with a
+     * malformed-input result at the first byte with the high bit set,
+     * or with OVERFLOW when the output is full; in both cases the
+     * offending byte is left unread.
+     */
+    protected CoderResult decodeLoop (ByteBuffer in, CharBuffer out)
+    {
+      // TODO: Optimize this in the case in.hasArray() / out.hasArray()
+      while (in.hasRemaining ())
+        {
+          // Peek at the next byte without consuming it.
+          int pos = in.position ();
+          byte b = in.get (pos);
+
+          // The malformed check takes precedence over the overflow check.
+          if (b < 0)
+            return CoderResult.malformedForLength (1);
+          if (!out.hasRemaining ())
+            return CoderResult.OVERFLOW;
+
+          in.position (pos + 1);
+          out.put ((char) b);
+        }
+
+      return CoderResult.UNDERFLOW;
+    }
+  }
+
+  /**
+   * Encoder producing exactly one byte per input character.
+   */
+  private static final class Encoder extends CharsetEncoder
+  {
+    private Encoder (Charset cs)
+    {
+      super (cs, 1.0f, 1.0f);
+    }
+
+    /**
+     * Copies characters to <code>out</code> as bytes.  Stops with an
+     * unmappable result at the first character above 0x7F, or with
+     * OVERFLOW when the output is full; in both cases the offending
+     * character is left unread.
+     */
+    protected CoderResult encodeLoop (CharBuffer in, ByteBuffer out)
+    {
+      // TODO: Optimize this in the case in.hasArray() / out.hasArray()
+      while (in.hasRemaining ())
+        {
+          // Peek at the next character without consuming it.
+          int pos = in.position ();
+          char c = in.get (pos);
+
+          // The unmappable check takes precedence over the overflow check.
+          if (c > Byte.MAX_VALUE)
+            return CoderResult.unmappableForLength (1);
+          if (!out.hasRemaining ())
+            return CoderResult.OVERFLOW;
+
+          in.position (pos + 1);
+          out.put ((byte) c);
+        }
+
+      return CoderResult.UNDERFLOW;
+    }
+  }
+}
diff --git a/libjava/gnu/java/nio/charset/UTF_16.java b/libjava/gnu/java/nio/charset/UTF_16.java

new file mode 100644 (file)

index 0000000..18c9be7
--- /dev/null
+++ b/libjava/gnu/java/nio/charset/UTF_16.java
@@ -0,0 +1,75 @@
+/* UTF_16.java -- 
+   Copyright (C) 2002 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.java.nio.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+/**
+ * UTF-16 charset.  Decoders start with an unknown byte order, which
+ * UTF_16Decoder resolves from the first two bytes.  Encoders write
+ * big-endian output preceded by a byte-order mark.
+ *
+ * @author Jesse Rosenstock
+ */
+final class UTF_16 extends Charset
+{
+  UTF_16 ()
+  {
+    super ("UTF-16", null);
+  }
+
+  /**
+   * Reports whether <code>cs</code> is one of the charsets in this
+   * package known to be contained in UTF-16: US-ASCII, ISO-8859-1,
+   * UTF-8, UTF-16BE, UTF-16LE or UTF-16 itself.
+   */
+  public boolean contains (Charset cs)
+  {
+    return cs instanceof US_ASCII || cs instanceof ISO_8859_1
+      || cs instanceof UTF_8 || cs instanceof UTF_16BE
+      || cs instanceof UTF_16LE || cs instanceof UTF_16;
+  }
+
+  /**
+   * Returns a decoder that determines the byte order from the input.
+   */
+  public CharsetDecoder newDecoder ()
+  {
+    return new UTF_16Decoder (this, UTF_16Decoder.UNKNOWN_ENDIAN);
+  }
+
+  /**
+   * Returns a big-endian encoder that writes a byte-order mark first.
+   */
+  public CharsetEncoder newEncoder ()
+  {
+    // The last argument is useByteOrderMark.  The java.nio.charset.Charset
+    // specification requires UTF-16 encoders to write a big-endian mark,
+    // so it must be true here.
+    return new UTF_16Encoder (this, UTF_16Encoder.BIG_ENDIAN, true);
+  }
+}
diff --git a/libjava/gnu/java/nio/charset/UTF_16BE.java b/libjava/gnu/java/nio/charset/UTF_16BE.java

new file mode 100644 (file)

index 0000000..6fb28cd
--- /dev/null
+++ b/libjava/gnu/java/nio/charset/UTF_16BE.java
@@ -0,0 +1,75 @@
+/* UTF_16BE.java -- 
+   Copyright (C) 2002 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.java.nio.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+/**
+ * UTF-16BE charset.  Both decoding and encoding use big-endian byte
+ * order, and encoders do not write a byte-order mark.
+ *
+ * @author Jesse Rosenstock
+ */
+final class UTF_16BE extends Charset
+{
+  UTF_16BE ()
+  {
+    super ("UTF-16BE", null);
+  }
+
+  /**
+   * Reports whether <code>cs</code> is one of the charsets in this
+   * package known to be contained in UTF-16BE: US-ASCII, ISO-8859-1,
+   * UTF-8, UTF-16BE itself, UTF-16LE or UTF-16.
+   */
+  public boolean contains (Charset cs)
+  {
+    return cs instanceof US_ASCII || cs instanceof ISO_8859_1
+      || cs instanceof UTF_8 || cs instanceof UTF_16BE
+      || cs instanceof UTF_16LE || cs instanceof UTF_16;
+  }
+
+  /**
+   * Returns a decoder fixed to big-endian byte order.
+   */
+  public CharsetDecoder newDecoder ()
+  {
+    return new UTF_16Decoder (this, UTF_16Decoder.BIG_ENDIAN);
+  }
+
+  /**
+   * Returns a big-endian encoder that writes no byte-order mark.
+   */
+  public CharsetEncoder newEncoder ()
+  {
+    // The last argument is useByteOrderMark.  The java.nio.charset.Charset
+    // specification says UTF-16BE encoders do not write byte-order marks,
+    // so it must be false here.
+    return new UTF_16Encoder (this, UTF_16Encoder.BIG_ENDIAN, false);
+  }
+}
diff --git a/libjava/gnu/java/nio/charset/UTF_16Decoder.java b/libjava/gnu/java/nio/charset/UTF_16Decoder.java

new file mode 100644 (file)

index 0000000..c8e474d
--- /dev/null
+++ b/libjava/gnu/java/nio/charset/UTF_16Decoder.java
@@ -0,0 +1,169 @@
+/* UTF_16Decoder.java -- 
+   Copyright (C) 2002 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.java.nio.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CoderResult;
+
+/**
+ * Decoder for UTF-16, UTF-16LE, and UTF-16BE.
+ *
+ * When created with UNKNOWN_ENDIAN the byte order is taken from a
+ * leading byte-order mark, which is consumed and not output; without a
+ * mark, big-endian is assumed.
+ *
+ * @author Jesse Rosenstock
+ */
+final class UTF_16Decoder extends CharsetDecoder
+{
+  // byte orders
+  static final int BIG_ENDIAN = 0;
+  static final int LITTLE_ENDIAN = 1;
+  static final int UNKNOWN_ENDIAN = 2;
+
+  private static final char BYTE_ORDER_MARK = '\uFEFF';
+  private static final char REVERSED_BYTE_ORDER_MARK = '\uFFFE';
+
+  /** Byte order given at construction; restored by implReset(). */
+  private final int originalByteOrder;
+
+  /** Byte order currently in effect. */
+  private int byteOrder;
+
+  /**
+   * @param cs the charset this decoder belongs to
+   * @param byteOrder one of BIG_ENDIAN, LITTLE_ENDIAN or UNKNOWN_ENDIAN
+   */
+  UTF_16Decoder (Charset cs, int byteOrder)
+  {
+    super (cs, 0.5f, 1.0f);
+    this.originalByteOrder = byteOrder;
+    this.byteOrder = byteOrder;
+  }
+
+  /**
+   * Decodes as many complete UTF-16 code units (and surrogate pairs) as
+   * fit in <code>out</code>.  On return, <code>in</code> is positioned
+   * just after the last unit that was fully processed.
+   *
+   * @return UNDERFLOW when fewer than two bytes (or an incomplete
+   * surrogate pair) remain, OVERFLOW when <code>out</code> has no room
+   * for the next character or pair, or a malformed-input result of
+   * length 2 for an unpaired surrogate
+   */
+  protected CoderResult decodeLoop (ByteBuffer in, CharBuffer out)
+  {
+    // TODO: Optimize this in the case in.hasArray() / out.hasArray()
+
+    // inPos only advances past fully processed input; the finally clause
+    // rewinds IN to it on every exit path.
+    int inPos = in.position ();
+    try
+      {
+        while (in.remaining () >= 2)
+          {
+            byte b1 = in.get ();
+            byte b2 = in.get ();
+
+            // handle byte order mark
+            if (byteOrder == UNKNOWN_ENDIAN)
+              {
+                char mark = toChar (b1, b2);
+                if (mark == BYTE_ORDER_MARK)
+                  {
+                    byteOrder = BIG_ENDIAN;
+                    inPos += 2;
+                    continue;
+                  }
+                else if (mark == REVERSED_BYTE_ORDER_MARK)
+                  {
+                    byteOrder = LITTLE_ENDIAN;
+                    inPos += 2;
+                    continue;
+                  }
+                else
+                  {
+                    // assume big endian, do not consume bytes,
+                    // continue with normal processing
+                    byteOrder = BIG_ENDIAN;
+                  }
+              }
+
+            char c = byteOrder == BIG_ENDIAN ? toChar (b1, b2)
+                                             : toChar (b2, b1);
+
+            if (0xD800 <= c && c <= 0xDFFF)
+              {
+                // c is a surrogate
+
+                // make sure c is a high surrogate
+                if (c > 0xDBFF)
+                  return CoderResult.malformedForLength (2);
+                if (in.remaining () < 2)
+                  return CoderResult.UNDERFLOW;
+                byte b3 = in.get ();
+                byte b4 = in.get ();
+                char d = byteOrder == BIG_ENDIAN ? toChar (b3, b4)
+                                                 : toChar (b4, b3);
+                // make sure d is a low surrogate
+                if (d < 0xDC00 || d > 0xDFFF)
+                  return CoderResult.malformedForLength (2);
+                // A pair is written as a unit, so both chars must fit.
+                if (out.remaining () < 2)
+                  return CoderResult.OVERFLOW;
+                out.put (c);
+                out.put (d);
+                inPos += 4;
+              }
+            else
+              {
+                // A full output buffer is an overflow, not an underflow.
+                if (!out.hasRemaining ())
+                  return CoderResult.OVERFLOW;
+                out.put (c);
+                inPos += 2;
+              }
+          }
+
+        return CoderResult.UNDERFLOW;
+      }
+    finally
+      {
+        in.position (inPos);
+      }
+  }
+
+  /**
+   * Combines two bytes into one UTF-16 code unit.  Both bytes are masked
+   * to their unsigned value first; otherwise a low byte of 0x80 or above
+   * would sign-extend and overwrite the high byte.
+   */
+  private static char toChar (byte high, byte low)
+  {
+    return (char) (((high & 0xFF) << 8) | (low & 0xFF));
+  }
+
+  /**
+   * Restores the byte order given at construction.
+   */
+  protected void implReset ()
+  {
+    byteOrder = originalByteOrder;
+  }
+}
diff --git a/libjava/gnu/java/nio/charset/UTF_16Encoder.java b/libjava/gnu/java/nio/charset/UTF_16Encoder.java

new file mode 100644 (file)

index 0000000..b0cb9ed
--- /dev/null
+++ b/libjava/gnu/java/nio/charset/UTF_16Encoder.java
@@ -0,0 +1,153 @@
+/* UTF_16Encoder.java -- 
+   Copyright (C) 2002 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.java.nio.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+/**
+ * Encoder for UTF-16, UTF-16LE, and UTF-16BE.
+ *
+ * The byte order is fixed at construction.  When a byte-order mark is
+ * requested it is written once, before the first encoded character, and
+ * again after each reset.
+ *
+ * @author Jesse Rosenstock
+ */
+final class UTF_16Encoder extends CharsetEncoder
+{
+  // byte orders
+  static final int BIG_ENDIAN = 0;
+  static final int LITTLE_ENDIAN = 1;
+
+  private static final char BYTE_ORDER_MARK = '\uFEFF';
+
+  private final int byteOrder;
+  private final boolean useByteOrderMark;
+
+  /** True until the byte-order mark has been written. */
+  private boolean needsByteOrderMark;
+
+  /**
+   * @param cs the charset this encoder belongs to
+   * @param byteOrder BIG_ENDIAN or LITTLE_ENDIAN
+   * @param useByteOrderMark whether output starts with a byte-order mark
+   */
+  UTF_16Encoder (Charset cs, int byteOrder, boolean useByteOrderMark)
+  {
+    // The replacement bytes are U+FFFD written in the selected byte order.
+    super (cs, 2.0f,
+           useByteOrderMark ? 4.0f : 2.0f,
+           byteOrder == BIG_ENDIAN
+             ? new byte[] { (byte) 0xFF, (byte) 0xFD }
+             : new byte[] { (byte) 0xFD, (byte) 0xFF });
+    this.byteOrder = byteOrder;
+    this.useByteOrderMark = useByteOrderMark;
+    this.needsByteOrderMark = useByteOrderMark;
+  }
+
+  /**
+   * Encodes as many characters (and complete surrogate pairs) as fit in
+   * <code>out</code>.  On return, <code>in</code> is positioned just
+   * after the last character that was fully encoded.
+   *
+   * @return UNDERFLOW when the input is exhausted or ends in a high
+   * surrogate, OVERFLOW when <code>out</code> has no room for the
+   * byte-order mark, the next character or the next pair, or a
+   * malformed-input result of length 1 for an unpaired surrogate
+   */
+  protected CoderResult encodeLoop (CharBuffer in, ByteBuffer out)
+  {
+    // TODO: Optimize this in the case in.hasArray() / out.hasArray()
+
+    if (needsByteOrderMark)
+      {
+        if (out.remaining () < 2)
+          return CoderResult.OVERFLOW;
+        put (out, BYTE_ORDER_MARK);
+        needsByteOrderMark = false;
+      }
+
+    // inPos only advances past fully encoded input; the finally clause
+    // rewinds IN to it on every exit path.
+    int inPos = in.position ();
+    try
+      {
+        while (in.hasRemaining ())
+          {
+            char c = in.get ();
+
+            if (0xD800 <= c && c <= 0xDFFF)
+              {
+                // c is a surrogate
+
+                // make sure c is a high surrogate
+                if (c > 0xDBFF)
+                  return CoderResult.malformedForLength (1);
+                if (in.remaining () < 1)
+                  return CoderResult.UNDERFLOW;
+                char d = in.get ();
+                // make sure d is a low surrogate
+                if (d < 0xDC00 || d > 0xDFFF)
+                  return CoderResult.malformedForLength (1);
+                // A pair is written as a unit, so all four bytes must fit;
+                // otherwise put() would throw BufferOverflowException.
+                if (out.remaining () < 4)
+                  return CoderResult.OVERFLOW;
+                put (out, c);
+                put (out, d);
+                inPos += 2;
+              }
+            else
+              {
+                if (out.remaining () < 2)
+                  return CoderResult.OVERFLOW;
+                put (out, c);
+                inPos++;
+              }
+          }
+
+        return CoderResult.UNDERFLOW;
+      }
+    finally
+      {
+        in.position (inPos);
+      }
+  }
+
+  /**
+   * Writes <code>c</code> to <code>out</code> in the byte order
+   * specified by <code>byteOrder</code>.
+   **/
+  private void put (ByteBuffer out, char c)
+  {
+    if (byteOrder == BIG_ENDIAN)
+      {
+        out.put ((byte) (c >> 8));
+        out.put ((byte) (c & 0xFF));
+      }
+    else
+      {
+        out.put ((byte) (c & 0xFF));
+        out.put ((byte) (c >> 8));
+      }
+  }
+
+  /**
+   * Re-arms the byte-order mark so the next encoding writes it again.
+   */
+  protected void implReset ()
+  {
+    needsByteOrderMark = useByteOrderMark;
+  }
+
+  // TODO: override canEncode(char) and canEncode(CharSequence)
+  // for performance
+}
diff --git a/libjava/gnu/java/nio/charset/UTF_16LE.java b/libjava/gnu/java/nio/charset/UTF_16LE.java

new file mode 100644 (file)

index 0000000..b914ae0
--- /dev/null
+++ b/libjava/gnu/java/nio/charset/UTF_16LE.java
@@ -0,0 +1,75 @@
+/* UTF_16LE.java -- 
+   Copyright (C) 2002 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.java.nio.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+/**
+ * UTF-16LE charset.
+ *
+ * @author Jesse Rosenstock
+ */
+final class UTF_16LE extends Charset
+{
+  UTF_16LE ()
+  {
+    super ("UTF-16LE", null);
+  }
+
+  public boolean contains (Charset cs)
+  {
+    return cs instanceof US_ASCII || cs instanceof ISO_8859_1
+      || cs instanceof UTF_8 || cs instanceof UTF_16BE
+      || cs instanceof UTF_16LE || cs instanceof UTF_16;
+  }
+
+  public CharsetDecoder newDecoder ()
+  {
+    return new UTF_16Decoder (this, UTF_16Decoder.LITTLE_ENDIAN);
+  }
+
+  public CharsetEncoder newEncoder ()
+  {
+    return new UTF_16Encoder (this, UTF_16Encoder.LITTLE_ENDIAN, true);
+  }
+}
diff --git a/libjava/gnu/java/nio/charset/UTF_8.java b/libjava/gnu/java/nio/charset/UTF_8.java

new file mode 100644 (file)

index 0000000..aa623b2
--- /dev/null
+++ b/libjava/gnu/java/nio/charset/UTF_8.java
@@ -0,0 +1,279 @@
+/* UTF_8.java -- 
+   Copyright (C) 2002 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.java.nio.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+/**
+ * UTF-8 charset.
+ * 
+ * <p> UTF-8 references:
+ * <ul>
+ *   <li> <a href="http://ietf.org/rfc/rfc2279.txt">RFC 2279</a>
+ *   <li> The <a href="http://www.unicode.org/unicode/standard/standard.html">
+ *     Unicode standard</a> and 
+ *     <a href="http://www.unicode.org/versions/corrigendum1.html">
+ *      Corrigendum</a>
+ * </ul>
+ *
+ * @author Jesse Rosenstock
+ */
+final class UTF_8 extends Charset
+{
+  UTF_8 ()
+  {
+    super ("UTF-8", null);
+  }
+
+  public boolean contains (Charset cs)
+  {
+    return cs instanceof US_ASCII || cs instanceof ISO_8859_1
+      || cs instanceof UTF_8 || cs instanceof UTF_16BE
+      || cs instanceof UTF_16LE || cs instanceof UTF_16;
+  }
+
+  public CharsetDecoder newDecoder ()
+  {
+    return new Decoder (this);
+  }
+
+  public CharsetEncoder newEncoder ()
+  {
+    return new Encoder (this);
+  }
+
+  private static final class Decoder extends CharsetDecoder
+  {
+    private Decoder (Charset cs)
+    {
+      super (cs, 1.0f, 1.0f);
+    }
+
+    protected CoderResult decodeLoop (ByteBuffer in, CharBuffer out)
+    {
+      // TODO: Optimize this in the case in.hasArray() / out.hasArray()
+      int inPos = 0;
+      try
+        {
+          while (in.hasRemaining ())
+            {
+              char c;
+              byte b1 = in.get ();
+              int highNibble = (b1 >> 4) & 0xF;
+
+              switch (highNibble)
+                {
+                  case 0: case 1: case 2: case 3:
+                  case 4: case 5: case 6: case 7:
+                    if (out.remaining () < 1)
+                      return CoderResult.OVERFLOW;
+                    out.put ((char) b1);
+                    inPos++;
+                    break;
+
+                  case 0xC: case 0xD:
+                    byte b2;
+                    if (in.remaining () < 1)
+                      return CoderResult.UNDERFLOW;
+                    if (out.remaining () < 1)
+                      return CoderResult.OVERFLOW;
+                    if (!isContinuation (b2 = in.get ()))
+                      return CoderResult.malformedForLength (1);
+                    c = (char) (((b1 & 0x1F) << 6) | (b2 & 0x3F));
+                    // check that we had the shortest encoding
+                    if (c <= 0x7F)
+                      return CoderResult.malformedForLength (2);
+                    out.put (c);
+                    inPos += 2;
+                    break;
+
+                  case 0xE:
+                    byte b3;
+                    if (in.remaining () < 2)
+                      return CoderResult.UNDERFLOW;
+                    if (out.remaining () < 1)
+                      return CoderResult.OVERFLOW;
+                    if (!isContinuation (b2 = in.get ()))
+                      return CoderResult.malformedForLength (1);
+                    if (!isContinuation (b3 = in.get ()))
+                      return CoderResult.malformedForLength (1);
+                    c = (char) (((b1 & 0x0F) << 12)
+                                | ((b2 & 0x3F) << 6)
+                                | (b3 & 0x3F));
+                    // check that we had the shortest encoding
+                    if (c <= 0x7FF)
+                      return CoderResult.malformedForLength (3);
+                    out.put (c);
+                    inPos += 3;
+                    break;
+
+                  default:
+                    return CoderResult.malformedForLength (1);
+                }
+            }
+
+          return CoderResult.UNDERFLOW;
+        }
+      finally
+        {
+          // In case we did a get(), then encountered an error, reset the
+          // position to before the error.  If there was no error, this
+          // will benignly reset the position to the value it already has.
+          in.position (inPos);
+        }
+    }
+
+    private static boolean isContinuation (byte b)
+    {
+      return (b & 0xC0) == 0x80;
+    }
+  }
+
+  private static final class Encoder extends CharsetEncoder
+  {
+    private Encoder (Charset cs)
+    {
+      // According to
+      // http://www-106.ibm.com/developerworks/unicode/library/utfencodingforms/index.html
+      //   On average, English takes slightly over one unit per code point.
+      //   Most Latin-script languages take about 1.1 bytes. Greek, Russian,
+      //   Arabic and Hebrew take about 1.7 bytes, and most others (including
+      //   Japanese, Chinese, Korean and Hindi) take about 3 bytes.
+      // We assume we will be dealing with latin scripts, and use 1.1 
+      // for averageBytesPerChar.
+      super (cs, 1.1f, 4.0f);
+    }
+
+    protected CoderResult encodeLoop (CharBuffer in, ByteBuffer out)
+    {
+      int inPos = 0;
+      try
+        {
+          // TODO: Optimize this in the case in.hasArray() / out.hasArray()
+          while (in.hasRemaining ())
+          {
+            int remaining = out.remaining ();
+            char c = in.get ();
+
+            // UCS-4 range (hex.)           UTF-8 octet sequence (binary)
+            // 0000 0000-0000 007F   0xxxxxxx
+            // 0000 0080-0000 07FF   110xxxxx 10xxxxxx
+            // 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
+
+            //        Scalar Value          UTF-16                byte 1     byte 2     byte 3     byte 4
+            //        0000 0000 0xxx xxxx   0000 0000 0xxx xxxx   0xxx xxxx
+            //        0000 0yyy yyxx xxxx   0000 0yyy yyxx xxxx   110y yyyy  10xx xxxx
+            //        zzzz yyyy yyxx xxxx   zzzz yyyy yyxx xxxx   1110 zzzz  10yy yyyy  10xx xxxx
+            // u uuuu zzzz yyyy yyxx xxxx   1101 10ww wwzz zzyy   1111 0uuu  10uu zzzz  10yy yyyy  10xx xxxx
+            //                            + 1101 11yy yyxx xxxx
+            // Note: uuuuu = wwww + 1
+
+            if (c <= 0x7F)
+              {
+                if (remaining < 1)
+                  return CoderResult.OVERFLOW;
+                out.put ((byte) c);
+                inPos++;
+              }
+            else if (c <= 0x7FF)
+              {
+                if (remaining < 2)
+                  return CoderResult.OVERFLOW;
+                out.put ((byte) (0xC0 | (c >> 6)));
+                out.put ((byte) (0x80 | (c & 0x3F)));
+                inPos++;
+              }
+            else if (0xD800 <= c && c <= 0xDFFF)
+              {
+                if (remaining < 4)
+                  return CoderResult.OVERFLOW;
+
+                // we got a low surrogate without a preciding high one
+                if (c > 0xDBFF)
+                  return CoderResult.malformedForLength (1);
+
+                // high surrogates
+                if (!in.hasRemaining ())
+                  return CoderResult.UNDERFLOW;
+
+                char d = in.get ();
+
+                // make sure d is a low surrogate
+                if (d < 0xDC00 || d > 0xDFFF)
+                  return CoderResult.malformedForLength (1);
+
+                // make the 32 bit value
+                // int value2 = (c - 0xD800) * 0x400 + (d - 0xDC00) + 0x10000;
+                int value = (((c & 0x3FF) << 10) | (d & 0x3FF)) + 0x10000;
+                // assert value == value2;
+                out.put ((byte) (0xF0 | (value >> 18)));
+                out.put ((byte) (0x80 | ((value >> 12) & 0x3F)));
+                out.put ((byte) (0x80 | ((value >>  6) & 0x3F)));
+                out.put ((byte) (0x80 | ((value      ) & 0x3F)));
+                
+                inPos += 2;
+              }
+            else
+              {
+                if (remaining < 3)
+                  return CoderResult.OVERFLOW;
+
+                out.put ((byte) (0xE0 | (c >> 12)));
+                out.put ((byte) (0x80 | ((c >> 6) & 0x3F)));
+                out.put ((byte) (0x80 | (c & 0x3F)));
+                inPos++;
+              }
+          }
+
+          return CoderResult.UNDERFLOW;
+        }
+      finally
+        {
+          // In case we did a get(), then encountered an error, reset the
+          // position to before the error.  If there was no error, this
+          // will benignly reset the position to the value it already has.
+          in.position (inPos);
+        }
+    }
+  }
+}
author	mkoch <mkoch@138bc75d-0d04-0410-961f-82ee72b054a4>
	Mon, 11 Nov 2002 07:36:41 +0000 (07:36 +0000)
committer	mkoch <mkoch@138bc75d-0d04-0410-961f-82ee72b054a4>
	Mon, 11 Nov 2002 07:36:41 +0000 (07:36 +0000)
libjava/ChangeLog		patch \| blob \| history
libjava/gnu/java/nio/charset/ISO_8859_1.java	[new file with mode: 0644]	patch \| blob
libjava/gnu/java/nio/charset/Provider.java	[new file with mode: 0644]	patch \| blob
libjava/gnu/java/nio/charset/US_ASCII.java	[new file with mode: 0644]	patch \| blob
libjava/gnu/java/nio/charset/UTF_16.java	[new file with mode: 0644]	patch \| blob
libjava/gnu/java/nio/charset/UTF_16BE.java	[new file with mode: 0644]	patch \| blob
libjava/gnu/java/nio/charset/UTF_16Decoder.java	[new file with mode: 0644]	patch \| blob
libjava/gnu/java/nio/charset/UTF_16Encoder.java	[new file with mode: 0644]	patch \| blob
libjava/gnu/java/nio/charset/UTF_16LE.java	[new file with mode: 0644]	patch \| blob
libjava/gnu/java/nio/charset/UTF_8.java	[new file with mode: 0644]	patch \| blob