Fix for PR gcj/33:

author tromey <tromey@138bc75d-0d04-0410-961f-82ee72b054a4>

Tue, 12 Sep 2000 22:23:59 +0000 (22:23 +0000)

committer tromey <tromey@138bc75d-0d04-0410-961f-82ee72b054a4>

Tue, 12 Sep 2000 22:23:59 +0000 (22:23 +0000)
author tromey <tromey@138bc75d-0d04-0410-961f-82ee72b054a4>
Tue, 12 Sep 2000 22:23:59 +0000 (22:23 +0000)
committer tromey <tromey@138bc75d-0d04-0410-961f-82ee72b054a4>
Tue, 12 Sep 2000 22:23:59 +0000 (22:23 +0000)
diff --git a/gcc/java/ChangeLog b/gcc/java/ChangeLog

index 642f4a7..7b13f9a 100644 (file)
--- a/gcc/java/ChangeLog
+++ b/gcc/java/ChangeLog
@@ -1,5 +1,46 @@
  2000-09-12  Tom Tromey  <tromey@cygnus.com>
  
+       Fix for PR gcj/33:
+       * jv-scan.c (help): Document --encoding.
+       (options): Added `encoding' entry.
+       (OPT_ENCODING): New define.
+       (main): Handle --encoding.
+       Include <langinfo.h> if nl_langinfo exists.
+       * lang-options.h: Document --classpath, --CLASSPATH, --main, and
+       --encoding.
+       * jcf-parse.c: Include <langinfo.h> if we have nl_langinfo.
+       (parse_source_file): Correctly call java_init_lex.  Added `finput'
+       argument.  Use nl_langinfo to determine default encoding.
+       * java-tree.h (current_encoding): Declare.
+       * parse.y (java_parser_context_restore_global): Don't restore
+       `finput'.
+       (java_parser_context_save_global): Don't set `finput' field.
+       (java_pop_parser_context): Don't restore `finput'.  Free old lexer
+       if required.
+       * lang.c (current_encoding): New global.
+       (lang_decode_option): Recognize `-fencoding='.
+       (finish_parse): Don't close finput.
+       * parse.h (struct parser_ctxt): Removed `finput' and
+       `unget_utf8_value' fields.  Added `lexer' field.
+       (java_init_lex): Fixed declaration.
+       * lex.c (java_new_lexer): New function.
+       (java_destroy_lexer): Likewise.
+       (java_read_char): Added `lex' argument.  Handle iconv case.
+       (java_read_unicode): Added `lex' argument.  Count backslashes in
+       lexer structure.
+       (java_init_lex): Added `finput' and `encoding' arguments.  Set
+       `lexer' field in ctxp.
+       (BAD_UTF8_VALUE): Removed.
+       (java_lex): Handle seeing UEOF in the middle of a string literal.
+       * lex.h: Include <iconv.h> if HAVE_ICONV defined.
+       (java_lexer): New structure.
+       (UNGETC): Removed.
+       (GETC): Removed.
+       (DEFAULT_ENCODING): New define.
+       (java_destroy_lexer): Declare.
+
+2000-09-12  Tom Tromey  <tromey@cygnus.com>
+
         Fix for PR gcj/343:
         * lex.c (java_init_lex): Initialize java_io_serializable.
         * parse.y (java_io_serializable): New global.
diff --git a/gcc/java/java-tree.h b/gcc/java/java-tree.h

index 94fdcae..18cdf7a 100644 (file)
--- a/gcc/java/java-tree.h
+++ b/gcc/java/java-tree.h
@@ -169,6 +169,9 @@ extern int flag_use_boehm_gc;
     object to its synchronization structure.  */
  extern int flag_hash_synchronization;
  
+/* Encoding used for source files.  */
+extern char *current_encoding;
+
  /* The Java .class file that provides main_class;  the main input file. */
  extern struct JCF *current_jcf;
  
diff --git a/gcc/java/jcf-parse.c b/gcc/java/jcf-parse.c

index 02becc0..4b76f59 100644 (file)
--- a/gcc/java/jcf-parse.c
+++ b/gcc/java/jcf-parse.c
@@ -35,6 +35,10 @@ The Free Software Foundation is independent of Sun Microsystems, Inc.  */
  #include "toplev.h"
  #include "parse.h"
  
+#ifdef HAVE_NL_LANGINFO
+#include <langinfo.h>
+#endif
+
  /* A CONSTANT_Utf8 element is converted to an IDENTIFIER_NODE at parse time. */
  #define JPOOL_UTF(JCF, INDEX) CPOOL_UTF(&(JCF)->cpool, INDEX)
  #define JPOOL_UTF_LENGTH(JCF, INDEX) IDENTIFIER_LENGTH (JPOOL_UTF (JCF, INDEX))
@@ -83,7 +87,7 @@ static struct JCF main_jcf[1];
  static tree give_name_to_class PARAMS ((JCF *jcf, int index));
  static void parse_zip_file_entries PARAMS ((void));
  static void process_zip_dir PARAMS ((void));
-static void parse_source_file PARAMS ((tree));
+static void parse_source_file PARAMS ((tree, FILE *));
  static void jcf_parse_source PARAMS ((void));
  static int jcf_figure_file_type PARAMS ((JCF *));
  static int find_in_current_zip PARAMS ((const char *, struct JCF **));
@@ -564,6 +568,7 @@ static void
  jcf_parse_source ()
  {
    tree file;
+  FILE *finput;
  
    java_parser_context_save_global ();
    java_push_parser_context ();
@@ -576,7 +581,7 @@ jcf_parse_source ()
        if (!(finput = fopen (input_filename, "r")))
         fatal ("input file `%s' just disappeared - jcf_parse_source",
                input_filename);
-      parse_source_file (file);
+      parse_source_file (file, finput);
        if (fclose (finput))
         fatal ("can't close input file `%s' stream - jcf_parse_source",
                input_filename);
@@ -754,8 +759,9 @@ parse_class_file ()
  /* Parse a source file, as pointed by the current value of INPUT_FILENAME. */
  
  static void
-parse_source_file (file)
+parse_source_file (file, finput)
       tree file;
+     FILE *finput;
  {
    int save_error_count = java_error_count;
    /* Mark the file as parsed */
@@ -765,7 +771,21 @@ parse_source_file (file)
  
    lang_init_source (1);                    /* Error msgs have no method prototypes */
  
-  java_init_lex ();                /* Initialize the parser */
+  /* There's no point in trying to find the current encoding unless we
+     are going to do something intelligent with it -- hence the test
+     for iconv.  */
+#ifdef HAVE_ICONV
+#ifdef HAVE_NL_LANGINFO
+  setlocale (LC_CTYPE, "");
+  if (current_encoding == NULL)
+    current_encoding = nl_langinfo (CODESET);
+#endif /* HAVE_NL_LANGINFO */
+#endif /* HAVE_ICONV */
+  if (current_encoding == NULL || *current_encoding == '\0')
+    current_encoding = DEFAULT_ENCODING;
+
+  /* Initialize the parser */
+  java_init_lex (finput, current_encoding);
    java_parse_abort_on_error ();
  
    java_parse ();                   /* Parse and build partial tree nodes. */
@@ -796,6 +816,7 @@ yyparse ()
    int several_files = 0;
    char *list = xstrdup (input_filename), *next;
    tree node, current_file_list = NULL_TREE;
+  FILE *finput;
  
    do 
      {
@@ -901,7 +922,7 @@ yyparse ()
         case JCF_SOURCE:
           java_push_parser_context ();
           java_parser_context_save_global ();
-         parse_source_file (name);
+         parse_source_file (name, finput);
           java_parser_context_restore_global ();
           java_pop_parser_context (1);
           break;
diff --git a/gcc/java/jv-scan.c b/gcc/java/jv-scan.c

index adb7ba3..ae9c91d 100644 (file)
--- a/gcc/java/jv-scan.c
+++ b/gcc/java/jv-scan.c
@@ -26,6 +26,10 @@ Boston, MA 02111-1307, USA.  */
  
  #include "version.h"
  
+#ifdef HAVE_NL_LANGINFO
+#include <langinfo.h>
+#endif
+
  #include <getopt.h>
  
  void fatal PARAMS ((const char *s, ...)) ATTRIBUTE_PRINTF_1 ATTRIBUTE_NORETURN;
@@ -61,6 +65,7 @@ int flag_list_filename = 0;
  
  #define OPT_HELP      LONG_OPT (0)
  #define OPT_VERSION   LONG_OPT (1)
+#define OPT_ENCODING  LONG_OPT (2)
  
  static struct option options[] =
  {
@@ -69,6 +74,7 @@ static struct option options[] =
    { "print-main", no_argument,      &flag_find_main, 1 },
    { "list-filename", no_argument,   &flag_list_filename, 1 },
    { "list-class", no_argument,      &flag_dump_class, 1 },
+  { "encoding",  required_argument, NULL, OPT_ENCODING },
    { NULL,        no_argument,       NULL, 0 }
  };
  
@@ -84,6 +90,7 @@ help ()
  {
    printf ("Usage: jv-scan [OPTION]... FILE...\n\n");
    printf ("Print useful information read from Java source files.\n\n");
+  printf ("  --encoding NAME         Specify encoding of input file\n");
    printf ("  --print-main            Print name of class containing `main'\n");
    printf ("  --list-class            List all classes defined in file\n");
    printf ("  --list-filename         Print input filename when listing class names\n");
@@ -114,6 +121,7 @@ DEFUN (main, (argc, argv),
  {
    int i = 1;
    const char *output_file = NULL;
+  const char *encoding = NULL;
    long ft;
    int opt;
  
@@ -144,6 +152,10 @@ DEFUN (main, (argc, argv),
           version ();
           break;
  
+       case OPT_ENCODING:
+         encoding = optarg;
+         break;
+
         default:
           usage ();
           break;
@@ -172,7 +184,20 @@ DEFUN (main, (argc, argv),
         input_filename = argv [i];
         if ( (finput = fopen (argv [i], "r")) )
           {
-           java_init_lex ();
+           /* There's no point in trying to find the current encoding
+              unless we are going to do something intelligent with it
+              -- hence the test for iconv.  */
+#ifdef HAVE_ICONV
+#ifdef HAVE_NL_LANGINFO
+           setlocale (LC_CTYPE, "");
+           if (encoding == NULL)
+             encoding = nl_langinfo (CODESET);
+#endif /* HAVE_NL_LANGINFO */
+#endif /* HAVE_ICONV */
+           if (encoding == NULL || *encoding == '\0')
+             encoding = DEFAULT_ENCODING;
+
+           java_init_lex (finput, encoding);
             yyparse ();
             if (ftell (out) != ft)
               fputc ('\n', out);
diff --git a/gcc/java/lang-options.h b/gcc/java/lang-options.h

index 630e6d8..2b207dc 100644 (file)
--- a/gcc/java/lang-options.h
+++ b/gcc/java/lang-options.h
@@ -42,8 +42,10 @@ DEFINE_LANG_NAME ("Java")
    { "-M", "Print dependencies to stdout" },
    { "-MM", "Print dependencies to stdout" },
  #endif /* ! USE_CPPLIB */
-  { "-fclasspath", "Set class path and suppress system path" },
-  { "-fCLASSPATH", "Set class path" },
+  { "--classpath", "Set class path and suppress system path" },
+  { "--CLASSPATH", "Set class path" },
+  { "--main", "Choose class whose main method should be used" },
+  { "--encoding", "Choose input encoding (default is UTF-8)" },
    { "-I", "Add directory to class path" },
    { "-foutput-class-dir", "Directory where class files should be written" },
    { "-fuse-divide-subroutine", "" },
diff --git a/gcc/java/lang.c b/gcc/java/lang.c

index 5f95f2d..2dc33f2 100644 (file)
--- a/gcc/java/lang.c
+++ b/gcc/java/lang.c
@@ -121,6 +121,9 @@ int flag_hash_synchronization;
     JNI, not CNI.  */
  int flag_jni = 0;
  
+/* The encoding of the source file.  */
+char *current_encoding = NULL;
+
  /* When non zero, report the now deprecated empty statements.  */
  int flag_extraneous_semicolon;
  
@@ -222,6 +225,13 @@ lang_decode_option (argc, argv)
        return 1;
      }
  #undef ARG
+#define ARG "-fencoding="
+  if (strncmp (p, ARG, sizeof (ARG) - 1) == 0)
+    {
+      current_encoding = p + sizeof (ARG) - 1;
+      return 1;
+    }
+#undef ARG
  
    if (p[0] == '-' && p[1] == 'f')
      {
@@ -309,7 +319,9 @@ lang_decode_option (argc, argv)
    return 0;
  }
  
+/* Global open file.  */
  FILE *finput;
+
  const char *
  init_parse (filename)
       const char *filename;
@@ -362,6 +374,7 @@ init_parse (filename)
             }
         }
      }
+
    init_lex ();
  
    return filename;
@@ -370,7 +383,6 @@ init_parse (filename)
  void
  finish_parse ()
  {
-  fclose (finput);
    jcf_dependency_write ();
  }
  
diff --git a/gcc/java/lex.c b/gcc/java/lex.c

index 535733f..4179b1d 100644 (file)
--- a/gcc/java/lex.c
+++ b/gcc/java/lex.c
@@ -24,15 +24,15 @@ of Sun Microsystems, Inc. in the United States and other countries.
  The Free Software Foundation is independent of Sun Microsystems, Inc.  */
  
  /* It defines java_lex (yylex) that reads a Java ASCII source file
-possibly containing Unicode escape sequence or utf8 encoded characters
-and returns a token for everything found but comments, white spaces
-and line terminators. When necessary, it also fills the java_lval
-(yylval) union. It's implemented to be called by a re-entrant parser
-generated by Bison.
+   possibly containing Unicode escape sequences or utf8 encoded
+   characters and returns a token for everything found but comments,
+   white spaces and line terminators. When necessary, it also fills
+   the java_lval (yylval) union. It's implemented to be called by a
+   re-entrant parser generated by Bison.
  
-The lexical analysis conforms to the Java grammar described in "The
-Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
-Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html)  */
+   The lexical analysis conforms to the Java grammar described in "The
+   Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
+   Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
  
  #include "keyword.h"
  
@@ -55,15 +55,18 @@ static int java_letter_or_digit_p PARAMS ((unicode_t));
  static int java_parse_doc_section PARAMS ((unicode_t));
  static void java_parse_end_comment PARAMS ((unicode_t));
  static unicode_t java_get_unicode PARAMS ((void));
-static unicode_t java_read_unicode PARAMS ((int, int *));
+static unicode_t java_read_unicode PARAMS ((java_lexer *, int, int *));
  static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
-static unicode_t java_read_char PARAMS ((void));
+static unicode_t java_read_char PARAMS ((java_lexer *));
  static void java_allocate_new_line PARAMS ((void));
  static void java_unget_unicode PARAMS ((void));
  static unicode_t java_sneak_unicode PARAMS ((void));
+java_lexer *java_new_lexer PARAMS ((FILE *, const char *));
  
  void
-java_init_lex ()
+java_init_lex (finput, encoding)
+     FILE *finput;
+     const char *encoding;
  {
  #ifndef JC1_LITE
    int java_lang_imported = 0;
@@ -114,9 +117,9 @@ java_init_lex ()
    ctxp->lineno = lineno = 0;
    ctxp->p_line = NULL;
    ctxp->c_line = NULL;
-  ctxp->unget_utf8_value = 0;
    ctxp->minus_seen = 0;
    ctxp->java_error_flag = 0;
+  ctxp->lexer = java_new_lexer (finput, encoding);
  }
  
  static char *
@@ -194,59 +197,180 @@ java_allocate_new_line ()
    ctxp->c_line->white_space_only = 1;
  }
  
-#define BAD_UTF8_VALUE 0xFFFE
-
-static unicode_t
-java_read_char ()
+/* Create a new lexer object.  */
+java_lexer *
+java_new_lexer (finput, encoding)
+     FILE *finput;
+     const char *encoding;
  {
-  int c;
-  int c1, c2;
+  java_lexer *lex = (java_lexer *) xmalloc (sizeof (java_lexer));
+  int enc_error = 0;
+
+  lex->finput = finput;
+  lex->bs_count = 0;
+  lex->unget_value = 0;
  
-  if (ctxp->unget_utf8_value)
+#ifdef HAVE_ICONV
+  lex->handle = iconv_open ("UCS-2", encoding);
+  if (lex->handle == (iconv_t) -1)
      {
-      int to_return = ctxp->unget_utf8_value;
-      ctxp->unget_utf8_value = 0;
-      return (to_return);
+      /* FIXME: we should give a nice error based on errno here.  */
+      enc_error = 1;
      }
+  lex->first = -1;
+  lex->last = -1;
+#else /* HAVE_ICONV */
+  if (strcmp (encoding, DEFAULT_ENCODING))
+    enc_error = 1;
+#endif /* HAVE_ICONV */
  
-  c = GETC ();
+  if (enc_error)
+    fatal ("unknown encoding: `%s'", encoding);
  
-  if (c < 128)
-    return (unicode_t)c;
-  if (c == EOF)
-    return UEOF;
-  else
+  return lex;
+}
+
+void
+java_destroy_lexer (lex)
+     java_lexer *lex;
+{
+#ifdef HAVE_ICONV
+  iconv_close (lex->handle);
+#endif
+  free (lex);
+}
+
+static unicode_t
+java_read_char (lex)
+     java_lexer *lex;
+{
+  if (lex->unget_value)
      {
-      if ((c & 0xe0) == 0xc0)
-        {
-          c1 = GETC ();
-         if ((c1 & 0xc0) == 0x80)
-           return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
-         c = c1;
-       }
-      else if ((c & 0xf0) == 0xe0)
-        {
-          c1 = GETC ();
-         if ((c1 & 0xc0) == 0x80)
-           {
-             c2 = GETC ();
-             if ((c2 & 0xc0) == 0x80)
-               return (unicode_t)(((c & 0xf) << 12) + 
-                                  (( c1 & 0x3f) << 6) + (c2 & 0x3f));
-             else
-               c = c2;
-           }
-         else
-           c = c1;
-       }
-      /* We looked for a UTF8 multi-byte sequence (since we saw an initial
-        byte with the high bit set), but found invalid bytes instead.
-        If the most recent byte was Ascii (and not EOF), we should
-        unget it, in case it was a comment terminator or other delimitor. */
-      if ((c & 0x80) == 0)
-       UNGETC (c);
-      return BAD_UTF8_VALUE;
+      unicode_t r = lex->unget_value;
+      lex->unget_value = 0;
+      return r;
      }
+
+#ifdef HAVE_ICONV
+  {
+    char out[2];
+    size_t ir, inbytesleft, in_save, out_count;
+    char *inp, *outp;
+
+    while (1)
+      {
+       /* See if we need to read more data.  If FIRST == 0 then the
+          previous conversion attempt ended in the middle of a
+          character at the end of the buffer.  Otherwise we only have
+          to read if the buffer is empty.  */
+       if (lex->first == 0 || lex->first >= lex->last)
+         {
+           int r;
+
+           if (lex->first >= lex->last)
+             {
+               lex->first = 0;
+               lex->last = 0;
+             }
+           if (feof (lex->finput))
+             return UEOF;
+           r = fread (&lex->buffer[lex->last], 1,
+                      sizeof (lex->buffer) - lex->last,
+                      lex->finput);
+           lex->last += r;
+         }
+
+       inbytesleft = lex->last - lex->first;
+
+       if (inbytesleft == 0)
+         {
+           /* We've tried to read and there is nothing left.  */
+           return UEOF;
+         }
+
+       in_save = inbytesleft;
+       out_count = 2;
+       inp = &lex->buffer[lex->first];
+       outp = out;
+       ir = iconv (lex->handle, (const char **) &inp, &inbytesleft,
+                   &outp, &out_count);
+       lex->first += in_save - inbytesleft;
+
+       if (out_count == 0)
+         {
+           /* Success.  We assume that UCS-2 is big-endian.  This
+              appears to be an ok assumption.  */
+           unicode_t result;
+           result = (((unsigned char) out[0]) << 8) | (unsigned char) out[1];
+           return result;
+         }
+
+       if (ir == (size_t) -1)
+         {
+           if (errno == EINVAL)
+             {
+               /* This is ok.  This means that the end of our buffer
+                  is in the middle of a character sequence.  We just
+                  move the valid part of the buffer to the beginning
+                  to force a read.  */
+               /* We use bcopy() because it handles overlapping
+                  regions.  FIXME: prefer memmove() where available.  */
+               bcopy (&lex->buffer[lex->first], &lex->buffer[0],
+                      lex->last - lex->first);
+               lex->last -= lex->first;
+               lex->first = 0;
+             }
+           else
+             {
+               /* A more serious error.  */
+               java_lex_error ("unrecognized character in input stream", 0);
+               return UEOF;
+             }
+         }
+      }
+  }
+#else /* HAVE_ICONV */
+  {
+    int c, c1, c2;
+    c = getc (lex->finput);
+
+    if (c < 128)
+      return (unicode_t)c;
+    if (c == EOF)
+      return UEOF;
+    else
+      {
+       if ((c & 0xe0) == 0xc0)
+         {
+           c1 = getc (lex->finput);
+           if ((c1 & 0xc0) == 0x80)
+             return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
+           c = c1;
+         }
+       else if ((c & 0xf0) == 0xe0)
+         {
+           c1 = getc (lex->finput);
+           if ((c1 & 0xc0) == 0x80)
+             {
+               c2 = getc (lex->finput);
+               if ((c2 & 0xc0) == 0x80)
+                 return (unicode_t)(((c & 0xf) << 12) + 
+                                    (( c1 & 0x3f) << 6) + (c2 & 0x3f));
+               else
+                 c = c2;
+             }
+           else
+             c = c1;
+         }
+
+       /* We simply don't support invalid characters.  */
+       java_lex_error ("malformed UTF-8 character", 0);
+      }
+  }
+#endif /* HAVE_ICONV */
+
+  /* We only get here on error.  */
+  return UEOF;
  }
  
  static void
@@ -267,56 +391,54 @@ java_store_unicode (l, c, unicode_escape_p)
  }
  
  static unicode_t
-java_read_unicode (term_context, unicode_escape_p)
-    int term_context;
-    int *unicode_escape_p;
+java_read_unicode (lex, term_context, unicode_escape_p)
+     java_lexer *lex;
+     int term_context;
+     int *unicode_escape_p;
  {
    unicode_t c;
-  long i, base;
  
-  c = java_read_char ();
+  c = java_read_char (lex);
    *unicode_escape_p = 0;
  
    if (c != '\\')
-    return ((term_context ? c : 
-            java_lineterminator (c) ? '\n' : (unicode_t)c));
-
-  /* Count the number of preceeding '\' */
-  for (base = ftell (finput), i = base-2; c == '\\';)
-    { 
-      fseek (finput, i--, SEEK_SET);
-      c = java_read_char ();   /* Will fail if reading utf8 stream. FIXME */
+    {
+      lex->bs_count = 0;
+      return (term_context ? c : (java_lineterminator (c)
+                                 ? '\n'
+                                 : (unicode_t) c));
      }
-  fseek (finput, base, SEEK_SET);
-  if ((base-i-3)%2 == 0)       /* If odd number of \ seen */
+
+  ++lex->bs_count;
+  if ((lex->bs_count) % 2 == 1)
      {
-      c = java_read_char ();
+      /* Odd number of \ seen.  */
+      c = java_read_char (lex);
        if (c == 'u')
          {
-         unsigned short unicode = 0;
+         unicode_t unicode = 0;
           int shift = 12;
           /* Next should be 4 hex digits, otherwise it's an error.
              The hex value is converted into the unicode, pushed into
              the Unicode stream.  */
           for (shift = 12; shift >= 0; shift -= 4)
             {
-             if ((c = java_read_char ()) == UEOF)
+             if ((c = java_read_char (lex)) == UEOF)
                 return UEOF;
               if (c >= '0' && c <= '9')
                 unicode |= (unicode_t)((c-'0') << shift);
               else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
                 unicode |= (unicode_t)((10+(c | 0x20)-'a') << shift);
               else
-                 java_lex_error 
-                   ("Non hex digit in Unicode escape sequence", 0);
+               java_lex_error ("Non hex digit in Unicode escape sequence", 0);
             }
           *unicode_escape_p = 1;
-         return (term_context ? unicode :
-                 (java_lineterminator (c) ? '\n' : unicode));
+         return (term_context
+                 ? unicode : (java_lineterminator (c) ? '\n' : unicode));
         }
-      ctxp->unget_utf8_value = c;
+      lex->unget_value = c;
      }
-  return (unicode_t)'\\';
+  return (unicode_t) '\\';
  }
  
  static unicode_t
@@ -331,7 +453,7 @@ java_get_unicode ()
         for (;;)
           {
             int unicode_escape_p;
-           c = java_read_unicode (0, &unicode_escape_p);
+           c = java_read_unicode (ctxp->lexer, 0, &unicode_escape_p);
             java_store_unicode (ctxp->c_line, c, unicode_escape_p);
             if (ctxp->c_line->white_space_only 
                 && !JAVA_WHITE_SPACE_P (c) && c!='\n')
@@ -354,7 +476,7 @@ java_lineterminator (c)
    else if (c == '\r')          /* CR */
      {
        int unicode_escape_p;
-      c = java_read_unicode (1, &unicode_escape_p);
+      c = java_read_unicode (ctxp->lexer, 1, &unicode_escape_p);
        if (c == '\r')
         {
           /* In this case we will have another terminator.  For some
@@ -363,7 +485,7 @@ java_lineterminator (c)
              up in the actual text of the line, causing an error.  So
              instead we choose a very low-level method.  FIXME: this
              is incredibly ugly.  */
-         UNGETC (c);
+         ctxp->lexer->unget_value = c;
         }
        else if (c != '\n')
         {
@@ -939,7 +1061,7 @@ java_lex (java_lval)
        char *string;
  
        for (no_error = 1, c = java_get_unicode (); 
-          c != '"' && c != '\n'; c = java_get_unicode ())
+          c != UEOF && c != '"' && c != '\n'; c = java_get_unicode ())
         {
           if (c == '\\')
             c = java_parse_escape_sequence ();
diff --git a/gcc/java/lex.h b/gcc/java/lex.h

index d4754ab..cf29aa1 100644 (file)
--- a/gcc/java/lex.h
+++ b/gcc/java/lex.h
@@ -35,6 +35,13 @@ extern int   lineno;
  /* A Unicode character, as read from the input file  */
  typedef unsigned short unicode_t;
  
+#ifdef HAVE_ICONV
+#include <iconv.h>
+#endif /* HAVE_ICONV */
+
+/* Default encoding to use if no encoding is specified.  */
+#define DEFAULT_ENCODING "UTF-8"
+
  /* Debug macro to print-out what we match  */
  #ifdef JAVA_LEX_DEBUG
  #ifdef JAVA_LEX_DEBUG_CHAR
@@ -96,12 +103,38 @@ typedef struct _java_lc {
    int col;
  } java_lc;
  
+typedef struct java_lexer
+{
+  /* The file from which we're reading.  */
+  FILE *finput;
  
-#define JAVA_LINE_MAX 80
+  /* Number of consecutive backslashes we've read.  */
+  int bs_count;
+
+  /* If nonzero, a value that was pushed back.  */
+  unicode_t unget_value;
+
+#ifdef HAVE_ICONV
+  /* The handle for the iconv converter we're using.  */
+  iconv_t handle;
  
-/* Macro to read and unread bytes */
-#define UNGETC(c) ungetc(c, finput)
-#define GETC()    getc(finput)
+  /* Bytes we've read from the file but have not sent to iconv.  */
+  char buffer[1024];
+
+  /* Index of first valid character in buffer, -1 if no valid
+     characters.  */
+  int first;
+
+  /* Index of last valid character in buffer, plus one.  -1 if no
+     valid characters in buffer.  */
+  int last;
+#endif /* HAVE_ICONV */
+} java_lexer;
+
+/* Destroy a lexer object.  */
+extern void java_destroy_lexer PARAMS ((java_lexer *));
+
+#define JAVA_LINE_MAX 80
  
  /* Build a location compound integer */
  #define BUILD_LOCATION() ((ctxp->elc.line << 12) | (ctxp->elc.col & 0xfff))
diff --git a/gcc/java/parse.h b/gcc/java/parse.h

index 8071237..b1b0e8e 100644 (file)
--- a/gcc/java/parse.h
+++ b/gcc/java/parse.h
@@ -728,13 +728,12 @@ typedef struct _jdeplist {
  struct parser_ctxt {
  
    const char *filename;                    /* Current filename */
-  FILE *finput;                            /* Current file input stream */
    struct parser_ctxt *next;
  
+  java_lexer *lexer;                /* Current lexer state */
    char marker_begining;                     /* Marker. Should be a sub-struct */
    struct java_line *p_line, *c_line; /* Previous and current line */
    java_lc elc;                      /* Error's line column info */
-  unicode_t unget_utf8_value;        /* An unget utf8 value */
    int ccb_indent;                   /* Keep track of {} indent, lexer */
    int first_ccb_indent1;            /* First { at ident level 1 */
    int last_ccb_indent1;                     /* Last } at ident level 1 */
@@ -928,7 +927,7 @@ extern void reset_report PARAMS ((void));
  /* Always in use, no matter what you compile */
  void java_push_parser_context PARAMS ((void));
  void java_pop_parser_context PARAMS ((int));
-void java_init_lex PARAMS ((void));
+void java_init_lex PARAMS ((FILE *, const char *));
  extern void java_parser_context_save_global PARAMS ((void));
  extern void java_parser_context_restore_global PARAMS ((void));
  int yyparse PARAMS ((void));
diff --git a/gcc/java/parse.y b/gcc/java/parse.y

index 9c92e58..42f4206 100644 (file)
--- a/gcc/java/parse.y
+++ b/gcc/java/parse.y
@@ -2618,10 +2618,13 @@ java_pop_parser_context (generate)
        next->incomplete_class = ctxp->incomplete_class;
        next->gclass_list = ctxp->gclass_list;
        lineno = ctxp->lineno;
-      finput = ctxp->finput;
        current_class = ctxp->current_class;
      }
  
+  /* If the old and new lexers differ, then free the old one.  */
+  if (ctxp->lexer && next && ctxp->lexer != next->lexer)
+    java_destroy_lexer (ctxp->lexer);
+
    /* Set the single import class file flag to 0 for the current list
       of imported things */
    for (current = ctxp->import_list; current; current = TREE_CHAIN (current))
@@ -2661,7 +2664,6 @@ java_parser_context_save_global ()
    else if (ctxp->saved_data)
      create_new_parser_context (1);
  
-  ctxp->finput = finput;
    ctxp->lineno = lineno;
    ctxp->current_class = current_class;
    ctxp->filename = input_filename;
@@ -2675,7 +2677,6 @@ java_parser_context_save_global ()
  void
  java_parser_context_restore_global ()
  {
-  finput = ctxp->finput;
    lineno = ctxp->lineno;
    current_class = ctxp->current_class;
    input_filename = ctxp->filename;
author	tromey <tromey@138bc75d-0d04-0410-961f-82ee72b054a4>
	Tue, 12 Sep 2000 22:23:59 +0000 (22:23 +0000)
committer	tromey <tromey@138bc75d-0d04-0410-961f-82ee72b054a4>
	Tue, 12 Sep 2000 22:23:59 +0000 (22:23 +0000)
gcc/java/ChangeLog		patch \| blob \| history
gcc/java/java-tree.h		patch \| blob \| history
gcc/java/jcf-parse.c		patch \| blob \| history
gcc/java/jv-scan.c		patch \| blob \| history
gcc/java/lang-options.h		patch \| blob \| history
gcc/java/lang.c		patch \| blob \| history
gcc/java/lex.c		patch \| blob \| history
gcc/java/lex.h		patch \| blob \| history
gcc/java/parse.h		patch \| blob \| history
gcc/java/parse.y		patch \| blob \| history