gcc/ChangeLog:

[pf3gnuchains/gcc-fork.git] / gcc / ada / scng.adb
diff --git a/gcc/ada/scng.adb b/gcc/ada/scng.adb

index 158524d..af1f3bb 100644 (file)
--- a/gcc/ada/scng.adb
+++ b/gcc/ada/scng.adb
@@ -6,18 +6,17 @@
  --                                                                          --
  --                                 B o d y                                  --
  --                                                                          --
---          Copyright (C) 1992-2005 Free Software Foundation, Inc.          --
+--          Copyright (C) 1992-2009, Free Software Foundation, Inc.         --
  --                                                                          --
  -- GNAT is free software;  you can  redistribute it  and/or modify it under --
  -- terms of the  GNU General Public License as published  by the Free Soft- --
--- ware  Foundation;  either version 2,  or (at your option) any later ver- --
+-- ware  Foundation;  either version 3,  or (at your option) any later ver- --
  -- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
  -- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
  -- or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License --
  -- for  more details.  You should have  received  a copy of the GNU General --
--- Public License  distributed with GNAT;  see file COPYING.  If not, write --
--- to  the Free Software Foundation,  59 Temple Place - Suite 330,  Boston, --
--- MA 02111-1307, USA.                                                      --
+-- Public License  distributed with GNAT; see file COPYING3.  If not, go to --
+-- http://www.gnu.org/licenses for a complete copy of the license.          --
  --                                                                          --
  -- GNAT was originally developed  by the GNAT team at  New York University. --
  -- Extensive contributions were provided by Ada Core Technologies Inc.      --
@@ -26,6 +25,7 @@
  
  with Csets;    use Csets;
  with Err_Vars; use Err_Vars;
+with Hostparm; use Hostparm;
  with Namet;    use Namet;
  with Opt;      use Opt;
  with Scans;    use Scans;
@@ -38,6 +38,7 @@ with Urealp;   use Urealp;
  with Widechar; use Widechar;
  
  with System.CRC32;
+with System.UTF_32;  use System.UTF_32;
  with System.WCh_Con; use System.WCh_Con;
  
  package body Scng is
@@ -95,7 +96,8 @@ package body Scng is
     procedure Accumulate_Checksum (C : Char_Code) is
     begin
        if C > 16#FFFF# then
-         Accumulate_Checksum (Character'Val (C / 2 ** 16));
+         Accumulate_Checksum (Character'Val (C / 2 ** 24));
+         Accumulate_Checksum (Character'Val ((C / 2 ** 16) mod 256));
           Accumulate_Checksum (Character'Val ((C / 256) mod 256));
        else
           Accumulate_Checksum (Character'Val (C / 256));
@@ -137,113 +139,16 @@ package body Scng is
     -- Initialize_Scanner --
     ------------------------
  
-   procedure Initialize_Scanner
-     (Unit  : Unit_Number_Type;
-      Index : Source_File_Index)
-   is
-      procedure Set_Reserved (N : Name_Id; T : Token_Type);
-      pragma Inline (Set_Reserved);
-      --  Set given name as a reserved keyword (T is the corresponding token)
-
-      -------------
-      -- Set_NTB --
-      -------------
-
-      procedure Set_Reserved (N : Name_Id; T : Token_Type) is
-      begin
-         --  Set up Token_Type values in Names Table entries for reserved
-         --  keywords We use the Pos value of the Token_Type value. Note we
-         --  rely on the fact that Token_Type'Val (0) is not a reserved word!
-
-         Set_Name_Table_Byte (N, Token_Type'Pos (T));
-      end Set_Reserved;
-
-   --  Start of processing for Initialize_Scanner
-
+   procedure Initialize_Scanner (Index : Source_File_Index) is
     begin
        --  Establish reserved words
  
-      Set_Reserved (Name_Abort,     Tok_Abort);
-      Set_Reserved (Name_Abs,       Tok_Abs);
-      Set_Reserved (Name_Abstract,  Tok_Abstract);
-      Set_Reserved (Name_Accept,    Tok_Accept);
-      Set_Reserved (Name_Access,    Tok_Access);
-      Set_Reserved (Name_And,       Tok_And);
-      Set_Reserved (Name_Aliased,   Tok_Aliased);
-      Set_Reserved (Name_All,       Tok_All);
-      Set_Reserved (Name_Array,     Tok_Array);
-      Set_Reserved (Name_At,        Tok_At);
-      Set_Reserved (Name_Begin,     Tok_Begin);
-      Set_Reserved (Name_Body,      Tok_Body);
-      Set_Reserved (Name_Case,      Tok_Case);
-      Set_Reserved (Name_Constant,  Tok_Constant);
-      Set_Reserved (Name_Declare,   Tok_Declare);
-      Set_Reserved (Name_Delay,     Tok_Delay);
-      Set_Reserved (Name_Delta,     Tok_Delta);
-      Set_Reserved (Name_Digits,    Tok_Digits);
-      Set_Reserved (Name_Do,        Tok_Do);
-      Set_Reserved (Name_Else,      Tok_Else);
-      Set_Reserved (Name_Elsif,     Tok_Elsif);
-      Set_Reserved (Name_End,       Tok_End);
-      Set_Reserved (Name_Entry,     Tok_Entry);
-      Set_Reserved (Name_Exception, Tok_Exception);
-      Set_Reserved (Name_Exit,      Tok_Exit);
-      Set_Reserved (Name_For,       Tok_For);
-      Set_Reserved (Name_Function,  Tok_Function);
-      Set_Reserved (Name_Generic,   Tok_Generic);
-      Set_Reserved (Name_Goto,      Tok_Goto);
-      Set_Reserved (Name_If,        Tok_If);
-      Set_Reserved (Name_In,        Tok_In);
-      Set_Reserved (Name_Is,        Tok_Is);
-      Set_Reserved (Name_Limited,   Tok_Limited);
-      Set_Reserved (Name_Loop,      Tok_Loop);
-      Set_Reserved (Name_Mod,       Tok_Mod);
-      Set_Reserved (Name_New,       Tok_New);
-      Set_Reserved (Name_Not,       Tok_Not);
-      Set_Reserved (Name_Null,      Tok_Null);
-      Set_Reserved (Name_Of,        Tok_Of);
-      Set_Reserved (Name_Or,        Tok_Or);
-      Set_Reserved (Name_Others,    Tok_Others);
-      Set_Reserved (Name_Out,       Tok_Out);
-      Set_Reserved (Name_Package,   Tok_Package);
-      Set_Reserved (Name_Pragma,    Tok_Pragma);
-      Set_Reserved (Name_Private,   Tok_Private);
-      Set_Reserved (Name_Procedure, Tok_Procedure);
-      Set_Reserved (Name_Protected, Tok_Protected);
-      Set_Reserved (Name_Raise,     Tok_Raise);
-      Set_Reserved (Name_Range,     Tok_Range);
-      Set_Reserved (Name_Record,    Tok_Record);
-      Set_Reserved (Name_Rem,       Tok_Rem);
-      Set_Reserved (Name_Renames,   Tok_Renames);
-      Set_Reserved (Name_Requeue,   Tok_Requeue);
-      Set_Reserved (Name_Return,    Tok_Return);
-      Set_Reserved (Name_Reverse,   Tok_Reverse);
-      Set_Reserved (Name_Select,    Tok_Select);
-      Set_Reserved (Name_Separate,  Tok_Separate);
-      Set_Reserved (Name_Subtype,   Tok_Subtype);
-      Set_Reserved (Name_Tagged,    Tok_Tagged);
-      Set_Reserved (Name_Task,      Tok_Task);
-      Set_Reserved (Name_Terminate, Tok_Terminate);
-      Set_Reserved (Name_Then,      Tok_Then);
-      Set_Reserved (Name_Type,      Tok_Type);
-      Set_Reserved (Name_Until,     Tok_Until);
-      Set_Reserved (Name_Use,       Tok_Use);
-      Set_Reserved (Name_When,      Tok_When);
-      Set_Reserved (Name_While,     Tok_While);
-      Set_Reserved (Name_With,      Tok_With);
-      Set_Reserved (Name_Xor,       Tok_Xor);
-
-      --  Ada 2005 reserved words
-
-      Set_Reserved (Name_Interface,     Tok_Interface);
-      Set_Reserved (Name_Overriding,    Tok_Overriding);
-      Set_Reserved (Name_Synchronized,  Tok_Synchronized);
+      Scans.Initialize_Ada_Keywords;
  
        --  Initialize scan control variables
  
        Current_Source_File       := Index;
        Source                    := Source_Text (Current_Source_File);
-      Current_Source_Unit       := Unit;
        Scan_Ptr                  := Source_First (Current_Source_File);
        Token                     := No_Token;
        Token_Ptr                 := Scan_Ptr;
@@ -254,6 +159,7 @@ package body Scng is
        First_Non_Blank_Location  := Scan_Ptr;
  
        Initialize_Checksum;
+      Wide_Char_Byte_Count := 0;
  
        --  Do not call Scan, otherwise the License stuff does not work in Scn
  
@@ -330,29 +236,78 @@ package body Scng is
        --  Scan_Ptr points to the opening string quote (the checksum for this
        --  character has not been accumulated yet). On return Scan_Ptr points
        --  past the closing quote of the string literal, Token and Token_Node
-      --  are set appropriately, and the checksum is upated.
+      --  are set appropriately, and the checksum is updated.
  
        -----------------------
        -- Check_End_Of_Line --
        -----------------------
  
        procedure Check_End_Of_Line is
-         Len : constant Int := Int (Scan_Ptr) - Int (Current_Line_Start);
+         Len : constant Int :=
+                 Int (Scan_Ptr) -
+                 Int (Current_Line_Start) -
+                 Wide_Char_Byte_Count;
  
        begin
-         if Style_Check and Style_Check_Max_Line_Length then
+         if Style_Check then
              Style.Check_Line_Terminator (Len);
+         end if;
+
+         --  Deal with checking maximum line length
+
+         if Style_Check and Style_Check_Max_Line_Length then
+            Style.Check_Line_Max_Length (Len);
  
           --  If style checking is inactive, check maximum line length against
-         --  standard value. Note that we take this from Opt.Max_Line_Length
-         --  rather than Hostparm.Max_Line_Length because we do not want to
-         --  impose any limit during scanning of configuration pragma files,
-         --  and Opt.Max_Line_Length (normally set to Hostparm.Max_Line_Length)
-         --  is reset to Column_Number'Max during scanning of such files.
+         --  standard value.
  
-         elsif Len > Opt.Max_Line_Length then
+         elsif Len > Max_Line_Length then
              Error_Long_Line;
           end if;
+
+         --  Now one more checking circuit. Normally we are only enforcing a
+         --  limit of physical characters, with tabs counting as one character.
+         --  But if after tab expansion we would have a total line length that
+         --  exceeded 32766, that would really cause trouble, because column
+         --  positions would exceed the maximum we allow for a column count.
+         --  Note: the limit is 32766 rather than 32767, since we use a value
+         --  of 32767 for special purposes (see Sinput). Now we really do not
+         --  want to go messing with tabs in the normal case, so what we do is
+         --  to check for a line that has more than 4096 physical characters.
+         --  Any shorter line could not be a problem, even if it was all tabs.
+
+         if Len >= 4096 then
+            declare
+               Col : Natural;
+               Ptr : Source_Ptr;
+
+            begin
+               Col := 1;
+               Ptr := Current_Line_Start;
+               loop
+                  exit when Ptr = Scan_Ptr;
+
+                  if Source (Ptr) = ASCII.HT then
+                     Col := (Col - 1 + 8) / 8 * 8 + 1;
+                  else
+                     Col := Col + 1;
+                  end if;
+
+                  if Col > 32766 then
+                     Error_Msg
+                       ("this line is longer than 32766 characters",
+                        Current_Line_Start);
+                     raise Unrecoverable_Error;
+                  end if;
+
+                  Ptr := Ptr + 1;
+               end loop;
+            end;
+         end if;
+
+         --  Reset wide character byte count for next line
+
+         Wide_Char_Byte_Count := 0;
        end Check_End_Of_Line;
  
        -----------------------
@@ -395,6 +350,7 @@ package body Scng is
  
        procedure Error_Illegal_Wide_Character is
        begin
+         Scan_Ptr := Scan_Ptr + 1;
           Error_Msg ("illegal wide character", Wptr);
        end Error_Illegal_Wide_Character;
  
@@ -406,7 +362,7 @@ package body Scng is
        begin
           Error_Msg
             ("this line is too long",
-            Current_Line_Start + Source_Ptr (Opt.Max_Line_Length));
+            Current_Line_Start + Source_Ptr (Max_Line_Length));
        end Error_Long_Line;
  
        -------------------------------
@@ -421,19 +377,19 @@ package body Scng is
  
           if Source (Scan_Ptr) = '_' then
              if Source (Scan_Ptr - 1) = '_' then
-               Error_Msg_S
+               Error_Msg_S -- CODEFIX
                   ("two consecutive underlines not permitted");
              else
-               Error_Msg_S
+               Error_Msg_S -- CODEFIX???
                   ("underline cannot follow punctuation character");
              end if;
  
           else
              if Source (Scan_Ptr - 1) = '_' then
-               Error_Msg_S
+               Error_Msg_S -- CODEFIX???
                   ("punctuation character cannot follow underline");
              else
-               Error_Msg_S
+               Error_Msg_S -- CODEFIX???
                   ("two consecutive punctuation characters not permitted");
              end if;
           end if;
@@ -560,7 +516,6 @@ package body Scng is
           UI_Int_Value := Uint_0;
           Scale := 0;
           Scan_Integer;
-         Scale := 0;
           Point_Scanned := False;
           UI_Num_Value := UI_Int_Value;
  
@@ -616,7 +571,7 @@ package body Scng is
  
                 if Warn_On_Obsolescent_Feature then
                    Error_Msg_S
-                    ("use of "":"" is an obsolescent feature ('R'M 'J.2(3))?");
+                    ("use of "":"" is an obsolescent feature (RM J.2(3))?");
                    Error_Msg_S
                      ("\use ""'#"" instead?");
                 end if;
@@ -830,12 +785,12 @@ package body Scng is
  
           procedure Set_String;
           --  Procedure used to distinguish between string and operator symbol.
-         --  On entry the string has been scanned out, and its characters
-         --  start at Token_Ptr and end one character before Scan_Ptr. On exit
-         --  Token is set to Tok_String_Literal or Tok_Operator_Symbol as
-         --  appropriate, and Token_Node is appropriately initialized. In
-         --  addition, in the operator symbol case, Token_Name is
-         --  appropriately set.
+         --  On entry the string has been scanned out, and its characters start
+         --  at Token_Ptr and end one character before Scan_Ptr. On exit Token
+         --  is set to Tok_String_Literal/Tok_Operator_Symbol as appropriate,
+         --  and Token_Node is appropriately initialized. In addition, in the
+         --  operator symbol case, Token_Name is appropriately set, and the
+         --  flags [Wide_]Wide_Character_Found are set appropriately.
  
           ---------------------------
           -- Error_Bad_String_Char --
@@ -920,7 +875,8 @@ package body Scng is
                 end if;
              end if;
  
-            Error_Msg_S ("missing string quote");
+            Error_Msg_S --  CODEFIX
+              ("missing string quote");
           end Error_Unterminated_String;
  
           ----------------
@@ -1060,7 +1016,10 @@ package body Scng is
  
           Delimiter := Source (Scan_Ptr);
           Accumulate_Checksum (Delimiter);
+
           Start_String;
+         Wide_Character_Found      := False;
+         Wide_Wide_Character_Found := False;
           Scan_Ptr := Scan_Ptr + 1;
  
           --  Loop to scan out characters of string literal
@@ -1102,8 +1061,12 @@ package body Scng is
  
                    Accumulate_Checksum (Code);
  
+                  --  In Ada 95 mode we allow any wide characters in a string
+                  --  but in Ada 2005, the set of characters allowed has been
+                  --  restricted to graphic characters.
+
                    if Ada_Version >= Ada_05
-                    and then Is_UTF_32_Non_Graphic (Code)
+                    and then Is_UTF_32_Non_Graphic (UTF_32 (Code))
                    then
                       Error_Msg
                         ("(Ada 2005) non-graphic character not permitted " &
@@ -1136,7 +1099,11 @@ package body Scng is
              Store_String_Char (Code);
  
              if not In_Character_Range (Code) then
-               Wide_Character_Found := True;
+               if In_Wide_Character_Range (Code) then
+                  Wide_Character_Found := True;
+               else
+                  Wide_Wide_Character_Found := True;
+               end if;
              end if;
           end loop;
  
@@ -1219,7 +1186,10 @@ package body Scng is
           --  Horizontal tab, just skip past it
  
           when HT =>
-            if Style_Check then Style.Check_HT; end if;
+            if Style_Check then
+               Style.Check_HT;
+            end if;
+
              Scan_Ptr := Scan_Ptr + 1;
  
           --  End of file character, treated as an end of file only if it is
@@ -1228,6 +1198,11 @@ package body Scng is
           when EOF =>
              if Scan_Ptr = Source_Last (Current_Source_File) then
                 Check_End_Of_Line;
+
+               if Style_Check then
+                  Style.Check_EOF;
+               end if;
+
                 Token := Tok_EOF;
                 return;
              else
@@ -1277,7 +1252,11 @@ package body Scng is
  
              if Double_Char_Token ('=') then
                 Token := Tok_Colon_Equal;
-               if Style_Check then Style.Check_Colon_Equal; end if;
+
+               if Style_Check then
+                  Style.Check_Colon_Equal;
+               end if;
+
                 return;
  
              elsif Source (Scan_Ptr + 1) = '-'
@@ -1291,7 +1270,11 @@ package body Scng is
              else
                 Scan_Ptr := Scan_Ptr + 1;
                 Token := Tok_Colon;
-               if Style_Check then Style.Check_Colon; end if;
+
+               if Style_Check then
+                  Style.Check_Colon;
+               end if;
+
                 return;
              end if;
  
@@ -1301,7 +1284,11 @@ package body Scng is
              Accumulate_Checksum ('(');
              Scan_Ptr := Scan_Ptr + 1;
              Token := Tok_Left_Paren;
-            if Style_Check then Style.Check_Left_Paren; end if;
+
+            if Style_Check then
+               Style.Check_Left_Paren;
+            end if;
+
              return;
  
           --  Left bracket
@@ -1331,7 +1318,11 @@ package body Scng is
              Accumulate_Checksum (',');
              Scan_Ptr := Scan_Ptr + 1;
              Token := Tok_Comma;
-            if Style_Check then Style.Check_Comma; end if;
+
+            if Style_Check then
+               Style.Check_Comma;
+            end if;
+
              return;
  
           --  Dot, which is either an isolated period, or part of a double dot
@@ -1343,7 +1334,11 @@ package body Scng is
  
              if Double_Char_Token ('.') then
                 Token := Tok_Dot_Dot;
-               if Style_Check then Style.Check_Dot_Dot; end if;
+
+               if Style_Check then
+                  Style.Check_Dot_Dot;
+               end if;
+
                 return;
  
              elsif Source (Scan_Ptr + 1) in '0' .. '9' then
@@ -1364,7 +1359,11 @@ package body Scng is
  
              if Double_Char_Token ('>') then
                 Token := Tok_Arrow;
-               if Style_Check then Style.Check_Arrow; end if;
+
+               if Style_Check then
+                  Style.Check_Arrow;
+               end if;
+
                 return;
  
              elsif Source (Scan_Ptr + 1) = '=' then
@@ -1409,7 +1408,11 @@ package body Scng is
  
              elsif Double_Char_Token ('>') then
                 Token := Tok_Box;
-               if Style_Check then Style.Check_Box; end if;
+
+               if Style_Check then
+                  Style.Check_Box;
+               end if;
+
                 return;
  
              elsif Double_Char_Token ('<') then
@@ -1441,8 +1444,22 @@ package body Scng is
              --  Comment
  
              else -- Source (Scan_Ptr + 1) = '-' then
-               if Style_Check then Style.Check_Comment; end if;
+               if Style_Check then
+                  Style.Check_Comment;
+               end if;
+
                 Scan_Ptr := Scan_Ptr + 2;
+
+               --  If we are in preprocessor mode with Replace_In_Comments set,
+               --  then we return the "--" as a token on its own.
+
+               if Replace_In_Comments then
+                  Token := Tok_Comment;
+                  return;
+               end if;
+
+               --  Otherwise scan out the comment
+
                 Start_Of_Comment := Scan_Ptr;
  
                 --  Loop to scan comment (this loop runs more than once only if
@@ -1476,7 +1493,10 @@ package body Scng is
                    --  Keep going if horizontal tab
  
                    if Source (Scan_Ptr) = HT then
-                     if Style_Check then Style.Check_HT; end if;
+                     if Style_Check then
+                        Style.Check_HT;
+                     end if;
+
                       Scan_Ptr := Scan_Ptr + 1;
  
                    --  Terminate scan of comment if line terminator
@@ -1515,7 +1535,7 @@ package body Scng is
  
                          --  If UTF_32 terminator, terminate comment scan
  
-                        elsif Is_UTF_32_Line_Terminator (Code) then
+                        elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then
                             Scan_Ptr := Wptr;
                             exit;
                          end if;
@@ -1567,7 +1587,7 @@ package body Scng is
  
              if Warn_On_Obsolescent_Feature then
                 Error_Msg_S
-                 ("use of ""'%"" is an obsolescent feature ('R'M 'J.2(4))?");
+                 ("use of ""'%"" is an obsolescent feature (RM J.2(4))?");
                 Error_Msg_S
                   ("\use """""" instead?");
              end if;
@@ -1610,7 +1630,11 @@ package body Scng is
                 or else Prev_Token in Token_Class_Literal
              then
                 Token := Tok_Apostrophe;
-               if Style_Check then Style.Check_Apostrophe; end if;
+
+               if Style_Check then
+                  Style.Check_Apostrophe;
+               end if;
+
                 return;
  
              --  Otherwise the apostrophe starts a character literal
@@ -1638,8 +1662,12 @@ package body Scng is
                       Error_Illegal_Wide_Character;
                       Code := Character'Pos (' ');
  
+                  --  In Ada 95 mode we allow any wide character in a character
+                  --  literal, but in Ada 2005, the set of characters allowed
+                  --  is restricted to graphic characters.
+
                    elsif Ada_Version >= Ada_05
-                    and then Is_UTF_32_Non_Graphic (Code)
+                    and then Is_UTF_32_Non_Graphic (UTF_32 (Code))
                    then
                       Error_Msg
                         ("(Ada 2005) non-graphic character not permitted " &
@@ -1711,7 +1739,11 @@ package body Scng is
              Accumulate_Checksum (')');
              Scan_Ptr := Scan_Ptr + 1;
              Token := Tok_Right_Paren;
-            if Style_Check then Style.Check_Right_Paren; end if;
+
+            if Style_Check then
+               Style.Check_Right_Paren;
+            end if;
+
              return;
  
           --  Right bracket or right brace, treated as right paren
@@ -1742,7 +1774,11 @@ package body Scng is
              Accumulate_Checksum (';');
              Scan_Ptr := Scan_Ptr + 1;
              Token := Tok_Semicolon;
-            if Style_Check then Style.Check_Semicolon; end if;
+
+            if Style_Check then
+               Style.Check_Semicolon;
+            end if;
+
              return;
  
           --  Vertical bar
@@ -1761,7 +1797,11 @@ package body Scng is
              else
                 Scan_Ptr := Scan_Ptr + 1;
                 Token := Tok_Vertical_Bar;
-               if Style_Check then Style.Check_Vertical_Bar; end if;
+
+               if Style_Check then
+                  Style.Check_Vertical_Bar;
+               end if;
+
                 return;
              end if;
           end Vertical_Bar_Case;
@@ -1774,7 +1814,7 @@ package body Scng is
  
              if Warn_On_Obsolescent_Feature then
                 Error_Msg_S
-                 ("use of ""'!"" is an obsolescent feature ('R'M 'J.2(2))?");
+                 ("use of ""'!"" is an obsolescent feature (RM J.2(2))?");
                 Error_Msg_S
                   ("\use ""'|"" instead?");
              end if;
@@ -1804,12 +1844,59 @@ package body Scng is
           --  Digits starting a numeric literal
  
           when '0' .. '9' =>
+
+            --  First a bit of a scan ahead to see if we have a case of an
+            --  identifier starting with a digit (remembering exponent case).
+
+            declare
+               C : constant Character := Source (Scan_Ptr + 1);
+
+            begin
+               --  OK literal if digit followed by digit or underscore
+
+               if C in '0' .. '9' or else C = '_' then
+                  null;
+
+               --  OK literal if digit not followed by identifier char
+
+               elsif not Identifier_Char (C) then
+                  null;
+
+               --  OK literal if digit followed by e/E followed by digit/sign.
+               --  We also allow underscore after the E, which is an error, but
+               --  better handled by Nlit than deciding this is an identifier.
+
+               elsif (C = 'e' or else C = 'E')
+                 and then (Source (Scan_Ptr + 2) in '0' .. '9'
+                             or else Source (Scan_Ptr + 2) = '+'
+                             or else Source (Scan_Ptr + 2) = '-'
+                             or else Source (Scan_Ptr + 2) = '_')
+               then
+                  null;
+
+               --  Here we have what really looks like an identifier that
+               --  starts with a digit, so give error msg.
+
+               else
+                  Error_Msg_S ("identifier may not start with digit");
+                  Name_Len := 1;
+                  Underline_Found := False;
+                  Name_Buffer (1) := Source (Scan_Ptr);
+                  Accumulate_Checksum (Name_Buffer (1));
+                  Scan_Ptr := Scan_Ptr + 1;
+                  goto Scan_Identifier;
+               end if;
+            end;
+
+            --  Here we have an OK integer literal
+
              Nlit;
  
              if Identifier_Char (Source (Scan_Ptr)) then
                 Error_Msg_S
                   ("delimiter required between literal and identifier");
              end if;
+
              Post_Scan;
              return;
  
@@ -1899,7 +1986,7 @@ package body Scng is
  
           --  Invalid control characters
  
-         when NUL | SOH | STX | ETX | EOT | ENQ | ACK | BEL | BS  | SO  |
+         when NUL | SOH | STX | ETX | EOT | ENQ | ACK | BEL | BS  | ASCII.SO |
                SI  | DLE | DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN |
                EM  | FS  | GS  | RS  | US  | DEL
           =>
@@ -1942,6 +2029,7 @@ package body Scng is
  
           declare
              Code : Char_Code;
+            Cat  : Category;
              Err  : Boolean;
  
           begin
@@ -1953,10 +2041,13 @@ package body Scng is
              if Err then
                 Error_Illegal_Wide_Character;
                 goto Scan_Next_Character;
+            end if;
+
+            Cat := Get_Category (UTF_32 (Code));
  
              --  If OK letter, reset scan ptr and go scan identifier
  
-            elsif Is_UTF_32_Letter (Code) then
+            if Is_UTF_32_Letter (Cat) then
                 Scan_Ptr := Wptr;
                 Name_Len := 0;
                 Underline_Found := False;
@@ -1965,18 +2056,18 @@ package body Scng is
              --  If OK wide space, ignore and keep scanning (we do not include
              --  any ignored spaces in checksum)
  
-            elsif Is_UTF_32_Space (Code) then
+            elsif Is_UTF_32_Space (Cat) then
                 goto Scan_Next_Character;
  
              --  If OK wide line terminator, terminate current line
  
-            elsif Is_UTF_32_Line_Terminator (Code) then
+            elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then
                 Scan_Ptr := Wptr;
                 goto Scan_Line_Terminator;
  
              --  Punctuation is an error (at start of identifier)
  
-            elsif Is_UTF_32_Punctuation (Code) then
+            elsif Is_UTF_32_Punctuation (Cat) then
                 Error_Msg
                   ("identifier cannot start with punctuation", Wptr);
                 Scan_Ptr := Wptr;
@@ -1984,9 +2075,9 @@ package body Scng is
                 Underline_Found := False;
                 goto Scan_Identifier;
  
-            --  Mark character is an error (at start of identifer)
+            --  Mark character is an error (at start of identifier)
  
-            elsif Is_UTF_32_Mark (Code) then
+            elsif Is_UTF_32_Mark (Cat) then
                 Error_Msg
                   ("identifier cannot start with mark character", Wptr);
                 Scan_Ptr := Wptr;
@@ -1994,9 +2085,9 @@ package body Scng is
                 Underline_Found := False;
                 goto Scan_Identifier;
  
-            --  Other format character is an error (at start of identifer)
+            --  Other format character is an error (at start of identifier)
  
-            elsif Is_UTF_32_Other (Code) then
+            elsif Is_UTF_32_Other (Cat) then
                 Error_Msg
                   ("identifier cannot start with other format character", Wptr);
                 Scan_Ptr := Wptr;
@@ -2008,7 +2099,7 @@ package body Scng is
              --  identifier or bad literal. Not worth doing too much to try to
              --  distinguish these cases, but we will do a little bit.
  
-            elsif Is_UTF_32_Digit (Code) then
+            elsif Is_UTF_32_Digit (Cat) then
                 Error_Msg
                   ("identifier cannot start with digit character", Wptr);
                 Scan_Ptr := Wptr;
@@ -2026,7 +2117,7 @@ package body Scng is
  
        --  Routine to scan line terminator. On entry Scan_Ptr points to a
        --  character which is one of FF,LR,CR,VT, or one of the wide characters
-      --  that is treated as a line termiantor.
+      --  that is treated as a line terminator.
  
        <<Scan_Line_Terminator>>
  
@@ -2069,7 +2160,7 @@ package body Scng is
  
        --  Identifier scanning routine. On entry, some initial characters of
        --  the identifier may have already been stored in Name_Buffer. If so,
-      --  Name_Len has the number of characters stored. otherwise Name_Len is
+      --  Name_Len has the number of characters stored, otherwise Name_Len is
        --  set to zero on entry. Underline_Found is also set False on entry.
  
        <<Scan_Identifier>>
@@ -2107,8 +2198,10 @@ package body Scng is
           --  is active, so if we find an ESC character we know that we have a
           --  wide character.
  
-         if Identifier_Char (Source (Scan_Ptr)) then
-
+         if Identifier_Char (Source (Scan_Ptr))
+           or else (Source (Scan_Ptr) in Upper_Half_Character
+                     and then Upper_Half_Encoding)
+         then
              --  Case of underline
  
              if Source (Scan_Ptr) = '_' then
@@ -2155,9 +2248,10 @@ package body Scng is
                 --  encoding into the name table entry for the identifier.
  
                 declare
-                  Code   : Char_Code;
-                  Err    : Boolean;
-                  Chr    : Character;
+                  Code : Char_Code;
+                  Err  : Boolean;
+                  Chr  : Character;
+                  Cat  : Category;
  
                 begin
                    Wptr := Scan_Ptr;
@@ -2198,19 +2292,22 @@ package body Scng is
                         ("wide character not allowed in identifier", Wptr);
                       end if;
  
+                     Cat := Get_Category (UTF_32 (Code));
+
                       --  If OK letter, store it folding to upper case. Note
                       --  that we include the folded letter in the checksum.
  
-                     if Is_UTF_32_Letter (Code) then
-                        Code := UTF_32_To_Upper_Case (Code);
+                     if Is_UTF_32_Letter (Cat) then
+                        Code :=
+                          Char_Code (UTF_32_To_Upper_Case (UTF_32 (Code)));
                          Accumulate_Checksum (Code);
                          Store_Encoded_Character (Code);
                          Underline_Found := False;
  
                       --  If OK extended digit or mark, then store it
  
-                     elsif Is_UTF_32_Digit (Code)
-                       or else Is_UTF_32_Mark (Code)
+                     elsif Is_UTF_32_Digit (Cat)
+                       or else Is_UTF_32_Mark (Cat)
                       then
                          Accumulate_Checksum (Code);
                          Store_Encoded_Character (Code);
@@ -2219,7 +2316,7 @@ package body Scng is
                       --  Wide punctuation is also stored, but counts as an
                       --  underline character for error checking purposes.
  
-                     elsif Is_UTF_32_Punctuation (Code) then
+                     elsif Is_UTF_32_Punctuation (Cat) then
                          Accumulate_Checksum (Code);
  
                          if Underline_Found then
@@ -2236,17 +2333,21 @@ package body Scng is
                             Underline_Found := True;
                          end if;
  
-                     --  Wide character in Unicode cateogory "Other, Format"
+                     --  Wide character in Unicode category "Other, Format"
                       --  is accepted in an identifier, but is ignored and not
                       --  stored. It seems reasonable to exclude it from the
                       --  checksum.
  
-                     elsif Is_UTF_32_Other (Code) then
+                     --  Note that it is correct (see AI-395) to simply strip
+                     --  other format characters, before testing for double
+                     --  underlines, or for reserved words).
+
+                     elsif Is_UTF_32_Other (Cat) then
                          null;
  
                       --  Wide character in category Separator,Space terminates
  
-                     elsif Is_UTF_32_Space (Code) then
+                     elsif Is_UTF_32_Space (Cat) then
                          goto Scan_Identifier_Complete;
  
                       --  Any other wide character is not acceptable
@@ -2284,36 +2385,51 @@ package body Scng is
  
           --  Here is where we check if it was a keyword
  
-         if Get_Name_Table_Byte (Token_Name) /= 0
-           and then (Ada_Version >= Ada_95
-                       or else Token_Name not in Ada_95_Reserved_Words)
-           and then (Ada_Version >= Ada_05
-                       or else Token_Name not in Ada_2005_Reserved_Words)
-         then
+         if Is_Keyword_Name (Token_Name) then
              Token := Token_Type'Val (Get_Name_Table_Byte (Token_Name));
  
-            --  Deal with possible style check for non-lower case keyword, but
-            --  we don't treat ACCESS, DELTA, DIGITS, RANGE as keywords for
-            --  this purpose if they appear as attribute designators. Actually
-            --  we only check the first character for speed.
-
-            --  Ada 2005 (AI-284): Do not apply the style check in case of
-            --  "pragma Interface"
-
-            if Style_Check
-              and then Source (Token_Ptr) <= 'Z'
-              and then (Prev_Token /= Tok_Apostrophe
-                          or else
-                            (Token /= Tok_Access
-                               and then Token /= Tok_Delta
-                               and then Token /= Tok_Digits
-                               and then Token /= Tok_Range))
-              and then (Token /= Tok_Interface
-                          or else
-                            (Token = Tok_Interface
-                               and then Prev_Token /= Tok_Pragma))
-            then
-               Style.Non_Lower_Case_Keyword;
+            --  Keyword style checks
+
+            if Style_Check then
+
+               --  Deal with possible style check for non-lower case keyword,
+               --  but we don't treat ACCESS, DELTA, DIGITS, RANGE as keywords
+               --  for this purpose if they appear as attribute designators.
+               --  Actually we only check the first character for speed.
+
+               --  Ada 2005 (AI-284): Do not apply the style check in case of
+               --  "pragma Interface"
+
+               --  Ada 2005 (AI-340): Do not apply the style check in case of
+               --  MOD attribute.
+
+               if Source (Token_Ptr) <= 'Z'
+                 and then (Prev_Token /= Tok_Apostrophe
+                           or else
+                             (Token /= Tok_Access and then
+                              Token /= Tok_Delta  and then
+                              Token /= Tok_Digits and then
+                              Token /= Tok_Mod    and then
+                              Token /= Tok_Range))
+                       and then (Token /= Tok_Interface
+                                  or else
+                                    (Token = Tok_Interface
+                                      and then Prev_Token /= Tok_Pragma))
+               then
+                  Style.Non_Lower_Case_Keyword;
+               end if;
+
+               --  Check THEN/ELSE style rules. These do not apply to AND THEN
+               --  or OR ELSE, and do not apply in conditional expressions.
+
+               if (Token = Tok_Then and then Prev_Token /= Tok_And)
+                    or else
+                  (Token = Tok_Else and then Prev_Token /= Tok_Or)
+               then
+                  if Inside_Conditional_Expression = 0 then
+                     Style.Check_Separate_Stmt_Lines;
+                  end if;
+               end if;
              end if;
  
              --  We must reset Token_Name since this is not an identifier and
@@ -2437,16 +2553,26 @@ package body Scng is
           --  Outer loop keeps going only if a horizontal tab follows
  
           if Source (Scan_Ptr) = HT then
-            if Style_Check then Style.Check_HT; end if;
+            if Style_Check then
+               Style.Check_HT;
+            end if;
+
              Scan_Ptr := Scan_Ptr + 1;
              Start_Column := (Start_Column / 8) * 8 + 8;
           else
              exit Tabs_Loop;
           end if;
-
        end loop Tabs_Loop;
  
        return Start_Column;
+
+   --  A constraint error can happen only if we have a compiler with checks on
+   --  and a line with a ludicrous number of tabs or spaces at the start. In
+   --  such a case, we really don't care if Start_Column is right or not.
+
+   exception
+      when Constraint_Error =>
+         return Start_Column;
     end Set_Start_Column;
  
  end Scng;