-- --
-- B o d y --
-- --
--- Copyright (C) 1992-2005 Free Software Foundation, Inc. --
+-- Copyright (C) 1992-2009, Free Software Foundation, Inc. --
-- --
-- GNAT is free software; you can redistribute it and/or modify it under --
-- terms of the GNU General Public License as published by the Free Soft- --
--- ware Foundation; either version 2, or (at your option) any later ver- --
+-- ware Foundation; either version 3, or (at your option) any later ver- --
-- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
-- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
-- for more details. You should have received a copy of the GNU General --
--- Public License distributed with GNAT; see file COPYING. If not, write --
--- to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, --
--- MA 02111-1307, USA. --
+-- Public License distributed with GNAT; see file COPYING3. If not, go to --
+-- http://www.gnu.org/licenses for a complete copy of the license. --
-- --
-- GNAT was originally developed by the GNAT team at New York University. --
-- Extensive contributions were provided by Ada Core Technologies Inc. --
with Csets; use Csets;
with Err_Vars; use Err_Vars;
+with Hostparm; use Hostparm;
with Namet; use Namet;
with Opt; use Opt;
with Scans; use Scans;
with Widechar; use Widechar;
with System.CRC32;
+with System.UTF_32; use System.UTF_32;
with System.WCh_Con; use System.WCh_Con;
package body Scng is
procedure Accumulate_Checksum (C : Char_Code) is
begin
if C > 16#FFFF# then
- Accumulate_Checksum (Character'Val (C / 2 ** 16));
+ Accumulate_Checksum (Character'Val (C / 2 ** 24));
+ Accumulate_Checksum (Character'Val ((C / 2 ** 16) mod 256));
Accumulate_Checksum (Character'Val ((C / 256) mod 256));
else
Accumulate_Checksum (Character'Val (C / 256));
-- Initialize_Scanner --
------------------------
- procedure Initialize_Scanner
- (Unit : Unit_Number_Type;
- Index : Source_File_Index)
- is
- procedure Set_Reserved (N : Name_Id; T : Token_Type);
- pragma Inline (Set_Reserved);
- -- Set given name as a reserved keyword (T is the corresponding token)
-
- -------------
- -- Set_NTB --
- -------------
-
- procedure Set_Reserved (N : Name_Id; T : Token_Type) is
- begin
- -- Set up Token_Type values in Names Table entries for reserved
- -- keywords We use the Pos value of the Token_Type value. Note we
- -- rely on the fact that Token_Type'Val (0) is not a reserved word!
-
- Set_Name_Table_Byte (N, Token_Type'Pos (T));
- end Set_Reserved;
-
- -- Start of processing for Initialize_Scanner
-
+ procedure Initialize_Scanner (Index : Source_File_Index) is
begin
-- Establish reserved words
- Set_Reserved (Name_Abort, Tok_Abort);
- Set_Reserved (Name_Abs, Tok_Abs);
- Set_Reserved (Name_Abstract, Tok_Abstract);
- Set_Reserved (Name_Accept, Tok_Accept);
- Set_Reserved (Name_Access, Tok_Access);
- Set_Reserved (Name_And, Tok_And);
- Set_Reserved (Name_Aliased, Tok_Aliased);
- Set_Reserved (Name_All, Tok_All);
- Set_Reserved (Name_Array, Tok_Array);
- Set_Reserved (Name_At, Tok_At);
- Set_Reserved (Name_Begin, Tok_Begin);
- Set_Reserved (Name_Body, Tok_Body);
- Set_Reserved (Name_Case, Tok_Case);
- Set_Reserved (Name_Constant, Tok_Constant);
- Set_Reserved (Name_Declare, Tok_Declare);
- Set_Reserved (Name_Delay, Tok_Delay);
- Set_Reserved (Name_Delta, Tok_Delta);
- Set_Reserved (Name_Digits, Tok_Digits);
- Set_Reserved (Name_Do, Tok_Do);
- Set_Reserved (Name_Else, Tok_Else);
- Set_Reserved (Name_Elsif, Tok_Elsif);
- Set_Reserved (Name_End, Tok_End);
- Set_Reserved (Name_Entry, Tok_Entry);
- Set_Reserved (Name_Exception, Tok_Exception);
- Set_Reserved (Name_Exit, Tok_Exit);
- Set_Reserved (Name_For, Tok_For);
- Set_Reserved (Name_Function, Tok_Function);
- Set_Reserved (Name_Generic, Tok_Generic);
- Set_Reserved (Name_Goto, Tok_Goto);
- Set_Reserved (Name_If, Tok_If);
- Set_Reserved (Name_In, Tok_In);
- Set_Reserved (Name_Is, Tok_Is);
- Set_Reserved (Name_Limited, Tok_Limited);
- Set_Reserved (Name_Loop, Tok_Loop);
- Set_Reserved (Name_Mod, Tok_Mod);
- Set_Reserved (Name_New, Tok_New);
- Set_Reserved (Name_Not, Tok_Not);
- Set_Reserved (Name_Null, Tok_Null);
- Set_Reserved (Name_Of, Tok_Of);
- Set_Reserved (Name_Or, Tok_Or);
- Set_Reserved (Name_Others, Tok_Others);
- Set_Reserved (Name_Out, Tok_Out);
- Set_Reserved (Name_Package, Tok_Package);
- Set_Reserved (Name_Pragma, Tok_Pragma);
- Set_Reserved (Name_Private, Tok_Private);
- Set_Reserved (Name_Procedure, Tok_Procedure);
- Set_Reserved (Name_Protected, Tok_Protected);
- Set_Reserved (Name_Raise, Tok_Raise);
- Set_Reserved (Name_Range, Tok_Range);
- Set_Reserved (Name_Record, Tok_Record);
- Set_Reserved (Name_Rem, Tok_Rem);
- Set_Reserved (Name_Renames, Tok_Renames);
- Set_Reserved (Name_Requeue, Tok_Requeue);
- Set_Reserved (Name_Return, Tok_Return);
- Set_Reserved (Name_Reverse, Tok_Reverse);
- Set_Reserved (Name_Select, Tok_Select);
- Set_Reserved (Name_Separate, Tok_Separate);
- Set_Reserved (Name_Subtype, Tok_Subtype);
- Set_Reserved (Name_Tagged, Tok_Tagged);
- Set_Reserved (Name_Task, Tok_Task);
- Set_Reserved (Name_Terminate, Tok_Terminate);
- Set_Reserved (Name_Then, Tok_Then);
- Set_Reserved (Name_Type, Tok_Type);
- Set_Reserved (Name_Until, Tok_Until);
- Set_Reserved (Name_Use, Tok_Use);
- Set_Reserved (Name_When, Tok_When);
- Set_Reserved (Name_While, Tok_While);
- Set_Reserved (Name_With, Tok_With);
- Set_Reserved (Name_Xor, Tok_Xor);
-
- -- Ada 2005 reserved words
-
- Set_Reserved (Name_Interface, Tok_Interface);
- Set_Reserved (Name_Overriding, Tok_Overriding);
- Set_Reserved (Name_Synchronized, Tok_Synchronized);
+ Scans.Initialize_Ada_Keywords;
-- Initialize scan control variables
Current_Source_File := Index;
Source := Source_Text (Current_Source_File);
- Current_Source_Unit := Unit;
Scan_Ptr := Source_First (Current_Source_File);
Token := No_Token;
Token_Ptr := Scan_Ptr;
First_Non_Blank_Location := Scan_Ptr;
Initialize_Checksum;
+ Wide_Char_Byte_Count := 0;
-- Do not call Scan, otherwise the License stuff does not work in Scn
-- Scan_Ptr points to the opening string quote (the checksum for this
-- character has not been accumulated yet). On return Scan_Ptr points
-- past the closing quote of the string literal, Token and Token_Node
- -- are set appropriately, and the checksum is upated.
+ -- are set appropriately, and the checksum is updated.
-----------------------
-- Check_End_Of_Line --
-----------------------
procedure Check_End_Of_Line is
- Len : constant Int := Int (Scan_Ptr) - Int (Current_Line_Start);
+ Len : constant Int :=
+ Int (Scan_Ptr) -
+ Int (Current_Line_Start) -
+ Wide_Char_Byte_Count;
begin
- if Style_Check and Style_Check_Max_Line_Length then
+ if Style_Check then
Style.Check_Line_Terminator (Len);
+ end if;
+
+ -- Deal with checking maximum line length
+
+ if Style_Check and Style_Check_Max_Line_Length then
+ Style.Check_Line_Max_Length (Len);
-- If style checking is inactive, check maximum line length against
- -- standard value. Note that we take this from Opt.Max_Line_Length
- -- rather than Hostparm.Max_Line_Length because we do not want to
- -- impose any limit during scanning of configuration pragma files,
- -- and Opt.Max_Line_Length (normally set to Hostparm.Max_Line_Length)
- -- is reset to Column_Number'Max during scanning of such files.
+ -- standard value.
- elsif Len > Opt.Max_Line_Length then
+ elsif Len > Max_Line_Length then
Error_Long_Line;
end if;
+
+ -- Now one more checking circuit. Normally we are only enforcing a
+ -- limit of physical characters, with tabs counting as one character.
+ -- But if after tab expansion we would have a total line length that
+ -- exceeded 32766, that would really cause trouble, because column
+ -- positions would exceed the maximum we allow for a column count.
+ -- Note: the limit is 32766 rather than 32767, since we use a value
+ -- of 32767 for special purposes (see Sinput). Now we really do not
+ -- want to go messing with tabs in the normal case, so what we do is
+ -- to check for a line that has more than 4096 physical characters.
+ -- Any shorter line could not be a problem, even if it was all tabs.
+
+ if Len >= 4096 then
+ declare
+ Col : Natural;
+ Ptr : Source_Ptr;
+
+ begin
+ Col := 1;
+ Ptr := Current_Line_Start;
+ loop
+ exit when Ptr = Scan_Ptr;
+
+ if Source (Ptr) = ASCII.HT then
+ Col := (Col - 1 + 8) / 8 * 8 + 1;
+ else
+ Col := Col + 1;
+ end if;
+
+ if Col > 32766 then
+ Error_Msg
+ ("this line is longer than 32766 characters",
+ Current_Line_Start);
+ raise Unrecoverable_Error;
+ end if;
+
+ Ptr := Ptr + 1;
+ end loop;
+ end;
+ end if;
+
+ -- Reset wide character byte count for next line
+
+ Wide_Char_Byte_Count := 0;
end Check_End_Of_Line;
-----------------------
procedure Error_Illegal_Wide_Character is
begin
+ Scan_Ptr := Scan_Ptr + 1;
Error_Msg ("illegal wide character", Wptr);
end Error_Illegal_Wide_Character;
begin
Error_Msg
("this line is too long",
- Current_Line_Start + Source_Ptr (Opt.Max_Line_Length));
+ Current_Line_Start + Source_Ptr (Max_Line_Length));
end Error_Long_Line;
-------------------------------
if Source (Scan_Ptr) = '_' then
if Source (Scan_Ptr - 1) = '_' then
- Error_Msg_S
+ Error_Msg_S -- CODEFIX
("two consecutive underlines not permitted");
else
- Error_Msg_S
+ Error_Msg_S -- CODEFIX???
("underline cannot follow punctuation character");
end if;
else
if Source (Scan_Ptr - 1) = '_' then
- Error_Msg_S
+ Error_Msg_S -- CODEFIX???
("punctuation character cannot follow underline");
else
- Error_Msg_S
+ Error_Msg_S -- CODEFIX???
("two consecutive punctuation characters not permitted");
end if;
end if;
UI_Int_Value := Uint_0;
Scale := 0;
Scan_Integer;
- Scale := 0;
Point_Scanned := False;
UI_Num_Value := UI_Int_Value;
if Warn_On_Obsolescent_Feature then
Error_Msg_S
- ("use of "":"" is an obsolescent feature ('R'M 'J.2(3))?");
+ ("use of "":"" is an obsolescent feature (RM J.2(3))?");
Error_Msg_S
("\use ""'#"" instead?");
end if;
procedure Set_String;
-- Procedure used to distinguish between string and operator symbol.
- -- On entry the string has been scanned out, and its characters
- -- start at Token_Ptr and end one character before Scan_Ptr. On exit
- -- Token is set to Tok_String_Literal or Tok_Operator_Symbol as
- -- appropriate, and Token_Node is appropriately initialized. In
- -- addition, in the operator symbol case, Token_Name is
- -- appropriately set.
+ -- On entry the string has been scanned out, and its characters start
+ -- at Token_Ptr and end one character before Scan_Ptr. On exit Token
+ -- is set to Tok_String_Literal/Tok_Operator_Symbol as appropriate,
+ -- and Token_Node is appropriately initialized. In addition, in the
+ -- operator symbol case, Token_Name is appropriately set, and the
+ -- flags [Wide_]Wide_Character_Found are set appropriately.
---------------------------
-- Error_Bad_String_Char --
end if;
end if;
- Error_Msg_S ("missing string quote");
+ Error_Msg_S -- CODEFIX
+ ("missing string quote");
end Error_Unterminated_String;
----------------
Delimiter := Source (Scan_Ptr);
Accumulate_Checksum (Delimiter);
+
Start_String;
+ Wide_Character_Found := False;
+ Wide_Wide_Character_Found := False;
Scan_Ptr := Scan_Ptr + 1;
-- Loop to scan out characters of string literal
Accumulate_Checksum (Code);
+ -- In Ada 95 mode we allow any wide characters in a string
+ -- but in Ada 2005, the set of characters allowed has been
+ -- restricted to graphic characters.
+
if Ada_Version >= Ada_05
- and then Is_UTF_32_Non_Graphic (Code)
+ and then Is_UTF_32_Non_Graphic (UTF_32 (Code))
then
Error_Msg
("(Ada 2005) non-graphic character not permitted " &
Store_String_Char (Code);
if not In_Character_Range (Code) then
- Wide_Character_Found := True;
+ if In_Wide_Character_Range (Code) then
+ Wide_Character_Found := True;
+ else
+ Wide_Wide_Character_Found := True;
+ end if;
end if;
end loop;
-- Horizontal tab, just skip past it
when HT =>
- if Style_Check then Style.Check_HT; end if;
+ if Style_Check then
+ Style.Check_HT;
+ end if;
+
Scan_Ptr := Scan_Ptr + 1;
-- End of file character, treated as an end of file only if it is
when EOF =>
if Scan_Ptr = Source_Last (Current_Source_File) then
Check_End_Of_Line;
+
+ if Style_Check then
+ Style.Check_EOF;
+ end if;
+
Token := Tok_EOF;
return;
else
if Double_Char_Token ('=') then
Token := Tok_Colon_Equal;
- if Style_Check then Style.Check_Colon_Equal; end if;
+
+ if Style_Check then
+ Style.Check_Colon_Equal;
+ end if;
+
return;
elsif Source (Scan_Ptr + 1) = '-'
else
Scan_Ptr := Scan_Ptr + 1;
Token := Tok_Colon;
- if Style_Check then Style.Check_Colon; end if;
+
+ if Style_Check then
+ Style.Check_Colon;
+ end if;
+
return;
end if;
Accumulate_Checksum ('(');
Scan_Ptr := Scan_Ptr + 1;
Token := Tok_Left_Paren;
- if Style_Check then Style.Check_Left_Paren; end if;
+
+ if Style_Check then
+ Style.Check_Left_Paren;
+ end if;
+
return;
-- Left bracket
Accumulate_Checksum (',');
Scan_Ptr := Scan_Ptr + 1;
Token := Tok_Comma;
- if Style_Check then Style.Check_Comma; end if;
+
+ if Style_Check then
+ Style.Check_Comma;
+ end if;
+
return;
-- Dot, which is either an isolated period, or part of a double dot
if Double_Char_Token ('.') then
Token := Tok_Dot_Dot;
- if Style_Check then Style.Check_Dot_Dot; end if;
+
+ if Style_Check then
+ Style.Check_Dot_Dot;
+ end if;
+
return;
elsif Source (Scan_Ptr + 1) in '0' .. '9' then
if Double_Char_Token ('>') then
Token := Tok_Arrow;
- if Style_Check then Style.Check_Arrow; end if;
+
+ if Style_Check then
+ Style.Check_Arrow;
+ end if;
+
return;
elsif Source (Scan_Ptr + 1) = '=' then
elsif Double_Char_Token ('>') then
Token := Tok_Box;
- if Style_Check then Style.Check_Box; end if;
+
+ if Style_Check then
+ Style.Check_Box;
+ end if;
+
return;
elsif Double_Char_Token ('<') then
-- Comment
else -- Source (Scan_Ptr + 1) = '-' then
- if Style_Check then Style.Check_Comment; end if;
+ if Style_Check then
+ Style.Check_Comment;
+ end if;
+
Scan_Ptr := Scan_Ptr + 2;
+
+ -- If we are in preprocessor mode with Replace_In_Comments set,
+ -- then we return the "--" as a token on its own.
+
+ if Replace_In_Comments then
+ Token := Tok_Comment;
+ return;
+ end if;
+
+ -- Otherwise scan out the comment
+
Start_Of_Comment := Scan_Ptr;
-- Loop to scan comment (this loop runs more than once only if
-- Keep going if horizontal tab
if Source (Scan_Ptr) = HT then
- if Style_Check then Style.Check_HT; end if;
+ if Style_Check then
+ Style.Check_HT;
+ end if;
+
Scan_Ptr := Scan_Ptr + 1;
-- Terminate scan of comment if line terminator
-- If UTF_32 terminator, terminate comment scan
- elsif Is_UTF_32_Line_Terminator (Code) then
+ elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then
Scan_Ptr := Wptr;
exit;
end if;
if Warn_On_Obsolescent_Feature then
Error_Msg_S
- ("use of ""'%"" is an obsolescent feature ('R'M 'J.2(4))?");
+ ("use of ""'%"" is an obsolescent feature (RM J.2(4))?");
Error_Msg_S
("\use """""" instead?");
end if;
or else Prev_Token in Token_Class_Literal
then
Token := Tok_Apostrophe;
- if Style_Check then Style.Check_Apostrophe; end if;
+
+ if Style_Check then
+ Style.Check_Apostrophe;
+ end if;
+
return;
-- Otherwise the apostrophe starts a character literal
Error_Illegal_Wide_Character;
Code := Character'Pos (' ');
+ -- In Ada 95 mode we allow any wide character in a character
+ -- literal, but in Ada 2005, the set of characters allowed
+ -- is restricted to graphic characters.
+
elsif Ada_Version >= Ada_05
- and then Is_UTF_32_Non_Graphic (Code)
+ and then Is_UTF_32_Non_Graphic (UTF_32 (Code))
then
Error_Msg
("(Ada 2005) non-graphic character not permitted " &
Accumulate_Checksum (')');
Scan_Ptr := Scan_Ptr + 1;
Token := Tok_Right_Paren;
- if Style_Check then Style.Check_Right_Paren; end if;
+
+ if Style_Check then
+ Style.Check_Right_Paren;
+ end if;
+
return;
-- Right bracket or right brace, treated as right paren
Accumulate_Checksum (';');
Scan_Ptr := Scan_Ptr + 1;
Token := Tok_Semicolon;
- if Style_Check then Style.Check_Semicolon; end if;
+
+ if Style_Check then
+ Style.Check_Semicolon;
+ end if;
+
return;
-- Vertical bar
else
Scan_Ptr := Scan_Ptr + 1;
Token := Tok_Vertical_Bar;
- if Style_Check then Style.Check_Vertical_Bar; end if;
+
+ if Style_Check then
+ Style.Check_Vertical_Bar;
+ end if;
+
return;
end if;
end Vertical_Bar_Case;
if Warn_On_Obsolescent_Feature then
Error_Msg_S
- ("use of ""'!"" is an obsolescent feature ('R'M 'J.2(2))?");
+ ("use of ""'!"" is an obsolescent feature (RM J.2(2))?");
Error_Msg_S
("\use ""'|"" instead?");
end if;
-- Digits starting a numeric literal
when '0' .. '9' =>
+
+ -- First a bit of a scan ahead to see if we have a case of an
+ -- identifier starting with a digit (remembering exponent case).
+
+ declare
+ C : constant Character := Source (Scan_Ptr + 1);
+
+ begin
+ -- OK literal if digit followed by digit or underscore
+
+ if C in '0' .. '9' or else C = '_' then
+ null;
+
+ -- OK literal if digit not followed by identifier char
+
+ elsif not Identifier_Char (C) then
+ null;
+
+ -- OK literal if digit followed by e/E followed by digit/sign.
+ -- We also allow underscore after the E, which is an error, but
+ -- better handled by Nlit than deciding this is an identifier.
+
+ elsif (C = 'e' or else C = 'E')
+ and then (Source (Scan_Ptr + 2) in '0' .. '9'
+ or else Source (Scan_Ptr + 2) = '+'
+ or else Source (Scan_Ptr + 2) = '-'
+ or else Source (Scan_Ptr + 2) = '_')
+ then
+ null;
+
+ -- Here we have what really looks like an identifier that
+ -- starts with a digit, so give error msg.
+
+ else
+ Error_Msg_S ("identifier may not start with digit");
+ Name_Len := 1;
+ Underline_Found := False;
+ Name_Buffer (1) := Source (Scan_Ptr);
+ Accumulate_Checksum (Name_Buffer (1));
+ Scan_Ptr := Scan_Ptr + 1;
+ goto Scan_Identifier;
+ end if;
+ end;
+
+ -- Here we have an OK integer literal
+
Nlit;
if Identifier_Char (Source (Scan_Ptr)) then
Error_Msg_S
("delimiter required between literal and identifier");
end if;
+
Post_Scan;
return;
-- Invalid control characters
- when NUL | SOH | STX | ETX | EOT | ENQ | ACK | BEL | BS | SO |
+ when NUL | SOH | STX | ETX | EOT | ENQ | ACK | BEL | BS | ASCII.SO |
SI | DLE | DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN |
EM | FS | GS | RS | US | DEL
=>
declare
Code : Char_Code;
+ Cat : Category;
Err : Boolean;
begin
if Err then
Error_Illegal_Wide_Character;
goto Scan_Next_Character;
+ end if;
+
+ Cat := Get_Category (UTF_32 (Code));
-- If OK letter, reset scan ptr and go scan identifier
- elsif Is_UTF_32_Letter (Code) then
+ if Is_UTF_32_Letter (Cat) then
Scan_Ptr := Wptr;
Name_Len := 0;
Underline_Found := False;
-- If OK wide space, ignore and keep scanning (we do not include
-- any ignored spaces in checksum)
- elsif Is_UTF_32_Space (Code) then
+ elsif Is_UTF_32_Space (Cat) then
goto Scan_Next_Character;
-- If OK wide line terminator, terminate current line
- elsif Is_UTF_32_Line_Terminator (Code) then
+ elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then
Scan_Ptr := Wptr;
goto Scan_Line_Terminator;
-- Punctuation is an error (at start of identifier)
- elsif Is_UTF_32_Punctuation (Code) then
+ elsif Is_UTF_32_Punctuation (Cat) then
Error_Msg
("identifier cannot start with punctuation", Wptr);
Scan_Ptr := Wptr;
Underline_Found := False;
goto Scan_Identifier;
- -- Mark character is an error (at start of identifer)
+ -- Mark character is an error (at start of identifier)
- elsif Is_UTF_32_Mark (Code) then
+ elsif Is_UTF_32_Mark (Cat) then
Error_Msg
("identifier cannot start with mark character", Wptr);
Scan_Ptr := Wptr;
Underline_Found := False;
goto Scan_Identifier;
- -- Other format character is an error (at start of identifer)
+ -- Other format character is an error (at start of identifier)
- elsif Is_UTF_32_Other (Code) then
+ elsif Is_UTF_32_Other (Cat) then
Error_Msg
("identifier cannot start with other format character", Wptr);
Scan_Ptr := Wptr;
-- identifier or bad literal. Not worth doing too much to try to
-- distinguish these cases, but we will do a little bit.
- elsif Is_UTF_32_Digit (Code) then
+ elsif Is_UTF_32_Digit (Cat) then
Error_Msg
("identifier cannot start with digit character", Wptr);
Scan_Ptr := Wptr;
-- Routine to scan line terminator. On entry Scan_Ptr points to a
-- character which is one of FF,LR,CR,VT, or one of the wide characters
- -- that is treated as a line termiantor.
+ -- that is treated as a line terminator.
<<Scan_Line_Terminator>>
-- Identifier scanning routine. On entry, some initial characters of
-- the identifier may have already been stored in Name_Buffer. If so,
- -- Name_Len has the number of characters stored. otherwise Name_Len is
+ -- Name_Len has the number of characters stored, otherwise Name_Len is
-- set to zero on entry. Underline_Found is also set False on entry.
<<Scan_Identifier>>
-- is active, so if we find an ESC character we know that we have a
-- wide character.
- if Identifier_Char (Source (Scan_Ptr)) then
-
+ if Identifier_Char (Source (Scan_Ptr))
+ or else (Source (Scan_Ptr) in Upper_Half_Character
+ and then Upper_Half_Encoding)
+ then
-- Case of underline
if Source (Scan_Ptr) = '_' then
-- encoding into the name table entry for the identifier.
declare
- Code : Char_Code;
- Err : Boolean;
- Chr : Character;
+ Code : Char_Code;
+ Err : Boolean;
+ Chr : Character;
+ Cat : Category;
begin
Wptr := Scan_Ptr;
("wide character not allowed in identifier", Wptr);
end if;
+ Cat := Get_Category (UTF_32 (Code));
+
-- If OK letter, store it folding to upper case. Note
-- that we include the folded letter in the checksum.
- if Is_UTF_32_Letter (Code) then
- Code := UTF_32_To_Upper_Case (Code);
+ if Is_UTF_32_Letter (Cat) then
+ Code :=
+ Char_Code (UTF_32_To_Upper_Case (UTF_32 (Code)));
Accumulate_Checksum (Code);
Store_Encoded_Character (Code);
Underline_Found := False;
-- If OK extended digit or mark, then store it
- elsif Is_UTF_32_Digit (Code)
- or else Is_UTF_32_Mark (Code)
+ elsif Is_UTF_32_Digit (Cat)
+ or else Is_UTF_32_Mark (Cat)
then
Accumulate_Checksum (Code);
Store_Encoded_Character (Code);
-- Wide punctuation is also stored, but counts as an
-- underline character for error checking purposes.
- elsif Is_UTF_32_Punctuation (Code) then
+ elsif Is_UTF_32_Punctuation (Cat) then
Accumulate_Checksum (Code);
if Underline_Found then
Underline_Found := True;
end if;
- -- Wide character in Unicode cateogory "Other, Format"
+ -- Wide character in Unicode category "Other, Format"
-- is accepted in an identifier, but is ignored and not
-- stored. It seems reasonable to exclude it from the
-- checksum.
- elsif Is_UTF_32_Other (Code) then
+ -- Note that it is correct (see AI-395) to simply strip
+ -- other format characters, before testing for double
+ -- underlines, or for reserved words).
+
+ elsif Is_UTF_32_Other (Cat) then
null;
-- Wide character in category Separator,Space terminates
- elsif Is_UTF_32_Space (Code) then
+ elsif Is_UTF_32_Space (Cat) then
goto Scan_Identifier_Complete;
-- Any other wide character is not acceptable
-- Here is where we check if it was a keyword
- if Get_Name_Table_Byte (Token_Name) /= 0
- and then (Ada_Version >= Ada_95
- or else Token_Name not in Ada_95_Reserved_Words)
- and then (Ada_Version >= Ada_05
- or else Token_Name not in Ada_2005_Reserved_Words)
- then
+ if Is_Keyword_Name (Token_Name) then
Token := Token_Type'Val (Get_Name_Table_Byte (Token_Name));
- -- Deal with possible style check for non-lower case keyword, but
- -- we don't treat ACCESS, DELTA, DIGITS, RANGE as keywords for
- -- this purpose if they appear as attribute designators. Actually
- -- we only check the first character for speed.
-
- -- Ada 2005 (AI-284): Do not apply the style check in case of
- -- "pragma Interface"
-
- if Style_Check
- and then Source (Token_Ptr) <= 'Z'
- and then (Prev_Token /= Tok_Apostrophe
- or else
- (Token /= Tok_Access
- and then Token /= Tok_Delta
- and then Token /= Tok_Digits
- and then Token /= Tok_Range))
- and then (Token /= Tok_Interface
- or else
- (Token = Tok_Interface
- and then Prev_Token /= Tok_Pragma))
- then
- Style.Non_Lower_Case_Keyword;
+ -- Keyword style checks
+
+ if Style_Check then
+
+ -- Deal with possible style check for non-lower case keyword,
+ -- but we don't treat ACCESS, DELTA, DIGITS, RANGE as keywords
+ -- for this purpose if they appear as attribute designators.
+ -- Actually we only check the first character for speed.
+
+ -- Ada 2005 (AI-284): Do not apply the style check in case of
+ -- "pragma Interface"
+
+ -- Ada 2005 (AI-340): Do not apply the style check in case of
+ -- MOD attribute.
+
+ if Source (Token_Ptr) <= 'Z'
+ and then (Prev_Token /= Tok_Apostrophe
+ or else
+ (Token /= Tok_Access and then
+ Token /= Tok_Delta and then
+ Token /= Tok_Digits and then
+ Token /= Tok_Mod and then
+ Token /= Tok_Range))
+ and then (Token /= Tok_Interface
+ or else
+ (Token = Tok_Interface
+ and then Prev_Token /= Tok_Pragma))
+ then
+ Style.Non_Lower_Case_Keyword;
+ end if;
+
+ -- Check THEN/ELSE style rules. These do not apply to AND THEN
+ -- or OR ELSE, and do not apply in conditional expressions.
+
+ if (Token = Tok_Then and then Prev_Token /= Tok_And)
+ or else
+ (Token = Tok_Else and then Prev_Token /= Tok_Or)
+ then
+ if Inside_Conditional_Expression = 0 then
+ Style.Check_Separate_Stmt_Lines;
+ end if;
+ end if;
end if;
-- We must reset Token_Name since this is not an identifier and
-- Outer loop keeps going only if a horizontal tab follows
if Source (Scan_Ptr) = HT then
- if Style_Check then Style.Check_HT; end if;
+ if Style_Check then
+ Style.Check_HT;
+ end if;
+
Scan_Ptr := Scan_Ptr + 1;
Start_Column := (Start_Column / 8) * 8 + 8;
else
exit Tabs_Loop;
end if;
-
end loop Tabs_Loop;
return Start_Column;
+
+ -- A constraint error can happen only if we have a compiler with checks on
+ -- and a line with a ludicrous number of tabs or spaces at the start. In
+ -- such a case, we really don't care if Start_Column is right or not.
+
+ exception
+ when Constraint_Error =>
+ return Start_Column;
end Set_Start_Column;
end Scng;