1 ------------------------------------------------------------------------------
3 -- GNAT COMPILER COMPONENTS --
9 -- Copyright (C) 1992-2005 Free Software Foundation, Inc. --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 2, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
17 -- for more details. You should have received a copy of the GNU General --
18 -- Public License distributed with GNAT; see file COPYING. If not, write --
19 -- to the Free Software Foundation, 51 Franklin Street, Fifth Floor, --
20 -- Boston, MA 02110-1301, USA. --
22 -- GNAT was originally developed by the GNAT team at New York University. --
23 -- Extensive contributions were provided by Ada Core Technologies Inc. --
25 ------------------------------------------------------------------------------
27 with Csets; use Csets;
28 with Err_Vars; use Err_Vars;
29 with Namet; use Namet;
31 with Scans; use Scans;
32 with Sinput; use Sinput;
33 with Snames; use Snames;
34 with Stringt; use Stringt;
35 with Stylesw; use Stylesw;
36 with Uintp; use Uintp;
37 with Urealp; use Urealp;
38 with Widechar; use Widechar;
41 with System.WCh_Con; use System.WCh_Con;
43 with GNAT.UTF_32; use GNAT.UTF_32;
48 -- Make control characters visible
50 Special_Characters : array (Character) of Boolean := (others => False);
51 -- For characters that are Special token, the value is True
53 Comment_Is_Token : Boolean := False;
54 -- True if comments are tokens
56 End_Of_Line_Is_Token : Boolean := False;
57 -- True if End_Of_Line is a token
59 -----------------------
60 -- Local Subprograms --
61 -----------------------
63 procedure Accumulate_Token_Checksum;
64 pragma Inline (Accumulate_Token_Checksum);
-- Accumulates the kind of the current token into the checksum. The body
-- converts Token_Type'Pos (Token) to a Character and passes it together
-- with Checksum to a CRC32 routine, so the checksum depends on the
-- sequence of token kinds as well as on the characters scanned.
66 procedure Accumulate_Checksum (C : Character);
67 pragma Inline (Accumulate_Checksum);
68 -- This routine accumulates the checksum given character C. During the
69 -- scanning of a source file, this routine is called with every character
70 -- in the source, excluding blanks, and all control characters (except
71 -- that ESC is included in the checksum). Upper case letters not in string
72 -- literals are folded by the caller. See Sinput spec for the documentation
73 -- of the checksum algorithm. Note: checksum values are only used if we
74 -- generate code, so it is not necessary to worry about making the right
75 -- sequence of calls in any error situation.
77 procedure Accumulate_Checksum (C : Char_Code);
78 pragma Inline (Accumulate_Checksum);
79 -- This version is identical, except that the argument, C, is a character
80 -- code value instead of a character. This is used when wide characters
81 -- are scanned. We use the character code rather than the ASCII characters
82 -- so that the checksum is independent of wide character encoding method.
84 procedure Initialize_Checksum;
85 pragma Inline (Initialize_Checksum);
86 -- Initialize checksum value
88 -------------------------
89 -- Accumulate_Checksum --
90 -------------------------
92 procedure Accumulate_Checksum (C : Character) is
-- Character version: updates the running CRC value held in Checksum with
-- the single character C by calling System.CRC32.Update. Checksum is
-- converted to System.CRC32.CRC32 for the call.
94 System.CRC32.Update (System.CRC32.CRC32 (Checksum), C);
95 end Accumulate_Checksum;
97 procedure Accumulate_Checksum (C : Char_Code) is
-- Char_Code version: breaks the code value C into byte-sized pieces and
-- feeds each piece to the Character version of Accumulate_Checksum, most
-- significant piece first and C mod 256 last.
-- NOTE(review): the three calls that extract the pieces C / 2 ** 24,
-- (C / 2 ** 16) mod 256 and (C / 256) mod 256 appear to serve codes wider
-- than 16 bits, while the following call passes C / 256 unmasked, which
-- only fits in Character when C does not exceed 16#FFFF#. These are
-- presumably the two arms of a conditional whose framing lines are not
-- visible in this view; confirm against the complete source.
100 Accumulate_Checksum (Character'Val (C / 2 ** 24));
101 Accumulate_Checksum (Character'Val ((C / 2 ** 16) mod 256));
102 Accumulate_Checksum (Character'Val ((C / 256) mod 256));
104 Accumulate_Checksum (Character'Val (C / 256));
-- The low-order byte is accumulated in every case
107 Accumulate_Checksum (Character'Val (C mod 256));
108 end Accumulate_Checksum;
110 -------------------------------
111 -- Accumulate_Token_Checksum --
112 -------------------------------
114 procedure Accumulate_Token_Checksum is
-- Folds the kind of the current token into the checksum: the position
-- number of Token within Token_Type is converted to a Character and
-- passed along with Checksum (converted to System.CRC32.CRC32).
-- NOTE(review): the name of the routine being called is not visible in
-- this view; presumably System.CRC32.Update, as in Accumulate_Checksum.
-- The conversion through Character relies on Token_Type having no more
-- than 256 values.
117 (System.CRC32.CRC32 (Checksum),
118 Character'Val (Token_Type'Pos (Token)));
119 end Accumulate_Token_Checksum;
121 ----------------------------
122 -- Determine_Token_Casing --
123 ----------------------------
125 function Determine_Token_Casing return Casing_Type is
-- Returns the result of Determine_Casing applied to the source text of the
-- current token, that is the slice of Source running from Token_Ptr up to
-- the character just before Scan_Ptr.
127 return Determine_Casing (Source (Token_Ptr .. Scan_Ptr - 1));
128 end Determine_Token_Casing;
130 -------------------------
131 -- Initialize_Checksum --
132 -------------------------
134 procedure Initialize_Checksum is
-- Resets the running checksum by calling System.CRC32.Initialize on
-- Checksum (converted to System.CRC32.CRC32), ready for a new sequence of
-- Accumulate_Checksum calls.
136 System.CRC32.Initialize (System.CRC32.CRC32 (Checksum));
137 end Initialize_Checksum;
139 ------------------------
140 -- Initialize_Scanner --
141 ------------------------
143 procedure Initialize_Scanner
144 (Unit : Unit_Number_Type;
145 Index : Source_File_Index)
-- Prepares the scanner for the source file identified by Index, which
-- belongs to compilation unit Unit. Two things happen here: every
-- reserved word is marked in the names table with its token value, and
-- the scan control variables are positioned at the first character of
-- the source text. No token is scanned by this procedure.
147 procedure Set_Reserved (N : Name_Id; T : Token_Type);
148 pragma Inline (Set_Reserved);
149 -- Set given name as a reserved keyword (T is the corresponding token)
155 procedure Set_Reserved (N : Name_Id; T : Token_Type) is
157 -- Set up Token_Type values in Names Table entries for reserved
158 -- keywords. We use the Pos value of the Token_Type value. Note we
159 -- rely on the fact that Token_Type'Val (0) is not a reserved word!
161 Set_Name_Table_Byte (N, Token_Type'Pos (T));
164 -- Start of processing for Initialize_Scanner
167 -- Establish reserved words
169 Set_Reserved (Name_Abort, Tok_Abort);
170 Set_Reserved (Name_Abs, Tok_Abs);
171 Set_Reserved (Name_Abstract, Tok_Abstract);
172 Set_Reserved (Name_Accept, Tok_Accept);
173 Set_Reserved (Name_Access, Tok_Access);
174 Set_Reserved (Name_And, Tok_And);
175 Set_Reserved (Name_Aliased, Tok_Aliased);
176 Set_Reserved (Name_All, Tok_All);
177 Set_Reserved (Name_Array, Tok_Array);
178 Set_Reserved (Name_At, Tok_At);
179 Set_Reserved (Name_Begin, Tok_Begin);
180 Set_Reserved (Name_Body, Tok_Body);
181 Set_Reserved (Name_Case, Tok_Case);
182 Set_Reserved (Name_Constant, Tok_Constant);
183 Set_Reserved (Name_Declare, Tok_Declare);
184 Set_Reserved (Name_Delay, Tok_Delay);
185 Set_Reserved (Name_Delta, Tok_Delta);
186 Set_Reserved (Name_Digits, Tok_Digits);
187 Set_Reserved (Name_Do, Tok_Do);
188 Set_Reserved (Name_Else, Tok_Else);
189 Set_Reserved (Name_Elsif, Tok_Elsif);
190 Set_Reserved (Name_End, Tok_End);
191 Set_Reserved (Name_Entry, Tok_Entry);
192 Set_Reserved (Name_Exception, Tok_Exception);
193 Set_Reserved (Name_Exit, Tok_Exit);
194 Set_Reserved (Name_For, Tok_For);
195 Set_Reserved (Name_Function, Tok_Function);
196 Set_Reserved (Name_Generic, Tok_Generic);
197 Set_Reserved (Name_Goto, Tok_Goto);
198 Set_Reserved (Name_If, Tok_If);
199 Set_Reserved (Name_In, Tok_In);
200 Set_Reserved (Name_Is, Tok_Is);
201 Set_Reserved (Name_Limited, Tok_Limited);
202 Set_Reserved (Name_Loop, Tok_Loop);
203 Set_Reserved (Name_Mod, Tok_Mod);
204 Set_Reserved (Name_New, Tok_New);
205 Set_Reserved (Name_Not, Tok_Not);
206 Set_Reserved (Name_Null, Tok_Null);
207 Set_Reserved (Name_Of, Tok_Of);
208 Set_Reserved (Name_Or, Tok_Or);
209 Set_Reserved (Name_Others, Tok_Others);
210 Set_Reserved (Name_Out, Tok_Out);
211 Set_Reserved (Name_Package, Tok_Package);
212 Set_Reserved (Name_Pragma, Tok_Pragma);
213 Set_Reserved (Name_Private, Tok_Private);
214 Set_Reserved (Name_Procedure, Tok_Procedure);
215 Set_Reserved (Name_Protected, Tok_Protected);
216 Set_Reserved (Name_Raise, Tok_Raise);
217 Set_Reserved (Name_Range, Tok_Range);
218 Set_Reserved (Name_Record, Tok_Record);
219 Set_Reserved (Name_Rem, Tok_Rem);
220 Set_Reserved (Name_Renames, Tok_Renames);
221 Set_Reserved (Name_Requeue, Tok_Requeue);
222 Set_Reserved (Name_Return, Tok_Return);
223 Set_Reserved (Name_Reverse, Tok_Reverse);
224 Set_Reserved (Name_Select, Tok_Select);
225 Set_Reserved (Name_Separate, Tok_Separate);
226 Set_Reserved (Name_Subtype, Tok_Subtype);
227 Set_Reserved (Name_Tagged, Tok_Tagged);
228 Set_Reserved (Name_Task, Tok_Task);
229 Set_Reserved (Name_Terminate, Tok_Terminate);
230 Set_Reserved (Name_Then, Tok_Then);
231 Set_Reserved (Name_Type, Tok_Type);
232 Set_Reserved (Name_Until, Tok_Until);
233 Set_Reserved (Name_Use, Tok_Use);
234 Set_Reserved (Name_When, Tok_When);
235 Set_Reserved (Name_While, Tok_While);
236 Set_Reserved (Name_With, Tok_With);
237 Set_Reserved (Name_Xor, Tok_Xor);
239 -- Ada 2005 reserved words
241 Set_Reserved (Name_Interface, Tok_Interface);
242 Set_Reserved (Name_Overriding, Tok_Overriding);
243 Set_Reserved (Name_Synchronized, Tok_Synchronized);
245 -- Initialize scan control variables
-- Source is bound to the text of the selected file, and the scan, token,
-- line-start and first-non-blank pointers all begin at the first position
-- of that file.
247 Current_Source_File := Index;
248 Source := Source_Text (Current_Source_File);
249 Current_Source_Unit := Unit;
250 Scan_Ptr := Source_First (Current_Source_File);
252 Token_Ptr := Scan_Ptr;
253 Current_Line_Start := Scan_Ptr;
255 Token_Name := No_Name;
256 Start_Column := Set_Start_Column;
257 First_Non_Blank_Location := Scan_Ptr;
261 -- Do not call Scan, otherwise the License stuff does not work in Scn
263 end Initialize_Scanner;
265 ------------------------------
266 -- Reset_Special_Characters --
267 ------------------------------
269 procedure Reset_Special_Characters is
-- Clears the Special_Characters table so that no character is flagged as
-- a Special token any more (every entry is set back to False).
271 Special_Characters := (others => False);
272 end Reset_Special_Characters;
280 Start_Of_Comment : Source_Ptr;
281 -- Record start of comment position
283 Underline_Found : Boolean;
284 -- During scanning of an identifier, set to True if last character
285 -- scanned was an underline or other punctuation character. This
286 -- is used to flag the error of two underlines/punctuations in a
287 -- row or ending an identifier with an underline/punctuation. Here
288 -- punctuation means any UTF_32 character in the Unicode category
289 -- Punctuation,Connector.
292 -- Used to remember start of last wide character scanned
294 procedure Check_End_Of_Line;
295 -- Called when end of line encountered. Checks that line is not too
296 -- long, and that other style checks for the end of line are met.
298 function Double_Char_Token (C : Character) return Boolean;
299 -- This function is used for double character tokens like := or <>. It
300 -- checks if the character following Source (Scan_Ptr) is C, and if so
301 -- bumps Scan_Ptr past the pair of characters and returns True. A space
302 -- between the two characters is also recognized with an appropriate
303 -- error message being issued. If C is not present, False is returned.
304 -- Note that Double_Char_Token can only be used for tokens defined in
305 -- the Ada syntax (its use for error cases like && is not appropriate
306 -- since we do not want a junk message for a case like &-space-&).
308 procedure Error_Illegal_Character;
309 -- Give illegal character error, Scan_Ptr points to character. On
310 -- return, Scan_Ptr is bumped past the illegal character.
312 procedure Error_Illegal_Wide_Character;
313 -- Give illegal wide character message. On return, Scan_Ptr is bumped
314 -- past the illegal character, which may still leave us pointing to
315 -- junk, not much we can do if the escape sequence is messed up!
317 procedure Error_Long_Line;
318 -- Signal error of excessively long line
320 procedure Error_No_Double_Underline;
321 -- Signal error of two underline or punctuation characters in a row.
322 -- Called with Scan_Ptr pointing to second underline/punctuation char.
325 -- This is the procedure for scanning out numeric literals. On entry,
326 -- Scan_Ptr points to the digit that starts the numeric literal (the
327 -- checksum for this character has not been accumulated yet). On return
328 -- Scan_Ptr points past the last character of the numeric literal, Token
329 -- and Token_Node are set appropriately, and the checksum is updated.
332 -- This is the procedure for scanning out string literals. On entry,
333 -- Scan_Ptr points to the opening string quote (the checksum for this
334 -- character has not been accumulated yet). On return Scan_Ptr points
335 -- past the closing quote of the string literal, Token and Token_Node
336 -- are set appropriately, and the checksum is updated.
338 -----------------------
339 -- Check_End_Of_Line --
340 -----------------------
342 procedure Check_End_Of_Line is
-- Performs the end-of-line checks: the style check on the line terminator
-- and the maximum line length check.
343 Len : constant Int := Int (Scan_Ptr) - Int (Current_Line_Start);
-- Len is the distance from the start of the current line to Scan_Ptr,
-- i.e. the number of characters on the line before the scan position.
-- NOTE(review): the condition guarding the following style call is not
-- visible in this view.
347 Style.Check_Line_Terminator (Len);
350 -- Deal with checking maximum line length
352 if Style_Check and Style_Check_Max_Line_Length then
353 Style.Check_Line_Max_Length (Len);
355 -- If style checking is inactive, check maximum line length against
356 -- standard value. Note that we take this from Opt.Max_Line_Length
357 -- rather than Hostparm.Max_Line_Length because we do not want to
358 -- impose any limit during scanning of configuration pragma files,
359 -- and Opt.Max_Line_Length (normally set to Hostparm.Max_Line_Length)
360 -- is reset to Column_Number'Max during scanning of such files.
362 elsif Len > Opt.Max_Line_Length then
365 end Check_End_Of_Line;
367 -----------------------
368 -- Double_Char_Token --
369 -----------------------
371 function Double_Char_Token (C : Character) return Boolean is
-- Recognizes a two-character delimiter whose first character is at
-- Scan_Ptr and whose second character must be C.
-- Normal case: C follows immediately. C is added to the checksum (the
-- first character is accumulated by the caller at the visible call sites)
-- and Scan_Ptr moves past both characters.
373 if Source (Scan_Ptr + 1) = C then
374 Accumulate_Checksum (C);
375 Scan_Ptr := Scan_Ptr + 2;
-- Recovery case: exactly one blank separates the two characters.
-- Scan_Ptr is first moved onto the blank before the message is issued,
-- presumably so that the message is flagged at the blank, and then moved
-- past the blank and C, for a total advance of three characters. C is not
-- accumulated in the checksum on this error path.
378 elsif Source (Scan_Ptr + 1) = ' '
379 and then Source (Scan_Ptr + 2) = C
381 Scan_Ptr := Scan_Ptr + 1;
382 Error_Msg_S ("no space allowed here");
383 Scan_Ptr := Scan_Ptr + 2;
389 end Double_Char_Token;
391 -----------------------------
392 -- Error_Illegal_Character --
393 -----------------------------
395 procedure Error_Illegal_Character is
-- Issues the illegal character message while Scan_Ptr still designates
-- the offending character, then steps Scan_Ptr over that one character so
-- that scanning can continue.
397 Error_Msg_S ("illegal character");
398 Scan_Ptr := Scan_Ptr + 1;
399 end Error_Illegal_Character;
401 ----------------------------------
402 -- Error_Illegal_Wide_Character --
403 ----------------------------------
405 procedure Error_Illegal_Wide_Character is
-- Issues the illegal wide character message at Wptr, the remembered start
-- of the last wide character scanned.
-- NOTE(review): the comment on the declaration of this procedure says
-- that Scan_Ptr is bumped past the illegal character on return, but the
-- body shown here does not modify Scan_Ptr. At the visible call site
-- Scan_Wide has already been called with Scan_Ptr, so the pointer was
-- presumably advanced there; confirm and reconcile the two comments.
407 Error_Msg ("illegal wide character", Wptr);
408 end Error_Illegal_Wide_Character;
410 ---------------------
411 -- Error_Long_Line --
412 ---------------------
414 procedure Error_Long_Line is
-- Reports a line that exceeds the maximum permitted length. The position
-- passed with the message is Current_Line_Start plus Opt.Max_Line_Length,
-- which designates the first character lying beyond the permitted length.
-- NOTE(review): the name of the error routine being called is not visible
-- in this view.
417 ("this line is too long",
418 Current_Line_Start + Source_Ptr (Opt.Max_Line_Length));
421 -------------------------------
422 -- Error_No_Double_Underline --
423 -------------------------------
425 procedure Error_No_Double_Underline is
-- Reports two adjacent underline/punctuation characters. Scan_Ptr
-- designates the second character of the pair and Scan_Ptr - 1 the first.
-- Underline_Found is cleared before the message is chosen.
427 Underline_Found := False;
429 -- There are four cases, and we special case the messages
-- The outer test looks at the second character (underline or not) and the
-- inner tests look at the first character, giving the four combinations
-- underline/underline, punctuation/underline, underline/punctuation and
-- punctuation/punctuation.
431 if Source (Scan_Ptr) = '_' then
432 if Source (Scan_Ptr - 1) = '_' then
434 ("two consecutive underlines not permitted");
437 ("underline cannot follow punctuation character");
441 if Source (Scan_Ptr - 1) = '_' then
443 ("punctuation character cannot follow underline");
446 ("two consecutive punctuation characters not permitted");
449 end Error_No_Double_Underline;
458 -- Current source program character
460 Base_Char : Character;
461 -- Either # or : (character at start of based number)
467 -- Value of base in Uint format
470 -- Value of integer scanned by Scan_Integer in Uint format
473 -- Value of integer in numeric value being scanned
476 -- Scale value for real literal
479 -- Scale in Uint format
481 Exponent_Is_Negative : Boolean;
482 -- Set true for negative exponent
484 Extended_Digit_Value : Int;
485 -- Extended digit value
487 Point_Scanned : Boolean;
488 -- Flag for decimal point scanned in numeric literal
490 -----------------------
491 -- Local Subprograms --
492 -----------------------
494 procedure Error_Digit_Expected;
495 -- Signal error of bad digit, Scan_Ptr points to the location at
496 -- which the digit was expected on input, and is unchanged on return.
498 procedure Scan_Integer;
499 -- Procedure to scan integer literal. On entry, Scan_Ptr points to a
500 -- digit, on exit Scan_Ptr points past the last character of the
503 -- For each digit encountered, UI_Int_Value is multiplied by 10, and
504 -- the value of the digit added to the result. In addition, the
505 -- value in Scale is decremented by one for each actual digit
508 --------------------------
509 -- Error_Digit_Expected --
510 --------------------------
512 procedure Error_Digit_Expected is
-- Issues the digit expected message through Error_Msg_S. Scan_Ptr is
-- neither read nor changed by this body.
514 Error_Msg_S ("digit expected");
515 end Error_Digit_Expected;
521 procedure Scan_Integer is
-- Scans a run of decimal digits, possibly separated by single underlines,
-- starting at Scan_Ptr, and builds up the scanned value in UI_Int_Value.
-- NOTE(review): the loop framing, the target of the value assignment and
-- the tests around the underline handling are not visible in this view;
-- the comments below describe only the statements that are shown.
523 -- Next character scanned
526 C := Source (Scan_Ptr);
528 -- Loop through digits (allowing underlines)
-- Each digit is added to the checksum and appended to the value in base
-- ten (previous value times 10 plus the digit value), after which the
-- scan moves to the next character.
531 Accumulate_Checksum (C);
533 UI_Int_Value * 10 + (Character'Pos (C) - Character'Pos ('0'));
534 Scan_Ptr := Scan_Ptr + 1;
536 C := Source (Scan_Ptr);
538 -- Case of underline encountered
542 -- We do not accumulate the '_' in the checksum, so that
543 -- 1_234 is equivalent to 1234, and does not trigger
544 -- compilation for "minimal recompilation" (gnatmake -m).
547 Scan_Ptr := Scan_Ptr + 1;
548 C := Source (Scan_Ptr);
550 Error_No_Double_Underline;
-- After skipping underlines the next character must be a digit
553 if C not in '0' .. '9' then
554 Error_Digit_Expected;
-- The scan stops at the first character that is not a decimal digit
559 exit when C not in '0' .. '9';
564 -- Start of Processing for Nlit
569 UI_Int_Value := Uint_0;
573 Point_Scanned := False;
574 UI_Num_Value := UI_Int_Value;
576 -- Various possibilities now for continuing the literal are period,
577 -- E/e (for exponent), or :/# (for based literal).
580 C := Source (Scan_Ptr);
584 -- Scan out point, but do not scan past .. which is a range
585 -- sequence, and must not be eaten up scanning a numeric literal.
587 while C = '.' and then Source (Scan_Ptr + 1) /= '.' loop
588 Accumulate_Checksum ('.');
590 if Point_Scanned then
591 Error_Msg_S ("duplicate point ignored");
594 Point_Scanned := True;
595 Scan_Ptr := Scan_Ptr + 1;
596 C := Source (Scan_Ptr);
598 if C not in '0' .. '9' then
600 ("real literal cannot end with point", Scan_Ptr - 1);
603 UI_Num_Value := UI_Int_Value;
607 -- Based literal case. The base is the value we already scanned.
608 -- In the case of colon, we insist that the following character
609 -- is indeed an extended digit or a period. This catches a number
610 -- of common errors, as well as catching the well known tricky
611 -- bug otherwise arising from "x : integer range 1 .. 10:= 6;"
614 or else (C = ':' and then
615 (Source (Scan_Ptr + 1) = '.'
617 Source (Scan_Ptr + 1) in '0' .. '9'
619 Source (Scan_Ptr + 1) in 'A' .. 'Z'
621 Source (Scan_Ptr + 1) in 'a' .. 'z'))
624 Obsolescent_Check (Scan_Ptr);
626 if Warn_On_Obsolescent_Feature then
628 ("use of "":"" is an obsolescent feature ('R'M 'J.2(3))?");
630 ("\use ""'#"" instead?");
634 Accumulate_Checksum (C);
636 UI_Base := UI_Int_Value;
638 if UI_Base < 2 or else UI_Base > 16 then
639 Error_Msg_SC ("base not 2-16");
643 Base := UI_To_Int (UI_Base);
644 Scan_Ptr := Scan_Ptr + 1;
646 -- Scan out extended integer [. integer]
648 C := Source (Scan_Ptr);
649 UI_Int_Value := Uint_0;
653 if C in '0' .. '9' then
654 Accumulate_Checksum (C);
655 Extended_Digit_Value :=
656 Int'(Character'Pos (C)) - Int'(Character'Pos ('0'));
658 elsif C in 'A' .. 'F' then
659 Accumulate_Checksum (Character'Val (Character'Pos (C) + 32));
660 Extended_Digit_Value :=
661 Int'(Character'Pos (C)) - Int'(Character'Pos ('A')) + 10;
663 elsif C in 'a' .. 'f' then
664 Accumulate_Checksum (C);
665 Extended_Digit_Value :=
666 Int'(Character'Pos (C)) - Int'(Character'Pos ('a')) + 10;
669 Error_Msg_S ("extended digit expected");
673 if Extended_Digit_Value >= Base then
674 Error_Msg_S ("digit '>= base");
677 UI_Int_Value := UI_Int_Value * UI_Base + Extended_Digit_Value;
679 Scan_Ptr := Scan_Ptr + 1;
680 C := Source (Scan_Ptr);
684 Accumulate_Checksum ('_');
685 Scan_Ptr := Scan_Ptr + 1;
686 C := Source (Scan_Ptr);
688 Error_No_Double_Underline;
692 Accumulate_Checksum ('.');
694 if Point_Scanned then
695 Error_Msg_S ("duplicate point ignored");
698 Scan_Ptr := Scan_Ptr + 1;
699 C := Source (Scan_Ptr);
700 Point_Scanned := True;
703 elsif C = Base_Char then
704 Accumulate_Checksum (C);
705 Scan_Ptr := Scan_Ptr + 1;
708 elsif C = '#' or else C = ':' then
709 Error_Msg_S ("based number delimiters must match");
710 Scan_Ptr := Scan_Ptr + 1;
713 elsif not Identifier_Char (C) then
714 if Base_Char = '#' then
715 Error_Msg_S ("missing '#");
717 Error_Msg_S ("missing ':");
725 UI_Num_Value := UI_Int_Value;
730 if not Point_Scanned then
734 UI_Scale := UI_From_Int (Scale);
737 if Source (Scan_Ptr) = 'e' or else Source (Scan_Ptr) = 'E' then
738 Accumulate_Checksum ('e');
739 Scan_Ptr := Scan_Ptr + 1;
740 Exponent_Is_Negative := False;
742 if Source (Scan_Ptr) = '+' then
743 Accumulate_Checksum ('+');
744 Scan_Ptr := Scan_Ptr + 1;
746 elsif Source (Scan_Ptr) = '-' then
747 Accumulate_Checksum ('-');
749 if not Point_Scanned then
751 ("negative exponent not allowed for integer literal");
753 Exponent_Is_Negative := True;
756 Scan_Ptr := Scan_Ptr + 1;
759 UI_Int_Value := Uint_0;
761 if Source (Scan_Ptr) in '0' .. '9' then
764 Error_Digit_Expected;
767 if Exponent_Is_Negative then
768 UI_Scale := UI_Scale - UI_Int_Value;
770 UI_Scale := UI_Scale + UI_Int_Value;
774 -- Case of real literal to be returned
776 if Point_Scanned then
777 Token := Tok_Real_Literal;
778 Real_Literal_Value :=
784 -- Case of integer literal to be returned
787 Token := Tok_Integer_Literal;
790 Int_Literal_Value := UI_Num_Value;
792 -- Avoid doing possibly expensive calculations in cases like
793 -- parsing 163E800_000# when semantics will not be done anyway.
794 -- This is especially useful when parsing garbled input.
796 elsif Operating_Mode /= Check_Syntax
797 and then (Serious_Errors_Detected = 0 or else Try_Semantics)
799 Int_Literal_Value := UI_Num_Value * UI_Base ** UI_Scale;
802 Int_Literal_Value := No_Uint;
806 Accumulate_Token_Checksum;
817 Delimiter : Character;
818 -- Delimiter (first character of string)
821 -- Current source program character
824 -- Current character code value
827 -- Error flag for Scan_Wide call
829 procedure Error_Bad_String_Char;
830 -- Signal bad character in string/character literal. On entry
831 -- Scan_Ptr points to the improper character encountered during the
832 -- scan. Scan_Ptr is not modified, so it still points to the bad
833 -- character on return.
835 procedure Error_Unterminated_String;
836 -- Procedure called if a line terminator character is encountered
837 -- during scanning a string, meaning that the string is not properly
840 procedure Set_String;
841 -- Procedure used to distinguish between string and operator symbol.
842 -- On entry the string has been scanned out, and its characters
843 -- start at Token_Ptr and end one character before Scan_Ptr. On exit
844 -- Token is set to Tok_String_Literal or Tok_Operator_Symbol as
845 -- appropriate, and Token_Node is appropriately initialized. In
846 -- addition, in the operator symbol case, Token_Name is
847 -- appropriately set.
849 ---------------------------
850 -- Error_Bad_String_Char --
851 ---------------------------
853 procedure Error_Bad_String_Char is
-- Selects and issues the message appropriate to the improper character at
-- Scan_Ptr. Scan_Ptr itself is only read, never modified.
854 C : constant Character := Source (Scan_Ptr);
-- The offending character is classified in this order: horizontal tab,
-- then the format effectors VT and FF, then upper half characters, and
-- finally any other character, which is reported as a control character.
-- NOTE(review): the first test, which selects the horizontal tab message,
-- is not visible in this view.
858 Error_Msg_S ("horizontal tab not allowed in string");
860 elsif C = VT or else C = FF then
861 Error_Msg_S ("format effector not allowed in string");
863 elsif C in Upper_Half_Character then
864 Error_Msg_S ("(Ada 83) upper half character not allowed");
867 Error_Msg_S ("control character not allowed in string");
869 end Error_Bad_String_Char;
871 -------------------------------
872 -- Error_Unterminated_String --
873 -------------------------------
875 procedure Error_Unterminated_String is
-- Reports a string literal that reaches the end of the line without a
-- closing delimiter. Before the message is issued, Scan_Ptr is moved
-- backwards over trailing characters that were probably not meant to be
-- part of the string, so that the flag lands where the closing quote most
-- likely belongs.
877 -- An interesting little refinement. Consider the following
880 -- A := "this is an unterminated string;
881 -- A := "this is an unterminated string &
882 -- P(A, "this is a parameter that didn't get terminated);
884 -- We fiddle a little to do slightly better placement in these
885 -- cases. Also, if there is white space at the end of the line, we
886 -- place the flag at the start of this white space, not at the
887 -- end. Note that we only have to test for blanks, since tabs
888 -- aren't allowed in strings in the first place and would have
889 -- caused an error message.
891 -- Two more cases that we treat specially are:
893 -- A := "this string uses the wrong terminator'
894 -- A := "this string uses the wrong terminator' &
896 -- In these cases we give a different error message as well.
898 -- We actually reposition the scan pointer to the point where we
899 -- place the flag in these cases, since it seems a better bet on
900 -- the original intention.
-- Step back over any trailing blanks and ampersands
902 while Source (Scan_Ptr - 1) = ' '
903 or else Source (Scan_Ptr - 1) = '&'
905 Scan_Ptr := Scan_Ptr - 1;
909 -- Check for case of incorrect string terminator, but single quote
910 -- is not considered incorrect if the opening terminator misused
911 -- a single quote (error message already given).
914 and then Source (Scan_Ptr - 1) = '''
918 ("incorrect string terminator character", Scan_Ptr - 1);
-- Otherwise step back over one trailing semicolon and then one trailing
-- right parenthesis, matching the examples given above
922 if Source (Scan_Ptr - 1) = ';' then
923 Scan_Ptr := Scan_Ptr - 1;
926 if Source (Scan_Ptr - 1) = ')' then
927 Scan_Ptr := Scan_Ptr - 1;
932 Error_Msg_S ("missing string quote");
933 end Error_Unterminated_String;
939 procedure Set_String is
-- Decides whether the string just scanned is an operator symbol or an
-- ordinary string literal, and sets Token (and Token_Name for operator
-- symbols) accordingly.
940 Slen : constant Int := Int (Scan_Ptr - Token_Ptr - 2);
-- Slen is the number of source characters between the two delimiters:
-- the token runs from Token_Ptr to Scan_Ptr - 1 and the subtraction of 2
-- removes the opening and closing delimiter.
946 -- Token_Name is currently set to Error_Name. The following
947 -- section of code resets Token_Name to the proper Name_Op_xx
948 -- value if the string is a valid operator symbol, otherwise it is
949 -- left set to Error_Name.
-- Operator symbols made of a single character. C1 is the character that
-- follows the opening delimiter.
-- NOTE(review): the tests that select among the assignments below are not
-- visible in this view.
952 C1 := Source (Token_Ptr + 1);
956 Token_Name := Name_Op_Eq;
959 Token_Name := Name_Op_Gt;
962 Token_Name := Name_Op_Lt;
965 Token_Name := Name_Op_Add;
968 Token_Name := Name_Op_Subtract;
971 Token_Name := Name_Op_Concat;
974 Token_Name := Name_Op_Multiply;
977 Token_Name := Name_Op_Divide;
-- Operator symbols made of two characters. Letters are accepted in either
-- upper or lower case.
984 C1 := Source (Token_Ptr + 1);
985 C2 := Source (Token_Ptr + 2);
987 if C1 = '*' and then C2 = '*' then
988 Token_Name := Name_Op_Expon;
993 Token_Name := Name_Op_Ne;
995 Token_Name := Name_Op_Le;
997 Token_Name := Name_Op_Ge;
1000 elsif (C1 = 'O' or else C1 = 'o') and then -- OR
1001 (C2 = 'R' or else C2 = 'r')
1003 Token_Name := Name_Op_Or;
-- Operator symbols made of three characters, again matched without regard
-- to letter case.
1007 C1 := Source (Token_Ptr + 1);
1008 C2 := Source (Token_Ptr + 2);
1009 C3 := Source (Token_Ptr + 3);
1011 if (C1 = 'A' or else C1 = 'a') and then -- AND
1012 (C2 = 'N' or else C2 = 'n') and then
1013 (C3 = 'D' or else C3 = 'd')
1015 Token_Name := Name_Op_And;
1017 elsif (C1 = 'A' or else C1 = 'a') and then -- ABS
1018 (C2 = 'B' or else C2 = 'b') and then
1019 (C3 = 'S' or else C3 = 's')
1021 Token_Name := Name_Op_Abs;
1023 elsif (C1 = 'M' or else C1 = 'm') and then -- MOD
1024 (C2 = 'O' or else C2 = 'o') and then
1025 (C3 = 'D' or else C3 = 'd')
1027 Token_Name := Name_Op_Mod;
1029 elsif (C1 = 'N' or else C1 = 'n') and then -- NOT
1030 (C2 = 'O' or else C2 = 'o') and then
1031 (C3 = 'T' or else C3 = 't')
1033 Token_Name := Name_Op_Not;
1035 elsif (C1 = 'R' or else C1 = 'r') and then -- REM
1036 (C2 = 'E' or else C2 = 'e') and then
1037 (C3 = 'M' or else C3 = 'm')
1039 Token_Name := Name_Op_Rem;
1041 elsif (C1 = 'X' or else C1 = 'x') and then -- XOR
1042 (C2 = 'O' or else C2 = 'o') and then
1043 (C3 = 'R' or else C3 = 'r')
1045 Token_Name := Name_Op_Xor;
1050 -- If it is an operator symbol, then Token_Name is set. If it is
1051 -- some other string value, then Token_Name still contains
-- Token_Name unchanged from Error_Name means no operator matched, so the
-- token is an ordinary string literal; otherwise it is an operator symbol.
1054 if Token_Name = Error_Name then
1055 Token := Tok_String_Literal;
1058 Token := Tok_Operator_Symbol;
1062 -- Start of processing for Slit
1065 -- On entry, Scan_Ptr points to the opening character of the string
1066 -- which is either a percent, double quote, or apostrophe (single
1067 -- quote). The latter case is an error detected by the character
1070 Delimiter := Source (Scan_Ptr);
1071 Accumulate_Checksum (Delimiter);
1073 Scan_Ptr := Scan_Ptr + 1;
1075 -- Loop to scan out characters of string literal
1078 C := Source (Scan_Ptr);
1080 if C = Delimiter then
1081 Accumulate_Checksum (C);
1082 Scan_Ptr := Scan_Ptr + 1;
1083 exit when Source (Scan_Ptr) /= Delimiter;
1084 Code := Get_Char_Code (C);
1085 Accumulate_Checksum (C);
1086 Scan_Ptr := Scan_Ptr + 1;
1089 if C = '"' and then Delimiter = '%' then
1091 ("quote not allowed in percent delimited string");
1092 Code := Get_Char_Code (C);
1093 Scan_Ptr := Scan_Ptr + 1;
1096 and then Wide_Character_Encoding_Method
1097 in WC_ESC_Encoding_Method)
1098 or else (C in Upper_Half_Character
1099 and then Upper_Half_Encoding)
1101 and then Source (Scan_Ptr + 1) = '"'
1102 and then Identifier_Char (Source (Scan_Ptr + 2)))
1105 Scan_Wide (Source, Scan_Ptr, Code, Err);
1108 Error_Illegal_Wide_Character;
1109 Code := Get_Char_Code (' ');
1112 Accumulate_Checksum (Code);
1114 -- In Ada 95 mode we allow any wide characters in a string
1115 -- but in Ada 2005, the set of characters allowed has been
1116 -- restricted to graphic characters.
1118 if Ada_Version >= Ada_05
1119 and then Is_UTF_32_Non_Graphic (UTF_32 (Code))
1122 ("(Ada 2005) non-graphic character not permitted " &
1123 "in string literal", Wptr);
1127 Accumulate_Checksum (C);
1129 if C not in Graphic_Character then
1130 if C in Line_Terminator then
1131 Error_Unterminated_String;
1134 elsif C in Upper_Half_Character then
1135 if Ada_Version = Ada_83 then
1136 Error_Bad_String_Char;
1140 Error_Bad_String_Char;
1144 Code := Get_Char_Code (C);
1145 Scan_Ptr := Scan_Ptr + 1;
1149 Store_String_Char (Code);
1151 if not In_Character_Range (Code) then
1152 Wide_Character_Found := True;
1156 String_Literal_Id := End_String;
1161 -- Start of processing for Scan
1164 Prev_Token := Token;
1165 Prev_Token_Ptr := Token_Ptr;
1166 Token_Name := Error_Name;
1168 -- The following loop runs more than once only if a format effector
1169 -- (tab, vertical tab, form feed, line feed, carriage return) is
1170 -- encountered and skipped, or some error situation, such as an
1171 -- illegal character, is encountered.
1173 <<Scan_Next_Character>>
1176 -- Skip past blanks, loop is opened up for speed
1178 while Source (Scan_Ptr) = ' ' loop
1179 if Source (Scan_Ptr + 1) /= ' ' then
1180 Scan_Ptr := Scan_Ptr + 1;
1184 if Source (Scan_Ptr + 2) /= ' ' then
1185 Scan_Ptr := Scan_Ptr + 2;
1189 if Source (Scan_Ptr + 3) /= ' ' then
1190 Scan_Ptr := Scan_Ptr + 3;
1194 if Source (Scan_Ptr + 4) /= ' ' then
1195 Scan_Ptr := Scan_Ptr + 4;
1199 if Source (Scan_Ptr + 5) /= ' ' then
1200 Scan_Ptr := Scan_Ptr + 5;
1204 if Source (Scan_Ptr + 6) /= ' ' then
1205 Scan_Ptr := Scan_Ptr + 6;
1209 if Source (Scan_Ptr + 7) /= ' ' then
1210 Scan_Ptr := Scan_Ptr + 7;
1214 Scan_Ptr := Scan_Ptr + 8;
1217 -- We are now at a non-blank character, which is the first character
1218 -- of the token we will scan, and hence the value of Token_Ptr.
1220 Token_Ptr := Scan_Ptr;
1222 -- Here begins the main case statement which transfers control on the
1223 -- basis of the non-blank character we have encountered.
1225 case Source (Scan_Ptr) is
1227 -- Line terminator characters
1229 when CR | LF | FF | VT =>
1230 goto Scan_Line_Terminator;
1232 -- Horizontal tab, just skip past it
1235 if Style_Check then Style.Check_HT; end if;
1236 Scan_Ptr := Scan_Ptr + 1;
1238 -- End of file character, treated as an end of file only if it is
1239 -- the last character in the buffer, otherwise it is ignored.
1242 if Scan_Ptr = Source_Last (Current_Source_File) then
1244 if Style_Check then Style.Check_EOF; end if;
1248 Scan_Ptr := Scan_Ptr + 1;
1254 Accumulate_Checksum ('&');
1256 if Source (Scan_Ptr + 1) = '&' then
1257 Error_Msg_S ("'&'& should be `AND THEN`");
1258 Scan_Ptr := Scan_Ptr + 2;
1263 Scan_Ptr := Scan_Ptr + 1;
1264 Token := Tok_Ampersand;
1268 -- Asterisk (can be multiplication operator or double asterisk which
1269 -- is the exponentiation compound delimiter).
1272 Accumulate_Checksum ('*');
1274 if Source (Scan_Ptr + 1) = '*' then
1275 Accumulate_Checksum ('*');
1276 Scan_Ptr := Scan_Ptr + 2;
1277 Token := Tok_Double_Asterisk;
1281 Scan_Ptr := Scan_Ptr + 1;
1282 Token := Tok_Asterisk;
1286 -- Colon, which can either be an isolated colon, or part of an
1287 -- assignment compound delimiter.
1290 Accumulate_Checksum (':');
1292 if Double_Char_Token ('=') then
1293 Token := Tok_Colon_Equal;
1294 if Style_Check then Style.Check_Colon_Equal; end if;
1297 elsif Source (Scan_Ptr + 1) = '-'
1298 and then Source (Scan_Ptr + 2) /= '-'
1300 Token := Tok_Colon_Equal;
1301 Error_Msg (":- should be :=", Scan_Ptr);
1302 Scan_Ptr := Scan_Ptr + 2;
1306 Scan_Ptr := Scan_Ptr + 1;
1308 if Style_Check then Style.Check_Colon; end if;
1315 Accumulate_Checksum ('(');
1316 Scan_Ptr := Scan_Ptr + 1;
1317 Token := Tok_Left_Paren;
1318 if Style_Check then Style.Check_Left_Paren; end if;
1324 if Source (Scan_Ptr + 1) = '"' then
1325 goto Scan_Wide_Character;
1328 Error_Msg_S ("illegal character, replaced by ""(""");
1329 Scan_Ptr := Scan_Ptr + 1;
1330 Token := Tok_Left_Paren;
1337 Error_Msg_S ("illegal character, replaced by ""(""");
1338 Scan_Ptr := Scan_Ptr + 1;
1339 Token := Tok_Left_Paren;
1345 Accumulate_Checksum (',');
1346 Scan_Ptr := Scan_Ptr + 1;
1348 if Style_Check then Style.Check_Comma; end if;
1351 -- Dot, which is either an isolated period, or part of a double dot
1352 -- compound delimiter sequence. We also check for the case of a
1353 -- digit following the period, to give a better error message.
1356 Accumulate_Checksum ('.');
1358 if Double_Char_Token ('.') then
1359 Token := Tok_Dot_Dot;
1360 if Style_Check then Style.Check_Dot_Dot; end if;
1363 elsif Source (Scan_Ptr + 1) in '0' .. '9' then
1364 Error_Msg_S ("numeric literal cannot start with point");
1365 Scan_Ptr := Scan_Ptr + 1;
1368 Scan_Ptr := Scan_Ptr + 1;
1373 -- Equal, which can either be an equality operator, or part of the
1374 -- arrow (=>) compound delimiter.
1377 Accumulate_Checksum ('=');
1379 if Double_Char_Token ('>') then
1381 if Style_Check then Style.Check_Arrow; end if;
1384 elsif Source (Scan_Ptr + 1) = '=' then
1385 Error_Msg_S ("== should be =");
1386 Scan_Ptr := Scan_Ptr + 1;
1389 Scan_Ptr := Scan_Ptr + 1;
1393 -- Greater than, which can be a greater than operator, greater than
1394 -- or equal operator, or first character of a right label bracket.
1397 Accumulate_Checksum ('>');
1399 if Double_Char_Token ('=') then
1400 Token := Tok_Greater_Equal;
1403 elsif Double_Char_Token ('>') then
1404 Token := Tok_Greater_Greater;
1408 Scan_Ptr := Scan_Ptr + 1;
1409 Token := Tok_Greater;
1413 -- Less than, which can be a less than operator, less than or equal
1414 -- operator, or the first character of a left label bracket, or the
1415 -- first character of a box (<>) compound delimiter.
1418 Accumulate_Checksum ('<');
1420 if Double_Char_Token ('=') then
1421 Token := Tok_Less_Equal;
1424 elsif Double_Char_Token ('>') then
1426 if Style_Check then Style.Check_Box; end if;
1429 elsif Double_Char_Token ('<') then
1430 Token := Tok_Less_Less;
1434 Scan_Ptr := Scan_Ptr + 1;
1439 -- Minus, which is either a subtraction operator, or the first
1440 -- character of double minus starting a comment
1442 when '-' => Minus_Case : begin
1443 if Source (Scan_Ptr + 1) = '>' then
1444 Error_Msg_S ("invalid token");
1445 Scan_Ptr := Scan_Ptr + 2;
1449 elsif Source (Scan_Ptr + 1) /= '-' then
1450 Accumulate_Checksum ('-');
1451 Scan_Ptr := Scan_Ptr + 1;
1457 else -- Source (Scan_Ptr + 1) = '-' then
1458 if Style_Check then Style.Check_Comment; end if;
1459 Scan_Ptr := Scan_Ptr + 2;
1460 Start_Of_Comment := Scan_Ptr;
1462 -- Loop to scan comment (this loop runs more than once only if
1463 -- a horizontal tab or other non-graphic character is scanned)
1466 -- Scan to non graphic character (opened up for speed)
1468 -- Note that we just eat left brackets, which means that
1469 -- bracket notation cannot be used for end of line
1470 -- characters in comments. This seems a reasonable choice,
1471 -- since no one would ever use brackets notation in a real
1472 -- program in this situation, and if we allow brackets
1473 -- notation, we forbid some valid comments which contain a
1474 -- brackets sequence that happens to match an end of line
1478 exit when Source (Scan_Ptr) not in Graphic_Character;
1479 Scan_Ptr := Scan_Ptr + 1;
1480 exit when Source (Scan_Ptr) not in Graphic_Character;
1481 Scan_Ptr := Scan_Ptr + 1;
1482 exit when Source (Scan_Ptr) not in Graphic_Character;
1483 Scan_Ptr := Scan_Ptr + 1;
1484 exit when Source (Scan_Ptr) not in Graphic_Character;
1485 Scan_Ptr := Scan_Ptr + 1;
1486 exit when Source (Scan_Ptr) not in Graphic_Character;
1487 Scan_Ptr := Scan_Ptr + 1;
1490 -- Keep going if horizontal tab
1492 if Source (Scan_Ptr) = HT then
1493 if Style_Check then Style.Check_HT; end if;
1494 Scan_Ptr := Scan_Ptr + 1;
1496 -- Terminate scan of comment if line terminator
1498 elsif Source (Scan_Ptr) in Line_Terminator then
1501 -- Terminate scan of comment if end of file encountered
1502 -- (embedded EOF character or real last character in file)
1504 elsif Source (Scan_Ptr) = EOF then
1507 -- If we have a wide character, we have to scan it out,
1508 -- because it might be a legitimate line terminator
1510 elsif (Source (Scan_Ptr) = ESC
1511 and then Identifier_Char (ESC))
1513 (Source (Scan_Ptr) in Upper_Half_Character
1514 and then Upper_Half_Encoding)
1517 Wptr : constant Source_Ptr := Scan_Ptr;
1522 Scan_Wide (Source, Scan_Ptr, Code, Err);
1524 -- If not well formed wide character, then just skip
1525 -- past it and ignore it.
1528 Scan_Ptr := Wptr + 1;
1530 -- If UTF_32 terminator, terminate comment scan
1532 elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then
1538 -- Keep going if character in 80-FF range, or is ESC. These
1539 -- characters are allowed in comments by RM-2.1(1), 2.7(2).
1540 -- They are allowed even in Ada 83 mode according to the
1541 -- approved AI. ESC was added to the AI in June 93.
1543 elsif Source (Scan_Ptr) in Upper_Half_Character
1544 or else Source (Scan_Ptr) = ESC
1546 Scan_Ptr := Scan_Ptr + 1;
1548 -- Otherwise we have an illegal comment character
1551 Error_Illegal_Character;
1555 -- Note that, except when comments are tokens, we do NOT
1556 -- execute a return here, instead we fall through to reexecute
1557 -- the scan loop to look for a token.
1559 if Comment_Is_Token then
1560 Name_Len := Integer (Scan_Ptr - Start_Of_Comment);
1561 Name_Buffer (1 .. Name_Len) :=
1562 String (Source (Start_Of_Comment .. Scan_Ptr - 1));
1563 Comment_Id := Name_Find;
1564 Token := Tok_Comment;
1570 -- Double quote starting a string literal
1577 -- Percent starting a string literal
1580 Obsolescent_Check (Token_Ptr);
1582 if Warn_On_Obsolescent_Feature then
1584 ("use of ""'%"" is an obsolescent feature ('R'M 'J.2(4))?");
1586 ("\use """""" instead?");
1593 -- Apostrophe. This can either be the start of a character literal,
1594 -- or an isolated apostrophe used in a qualified expression or an
1595 -- attribute. We treat it as a character literal if it does not
1596 -- follow a right parenthesis, identifier, the keyword ALL or
1597 -- a literal. This means that we correctly treat constructs like:
1599 -- A := CHARACTER'('A');
1601 -- Note that RM-2.2(7) does not require a separator between
1602 -- "CHARACTER" and "'" in the above.
1604 when ''' => Char_Literal_Case : declare
1609 Accumulate_Checksum (''');
1610 Scan_Ptr := Scan_Ptr + 1;
1612 -- Here is where we make the test to distinguish the cases. Treat
1613 -- as apostrophe if previous token is an identifier, right paren
1614 -- or the reserved word "all" (latter case as in A.all'Address)
1615 -- (or the reserved word "project" in project files). Also treat
1616 -- it as apostrophe after a literal (this catches some legitimate
1617 -- cases, like A."abs"'Address, and also gives better error
1618 -- behavior for impossible cases like 123'xxx).
1620 if Prev_Token = Tok_Identifier
1621 or else Prev_Token = Tok_Right_Paren
1622 or else Prev_Token = Tok_All
1623 or else Prev_Token = Tok_Project
1624 or else Prev_Token in Token_Class_Literal
1626 Token := Tok_Apostrophe;
1627 if Style_Check then Style.Check_Apostrophe; end if;
1630 -- Otherwise the apostrophe starts a character literal
1633 -- Case of wide character literal
1635 if (Source (Scan_Ptr) = ESC
1637 Wide_Character_Encoding_Method in WC_ESC_Encoding_Method)
1639 (Source (Scan_Ptr) in Upper_Half_Character
1641 Upper_Half_Encoding)
1643 (Source (Scan_Ptr) = '['
1645 Source (Scan_Ptr + 1) = '"')
1648 Scan_Wide (Source, Scan_Ptr, Code, Err);
1649 Accumulate_Checksum (Code);
1652 Error_Illegal_Wide_Character;
1653 Code := Character'Pos (' ');
1655 -- In Ada 95 mode we allow any wide character in a character
1656 -- literal, but in Ada 2005, the set of characters allowed
1657 -- is restricted to graphic characters.
1659 elsif Ada_Version >= Ada_05
1660 and then Is_UTF_32_Non_Graphic (UTF_32 (Code))
1663 ("(Ada 2005) non-graphic character not permitted " &
1664 "in character literal", Wptr);
1667 if Source (Scan_Ptr) /= ''' then
1668 Error_Msg_S ("missing apostrophe");
1670 Scan_Ptr := Scan_Ptr + 1;
1673 -- If we do not find a closing quote in the expected place then
1674 -- assume that we have a misguided attempt at a string literal.
1676 -- However, if previous token is RANGE, then we return an
1677 -- apostrophe instead since this gives better error recovery
1679 elsif Source (Scan_Ptr + 1) /= ''' then
1680 if Prev_Token = Tok_Range then
1681 Token := Tok_Apostrophe;
1685 Scan_Ptr := Scan_Ptr - 1;
1687 ("strings are delimited by double quote character");
1693 -- Otherwise we have a (non-wide) character literal
1696 Accumulate_Checksum (Source (Scan_Ptr));
1698 if Source (Scan_Ptr) not in Graphic_Character then
1699 if Source (Scan_Ptr) in Upper_Half_Character then
1700 if Ada_Version = Ada_83 then
1701 Error_Illegal_Character;
1705 Error_Illegal_Character;
1709 Code := Get_Char_Code (Source (Scan_Ptr));
1710 Scan_Ptr := Scan_Ptr + 2;
1713 -- Fall through here with Scan_Ptr updated past the closing
1714 -- quote, and Code set to the Char_Code value for the literal
1716 Accumulate_Checksum (''');
1717 Token := Tok_Char_Literal;
1718 Set_Character_Literal_Name (Code);
1719 Token_Name := Name_Find;
1720 Character_Code := Code;
1724 end Char_Literal_Case;
1726 -- Right parenthesis
1729 Accumulate_Checksum (')');
1730 Scan_Ptr := Scan_Ptr + 1;
1731 Token := Tok_Right_Paren;
1732 if Style_Check then Style.Check_Right_Paren; end if;
1735 -- Right bracket or right brace, treated as right paren
1738 Error_Msg_S ("illegal character, replaced by "")""");
1739 Scan_Ptr := Scan_Ptr + 1;
1740 Token := Tok_Right_Paren;
1743 -- Slash (can be division operator or first character of not equal)
1746 Accumulate_Checksum ('/');
1748 if Double_Char_Token ('=') then
1749 Token := Tok_Not_Equal;
1752 Scan_Ptr := Scan_Ptr + 1;
1760 Accumulate_Checksum (';');
1761 Scan_Ptr := Scan_Ptr + 1;
1762 Token := Tok_Semicolon;
1763 if Style_Check then Style.Check_Semicolon; end if;
1768 when '|' => Vertical_Bar_Case : begin
1769 Accumulate_Checksum ('|');
1771 -- Special check for || to give nice message
1773 if Source (Scan_Ptr + 1) = '|' then
1774 Error_Msg_S ("""'|'|"" should be `OR ELSE`");
1775 Scan_Ptr := Scan_Ptr + 2;
1780 Scan_Ptr := Scan_Ptr + 1;
1781 Token := Tok_Vertical_Bar;
1782 if Style_Check then Style.Check_Vertical_Bar; end if;
1785 end Vertical_Bar_Case;
1787 -- Exclamation, replacement character for vertical bar
1789 when '!' => Exclamation_Case : begin
1790 Accumulate_Checksum ('!');
1791 Obsolescent_Check (Token_Ptr);
1793 if Warn_On_Obsolescent_Feature then
1795 ("use of ""'!"" is an obsolescent feature ('R'M 'J.2(2))?");
1797 ("\use ""'|"" instead?");
1800 if Source (Scan_Ptr + 1) = '=' then
1801 Error_Msg_S ("'!= should be /=");
1802 Scan_Ptr := Scan_Ptr + 2;
1803 Token := Tok_Not_Equal;
1807 Scan_Ptr := Scan_Ptr + 1;
1808 Token := Tok_Vertical_Bar;
1811 end Exclamation_Case;
1815 when '+' => Plus_Case : begin
1816 Accumulate_Checksum ('+');
1817 Scan_Ptr := Scan_Ptr + 1;
1822 -- Digits starting a numeric literal
1827 if Identifier_Char (Source (Scan_Ptr)) then
1829 ("delimiter required between literal and identifier");
1834 -- Lower case letters
1838 Underline_Found := False;
1839 Name_Buffer (1) := Source (Scan_Ptr);
1840 Accumulate_Checksum (Name_Buffer (1));
1841 Scan_Ptr := Scan_Ptr + 1;
1842 goto Scan_Identifier;
1844 -- Upper case letters
1848 Underline_Found := False;
1850 Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
1851 Accumulate_Checksum (Name_Buffer (1));
1852 Scan_Ptr := Scan_Ptr + 1;
1853 goto Scan_Identifier;
1855 -- Underline character
1858 if Special_Characters ('_') then
1859 Token_Ptr := Scan_Ptr;
1860 Scan_Ptr := Scan_Ptr + 1;
1861 Token := Tok_Special;
1862 Special_Character := '_';
1866 Error_Msg_S ("identifier cannot start with underline");
1868 Name_Buffer (1) := '_';
1869 Scan_Ptr := Scan_Ptr + 1;
1870 Underline_Found := False;
1871 goto Scan_Identifier;
1873 -- Space (not possible, because we scanned past blanks)
1876 raise Program_Error;
1878 -- Characters in top half of ASCII 8-bit chart
1880 when Upper_Half_Character =>
1882 -- Wide character case
1884 if Upper_Half_Encoding then
1885 goto Scan_Wide_Character;
1887 -- Otherwise we have OK Latin-1 character
1890 -- Upper half characters may possibly be identifier letters
1891 -- but can never be digits, so Identifier_Char can be used to
1892 -- test for a valid start of identifier character.
1894 if Identifier_Char (Source (Scan_Ptr)) then
1896 Underline_Found := False;
1897 goto Scan_Identifier;
1899 Error_Illegal_Character;
1905 -- ESC character, possible start of identifier if wide characters
1906 -- using ESC encoding are allowed in identifiers, which we can
1907 -- tell by looking at the Identifier_Char flag for ESC, which is
1908 -- only true if these conditions are met. In Ada 2005 mode, may
1909 -- also be valid UTF_32 space or line terminator character.
1911 if Identifier_Char (ESC) then
1913 goto Scan_Wide_Character;
1915 Error_Illegal_Character;
1918 -- Invalid control characters
1920 when NUL | SOH | STX | ETX | EOT | ENQ | ACK | BEL | BS | ASCII.SO |
1921 SI | DLE | DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN |
1922 EM | FS | GS | RS | US | DEL
1924 Error_Illegal_Character;
1926 -- Invalid graphic characters
1928 when '#' | '$' | '?' | '@' | '`' | '\' | '^' | '~' =>
1930 -- If Set_Special_Character has been called for this character,
1931 -- set Scans.Special_Character and return a Special token.
1933 if Special_Characters (Source (Scan_Ptr)) then
1934 Token_Ptr := Scan_Ptr;
1935 Token := Tok_Special;
1936 Special_Character := Source (Scan_Ptr);
1937 Scan_Ptr := Scan_Ptr + 1;
1940 -- Otherwise, this is an illegal character
1943 Error_Illegal_Character;
1946 -- End switch on non-blank character
1950 -- End loop past format effectors. The exit from this loop is by
1951 -- executing a return statement following completion of token scan
1952 -- (control never falls out of this loop to the code which follows)
1956 -- Wide_Character scanning routine. On entry we have encountered the
1957 -- initial character of a wide character sequence.
1959 <<Scan_Wide_Character>>
1968 Scan_Wide (Source, Scan_Ptr, Code, Err);
1970 -- If bad wide character, signal error and continue scan
1973 Error_Illegal_Wide_Character;
1974 goto Scan_Next_Character;
1977 Cat := Get_Category (UTF_32 (Code));
1979 -- If OK letter, reset scan ptr and go scan identifier
1981 if Is_UTF_32_Letter (Cat) then
1984 Underline_Found := False;
1985 goto Scan_Identifier;
1987 -- If OK wide space, ignore and keep scanning (we do not include
1988 -- any ignored spaces in checksum)
1990 elsif Is_UTF_32_Space (Cat) then
1991 goto Scan_Next_Character;
1993 -- If OK wide line terminator, terminate current line
1995 elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then
1997 goto Scan_Line_Terminator;
1999 -- Punctuation is an error (at start of identifier)
2001 elsif Is_UTF_32_Punctuation (Cat) then
2003 ("identifier cannot start with punctuation", Wptr);
2006 Underline_Found := False;
2007 goto Scan_Identifier;
2009 -- Mark character is an error (at start of identifier)
2011 elsif Is_UTF_32_Mark (Cat) then
2013 ("identifier cannot start with mark character", Wptr);
2016 Underline_Found := False;
2017 goto Scan_Identifier;
2019 -- Other format character is an error (at start of identifier)
2021 elsif Is_UTF_32_Other (Cat) then
2023 ("identifier cannot start with other format character", Wptr);
2026 Underline_Found := False;
2027 goto Scan_Identifier;
2029 -- Extended digit character is an error. Could be bad start of
2030 -- identifier or bad literal. Not worth doing too much to try to
2031 -- distinguish these cases, but we will do a little bit.
2033 elsif Is_UTF_32_Digit (Cat) then
2035 ("identifier cannot start with digit character", Wptr);
2038 Underline_Found := False;
2039 goto Scan_Identifier;
2041 -- All other wide characters are illegal here
2044 Error_Illegal_Wide_Character;
2045 goto Scan_Next_Character;
2049 -- Routine to scan line terminator. On entry Scan_Ptr points to a
2050 -- character which is one of FF, LF, CR, VT, or one of the wide characters
2051 -- that is treated as a line terminator.
2053 <<Scan_Line_Terminator>>
2055 -- Check line too long
2059 -- Set Token_Ptr, if End_Of_Line is a token, for the case when it is
2062 if End_Of_Line_Is_Token then
2063 Token_Ptr := Scan_Ptr;
2070 Skip_Line_Terminators (Scan_Ptr, Physical);
2072 -- If we are at start of physical line, update scan pointers to
2073 -- reflect the start of the new line.
2076 Current_Line_Start := Scan_Ptr;
2077 Start_Column := Set_Start_Column;
2078 First_Non_Blank_Location := Scan_Ptr;
2080 -- If End_Of_Line is a token, we return it as it is a
2083 if End_Of_Line_Is_Token then
2084 Token := Tok_End_Of_Line;
2090 goto Scan_Next_Character;
2092 -- Identifier scanning routine. On entry, some initial characters of
2093 -- the identifier may have already been stored in Name_Buffer. If so,
2094 -- Name_Len has the number of characters stored. Otherwise Name_Len is
2095 -- set to zero on entry. Underline_Found is also set False on entry.
2099 -- This loop scans as fast as possible past lower half letters and
2100 -- digits, which we expect to be the most common characters.
2103 if Source (Scan_Ptr) in 'a' .. 'z'
2104 or else Source (Scan_Ptr) in '0' .. '9'
2106 Name_Buffer (Name_Len + 1) := Source (Scan_Ptr);
2107 Accumulate_Checksum (Source (Scan_Ptr));
2109 elsif Source (Scan_Ptr) in 'A' .. 'Z' then
2110 Name_Buffer (Name_Len + 1) :=
2111 Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
2112 Accumulate_Checksum (Name_Buffer (Name_Len + 1));
2118 Underline_Found := False;
2119 Scan_Ptr := Scan_Ptr + 1;
2120 Name_Len := Name_Len + 1;
2123 -- If we fall through, then we have encountered either an underline
2124 -- character, or an extended identifier character (i.e. one from the
2125 -- upper half), or a wide character, or an identifier terminator. The
2126 -- initial test speeds us up in the most common case where we have
2127 -- an identifier terminator. Note that ESC is an identifier character
2128 -- only if a wide character encoding method that uses ESC encoding
2129 -- is active, so if we find an ESC character we know that we have a
2132 if Identifier_Char (Source (Scan_Ptr)) then
2134 -- Case of underline
2136 if Source (Scan_Ptr) = '_' then
2137 Accumulate_Checksum ('_');
2139 if Underline_Found then
2140 Error_No_Double_Underline;
2142 Underline_Found := True;
2143 Name_Len := Name_Len + 1;
2144 Name_Buffer (Name_Len) := '_';
2147 Scan_Ptr := Scan_Ptr + 1;
2148 goto Scan_Identifier;
2150 -- Upper half character
2152 elsif Source (Scan_Ptr) in Upper_Half_Character
2153 and then not Upper_Half_Encoding
2155 Accumulate_Checksum (Source (Scan_Ptr));
2156 Store_Encoded_Character
2157 (Get_Char_Code (Fold_Lower (Source (Scan_Ptr))));
2158 Scan_Ptr := Scan_Ptr + 1;
2159 Underline_Found := False;
2160 goto Scan_Identifier;
2162 -- Left bracket not followed by a quote terminates an identifier.
2163 -- This is an error, but we don't want to give a junk error msg
2164 -- about wide characters in this case!
2166 elsif Source (Scan_Ptr) = '['
2167 and then Source (Scan_Ptr + 1) /= '"'
2171 -- We know we have a wide character encoding here (the current
2172 -- character is either ESC, left bracket, or an upper half
2173 -- character depending on the encoding method).
2176 -- Scan out the wide character and insert the appropriate
2177 -- encoding into the name table entry for the identifier.
2187 Scan_Wide (Source, Scan_Ptr, Code, Err);
2189 -- If error, signal error
2192 Error_Illegal_Wide_Character;
2194 -- If the character scanned is a normal identifier
2195 -- character, then we treat it that way.
2197 elsif In_Character_Range (Code)
2198 and then Identifier_Char (Get_Character (Code))
2200 Chr := Get_Character (Code);
2201 Accumulate_Checksum (Chr);
2202 Store_Encoded_Character
2203 (Get_Char_Code (Fold_Lower (Chr)));
2204 Underline_Found := False;
2206 -- Here if not a normal identifier character
2209 -- Make sure we are allowing wide characters in
2210 -- identifiers. Note that we allow wide character
2211 -- notation for an OK identifier character. This in
2212 -- particular allows bracket or other notation to be
2213 -- used for upper half letters.
2215 -- Wide characters are always allowed in Ada 2005
2217 if Identifier_Character_Set /= 'w'
2218 and then Ada_Version < Ada_05
2221 ("wide character not allowed in identifier", Wptr);
2224 Cat := Get_Category (UTF_32 (Code));
2226 -- If OK letter, store it folding to upper case. Note
2227 -- that we include the folded letter in the checksum.
2229 if Is_UTF_32_Letter (Cat) then
2231 Char_Code (UTF_32_To_Upper_Case (UTF_32 (Code)));
2232 Accumulate_Checksum (Code);
2233 Store_Encoded_Character (Code);
2234 Underline_Found := False;
2236 -- If OK extended digit or mark, then store it
2238 elsif Is_UTF_32_Digit (Cat)
2239 or else Is_UTF_32_Mark (Cat)
2241 Accumulate_Checksum (Code);
2242 Store_Encoded_Character (Code);
2243 Underline_Found := False;
2245 -- Wide punctuation is also stored, but counts as an
2246 -- underline character for error checking purposes.
2248 elsif Is_UTF_32_Punctuation (Cat) then
2249 Accumulate_Checksum (Code);
2251 if Underline_Found then
2253 Cend : constant Source_Ptr := Scan_Ptr;
2256 Error_No_Double_Underline;
2261 Store_Encoded_Character (Code);
2262 Underline_Found := True;
2265 -- Wide character in Unicode category "Other, Format"
2266 -- is accepted in an identifier, but is ignored and not
2267 -- stored. It seems reasonable to exclude it from the
2270 -- Note that it is correct (see AI-395) to simply strip
2271 -- other format characters, before testing for double
2272 -- underlines, or for reserved words).
2274 elsif Is_UTF_32_Other (Cat) then
2277 -- Wide character in category Separator,Space terminates
2279 elsif Is_UTF_32_Space (Cat) then
2280 goto Scan_Identifier_Complete;
2282 -- Any other wide character is not acceptable
2286 ("invalid wide character in identifier", Wptr);
2290 goto Scan_Identifier;
2295 -- Scan of identifier is complete. The identifier is stored in
2296 -- Name_Buffer, and Scan_Ptr points past the last character.
2298 <<Scan_Identifier_Complete>>
2299 Token_Name := Name_Find;
2301 -- Check for identifier ending with underline or punctuation char
2303 if Underline_Found then
2304 Underline_Found := False;
2306 if Source (Scan_Ptr - 1) = '_' then
2308 ("identifier cannot end with underline", Scan_Ptr - 1);
2311 ("identifier cannot end with punctuation character", Wptr);
2315 -- Here is where we check if it was a keyword
2317 if Get_Name_Table_Byte (Token_Name) /= 0
2318 and then (Ada_Version >= Ada_95
2319 or else Token_Name not in Ada_95_Reserved_Words)
2320 and then (Ada_Version >= Ada_05
2321 or else Token_Name not in Ada_2005_Reserved_Words)
2323 Token := Token_Type'Val (Get_Name_Table_Byte (Token_Name));
2325 -- Deal with possible style check for non-lower case keyword, but
2326 -- we don't treat ACCESS, DELTA, DIGITS, RANGE as keywords for
2327 -- this purpose if they appear as attribute designators. Actually
2328 -- we only check the first character for speed.
2330 -- Ada 2005 (AI-284): Do not apply the style check in case of
2331 -- "pragma Interface"
2334 and then Source (Token_Ptr) <= 'Z'
2335 and then (Prev_Token /= Tok_Apostrophe
2337 (Token /= Tok_Access
2338 and then Token /= Tok_Delta
2339 and then Token /= Tok_Digits
2340 and then Token /= Tok_Range))
2341 and then (Token /= Tok_Interface
2343 (Token = Tok_Interface
2344 and then Prev_Token /= Tok_Pragma))
2346 Style.Non_Lower_Case_Keyword;
2349 -- We must reset Token_Name since this is not an identifier and
2350 -- if we leave Token_Name set, the parser gets confused because
2351 -- it thinks it is dealing with an identifier instead of the
2352 -- corresponding keyword.
2354 Token_Name := No_Name;
2355 Accumulate_Token_Checksum;
2358 -- It is an identifier after all
2361 Token := Tok_Identifier;
2362 Accumulate_Token_Checksum;
2368 --------------------------
2369 -- Set_Comment_As_Token --
2370 --------------------------
procedure Set_Comment_As_Token (Value : Boolean) is
begin
   --  Record the caller's choice in the package-level flag. The comment
   --  scanning code in Scan tests Comment_Is_Token: when it is True the
   --  comment text is entered in the name table and Tok_Comment is
   --  produced; when it is False the scanner goes on to look for the
   --  next token.

   Comment_Is_Token := Value;
end Set_Comment_As_Token;
2377 ------------------------------
2378 -- Set_End_Of_Line_As_Token --
2379 ------------------------------
procedure Set_End_Of_Line_As_Token (Value : Boolean) is
begin
   --  Record the caller's choice in the package-level flag. The line
   --  terminator code in Scan tests End_Of_Line_Is_Token: when it is True,
   --  Token_Ptr is set at the terminator and Token is set to
   --  Tok_End_Of_Line; otherwise scanning continues with the next
   --  character.

   End_Of_Line_Is_Token := Value;
end Set_End_Of_Line_As_Token;
2386 ---------------------------
2387 -- Set_Special_Character --
2388 ---------------------------
procedure Set_Special_Character (C : Character) is
begin
   --  Only the characters listed below can be registered as special. For
   --  a registered character, Scan sets Special_Character to the character
   --  and sets Token to Tok_Special instead of reporting an error.

   case C is
      when '#' | '$' | '_' | '?' | '@' | '`' | '\' | '^' | '~' =>
         Special_Characters (C) := True;

      --  Any other character cannot be made special: the request is
      --  silently ignored.

      when others =>
         null;
   end case;
end Set_Special_Character;
2401 ----------------------
2402 -- Set_Start_Column --
2403 ----------------------
2405 -- Note: it seems at first glance a little expensive to compute this value
2406 -- for every source line (since it is certainly not used for all source
2407 -- lines). On the other hand, it doesn't take much more work to skip past
2408 -- the initial white space on the line counting the columns than it would
2409 -- to scan past the white space using the standard scanning circuits.
function Set_Start_Column return Column_Number is
   Start_Column : Column_Number := 0;
   --  Column count accumulated so far (zero origin); this is the value
   --  returned. Each blank adds one, each horizontal tab advances to the
   --  next multiple of eight.

begin
   --  On exit, Scan_Ptr has been advanced past all leading blanks and
   --  horizontal tabs, so it designates the first other character.

   --  Outer loop scans past horizontal tab characters

   Tabs_Loop : loop

      --  Inner loop scans past blanks as fast as possible, bumping Scan_Ptr
      --  past the blanks and adjusting Start_Column to account for them.
      --  The test is unrolled seven deep: a full run of seven blanks goes
      --  round the loop again, any shorter run leaves the loop with
      --  Scan_Ptr on the first non-blank character.

      Blanks_Loop : loop
         if Source (Scan_Ptr) = ' ' then
            if Source (Scan_Ptr + 1) = ' ' then
               if Source (Scan_Ptr + 2) = ' ' then
                  if Source (Scan_Ptr + 3) = ' ' then
                     if Source (Scan_Ptr + 4) = ' ' then
                        if Source (Scan_Ptr + 5) = ' ' then
                           if Source (Scan_Ptr + 6) = ' ' then
                              Scan_Ptr := Scan_Ptr + 7;
                              Start_Column := Start_Column + 7;
                           else
                              Scan_Ptr := Scan_Ptr + 6;
                              Start_Column := Start_Column + 6;
                              exit Blanks_Loop;
                           end if;
                        else
                           Scan_Ptr := Scan_Ptr + 5;
                           Start_Column := Start_Column + 5;
                           exit Blanks_Loop;
                        end if;
                     else
                        Scan_Ptr := Scan_Ptr + 4;
                        Start_Column := Start_Column + 4;
                        exit Blanks_Loop;
                     end if;
                  else
                     Scan_Ptr := Scan_Ptr + 3;
                     Start_Column := Start_Column + 3;
                     exit Blanks_Loop;
                  end if;
               else
                  Scan_Ptr := Scan_Ptr + 2;
                  Start_Column := Start_Column + 2;
                  exit Blanks_Loop;
               end if;
            else
               Scan_Ptr := Scan_Ptr + 1;
               Start_Column := Start_Column + 1;
               exit Blanks_Loop;
            end if;
         else
            exit Blanks_Loop;
         end if;
      end loop Blanks_Loop;

      --  Outer loop keeps going only if a horizontal tab follows

      if Source (Scan_Ptr) = HT then
         if Style_Check then Style.Check_HT; end if;
         Scan_Ptr := Scan_Ptr + 1;

         --  A tab moves to the next multiple of eight columns

         Start_Column := (Start_Column / 8) * 8 + 8;
      else
         exit Tabs_Loop;
      end if;

   end loop Tabs_Loop;

   return Start_Column;
end Set_Start_Column;