1 ------------------------------------------------------------------------------
3 -- GNAT COMPILER COMPONENTS --
9 -- Copyright (C) 1992-2004 Free Software Foundation, Inc. --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 2, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
17 -- for more details. You should have received a copy of the GNU General --
18 -- Public License distributed with GNAT; see file COPYING. If not, write --
19 -- to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, --
20 -- MA 02111-1307, USA. --
22 -- GNAT was originally developed by the GNAT team at New York University. --
23 -- Extensive contributions were provided by Ada Core Technologies Inc. --
25 ------------------------------------------------------------------------------
27 with Csets; use Csets;
28 with Err_Vars; use Err_Vars;
29 with Hostparm; use Hostparm;
30 with Namet; use Namet;
32 with Scans; use Scans;
33 with Sinput; use Sinput;
34 with Snames; use Snames;
35 with Stringt; use Stringt;
36 with Stylesw; use Stylesw;
37 with Uintp; use Uintp;
38 with Urealp; use Urealp;
39 with Widechar; use Widechar;
42 with System.WCh_Con; use System.WCh_Con;
47 -- Make control characters visible
49 Special_Characters : array (Character) of Boolean := (others => False);
50 -- For characters that are Special token, the value is True
52 Comment_Is_Token : Boolean := False;
53 -- True if comments are tokens
55 End_Of_Line_Is_Token : Boolean := False;
56 -- True if End_Of_Line is a token
58 -----------------------
59 -- Local Subprograms --
60 -----------------------
62 procedure Accumulate_Token_Checksum;
63 pragma Inline (Accumulate_Token_Checksum);
65 procedure Accumulate_Checksum (C : Character);
66 pragma Inline (Accumulate_Checksum);
67 -- This routine accumulates the checksum given character C. During the
68 -- scanning of a source file, this routine is called with every character
69 -- in the source, excluding blanks, and all control characters (except
70 -- that ESC is included in the checksum). Upper case letters not in string
71 -- literals are folded by the caller. See Sinput spec for the documentation
72 -- of the checksum algorithm. Note: checksum values are only used if we
73 -- generate code, so it is not necessary to worry about making the right
74 -- sequence of calls in any error situation.
76 procedure Accumulate_Checksum (C : Char_Code);
77 pragma Inline (Accumulate_Checksum);
78 -- This version is identical, except that the argument, C, is a character
79 -- code value instead of a character. This is used when wide characters
80 -- are scanned. We use the character code rather than the ASCII characters
81 -- so that the checksum is independent of wide character encoding method.
83 procedure Initialize_Checksum;
84 pragma Inline (Initialize_Checksum);
85 -- Initialize checksum value
87 -------------------------
88 -- Accumulate_Checksum --
89 -------------------------
91 procedure Accumulate_Checksum (C : Character) is
93 System.CRC32.Update (System.CRC32.CRC32 (Checksum), C);
94 end Accumulate_Checksum;
96 procedure Accumulate_Checksum (C : Char_Code) is
98 Accumulate_Checksum (Character'Val (C / 256));
99 Accumulate_Checksum (Character'Val (C mod 256));
100 end Accumulate_Checksum;
102 -------------------------------
103 -- Accumulate_Token_Checksum --
104 -------------------------------
106 procedure Accumulate_Token_Checksum is
109 (System.CRC32.CRC32 (Checksum),
110 Character'Val (Token_Type'Pos (Token)));
111 end Accumulate_Token_Checksum;
113 ----------------------------
114 -- Determine_Token_Casing --
115 ----------------------------
117 function Determine_Token_Casing return Casing_Type is
119 return Determine_Casing (Source (Token_Ptr .. Scan_Ptr - 1));
120 end Determine_Token_Casing;
122 -------------------------
123 -- Initialize_Checksum --
124 -------------------------
126 procedure Initialize_Checksum is
128 System.CRC32.Initialize (System.CRC32.CRC32 (Checksum));
129 end Initialize_Checksum;
131 ------------------------
132 -- Initialize_Scanner --
133 ------------------------
135 procedure Initialize_Scanner
136 (Unit : Unit_Number_Type;
137 Index : Source_File_Index)
140 -- Set up Token_Type values in Names Table entries for reserved keywords
141 -- We use the Pos value of the Token_Type value. Note we are relying on
142 -- the fact that Token_Type'Val (0) is not a reserved word!
144 Set_Name_Table_Byte (Name_Abort, Token_Type'Pos (Tok_Abort));
145 Set_Name_Table_Byte (Name_Abs, Token_Type'Pos (Tok_Abs));
146 Set_Name_Table_Byte (Name_Abstract, Token_Type'Pos (Tok_Abstract));
147 Set_Name_Table_Byte (Name_Accept, Token_Type'Pos (Tok_Accept));
148 Set_Name_Table_Byte (Name_Access, Token_Type'Pos (Tok_Access));
149 Set_Name_Table_Byte (Name_And, Token_Type'Pos (Tok_And));
150 Set_Name_Table_Byte (Name_Aliased, Token_Type'Pos (Tok_Aliased));
151 Set_Name_Table_Byte (Name_All, Token_Type'Pos (Tok_All));
152 Set_Name_Table_Byte (Name_Array, Token_Type'Pos (Tok_Array));
153 Set_Name_Table_Byte (Name_At, Token_Type'Pos (Tok_At));
154 Set_Name_Table_Byte (Name_Begin, Token_Type'Pos (Tok_Begin));
155 Set_Name_Table_Byte (Name_Body, Token_Type'Pos (Tok_Body));
156 Set_Name_Table_Byte (Name_Case, Token_Type'Pos (Tok_Case));
157 Set_Name_Table_Byte (Name_Constant, Token_Type'Pos (Tok_Constant));
158 Set_Name_Table_Byte (Name_Declare, Token_Type'Pos (Tok_Declare));
159 Set_Name_Table_Byte (Name_Delay, Token_Type'Pos (Tok_Delay));
160 Set_Name_Table_Byte (Name_Delta, Token_Type'Pos (Tok_Delta));
161 Set_Name_Table_Byte (Name_Digits, Token_Type'Pos (Tok_Digits));
162 Set_Name_Table_Byte (Name_Do, Token_Type'Pos (Tok_Do));
163 Set_Name_Table_Byte (Name_Else, Token_Type'Pos (Tok_Else));
164 Set_Name_Table_Byte (Name_Elsif, Token_Type'Pos (Tok_Elsif));
165 Set_Name_Table_Byte (Name_End, Token_Type'Pos (Tok_End));
166 Set_Name_Table_Byte (Name_Entry, Token_Type'Pos (Tok_Entry));
167 Set_Name_Table_Byte (Name_Exception, Token_Type'Pos (Tok_Exception));
168 Set_Name_Table_Byte (Name_Exit, Token_Type'Pos (Tok_Exit));
169 Set_Name_Table_Byte (Name_For, Token_Type'Pos (Tok_For));
170 Set_Name_Table_Byte (Name_Function, Token_Type'Pos (Tok_Function));
171 Set_Name_Table_Byte (Name_Generic, Token_Type'Pos (Tok_Generic));
172 Set_Name_Table_Byte (Name_Goto, Token_Type'Pos (Tok_Goto));
173 Set_Name_Table_Byte (Name_If, Token_Type'Pos (Tok_If));
174 Set_Name_Table_Byte (Name_In, Token_Type'Pos (Tok_In));
175 Set_Name_Table_Byte (Name_Is, Token_Type'Pos (Tok_Is));
176 Set_Name_Table_Byte (Name_Limited, Token_Type'Pos (Tok_Limited));
177 Set_Name_Table_Byte (Name_Loop, Token_Type'Pos (Tok_Loop));
178 Set_Name_Table_Byte (Name_Mod, Token_Type'Pos (Tok_Mod));
179 Set_Name_Table_Byte (Name_New, Token_Type'Pos (Tok_New));
180 Set_Name_Table_Byte (Name_Not, Token_Type'Pos (Tok_Not));
181 Set_Name_Table_Byte (Name_Null, Token_Type'Pos (Tok_Null));
182 Set_Name_Table_Byte (Name_Of, Token_Type'Pos (Tok_Of));
183 Set_Name_Table_Byte (Name_Or, Token_Type'Pos (Tok_Or));
184 Set_Name_Table_Byte (Name_Others, Token_Type'Pos (Tok_Others));
185 Set_Name_Table_Byte (Name_Out, Token_Type'Pos (Tok_Out));
186 Set_Name_Table_Byte (Name_Package, Token_Type'Pos (Tok_Package));
187 Set_Name_Table_Byte (Name_Pragma, Token_Type'Pos (Tok_Pragma));
188 Set_Name_Table_Byte (Name_Private, Token_Type'Pos (Tok_Private));
189 Set_Name_Table_Byte (Name_Procedure, Token_Type'Pos (Tok_Procedure));
190 Set_Name_Table_Byte (Name_Protected, Token_Type'Pos (Tok_Protected));
191 Set_Name_Table_Byte (Name_Raise, Token_Type'Pos (Tok_Raise));
192 Set_Name_Table_Byte (Name_Range, Token_Type'Pos (Tok_Range));
193 Set_Name_Table_Byte (Name_Record, Token_Type'Pos (Tok_Record));
194 Set_Name_Table_Byte (Name_Rem, Token_Type'Pos (Tok_Rem));
195 Set_Name_Table_Byte (Name_Renames, Token_Type'Pos (Tok_Renames));
196 Set_Name_Table_Byte (Name_Requeue, Token_Type'Pos (Tok_Requeue));
197 Set_Name_Table_Byte (Name_Return, Token_Type'Pos (Tok_Return));
198 Set_Name_Table_Byte (Name_Reverse, Token_Type'Pos (Tok_Reverse));
199 Set_Name_Table_Byte (Name_Select, Token_Type'Pos (Tok_Select));
200 Set_Name_Table_Byte (Name_Separate, Token_Type'Pos (Tok_Separate));
201 Set_Name_Table_Byte (Name_Subtype, Token_Type'Pos (Tok_Subtype));
202 Set_Name_Table_Byte (Name_Tagged, Token_Type'Pos (Tok_Tagged));
203 Set_Name_Table_Byte (Name_Task, Token_Type'Pos (Tok_Task));
204 Set_Name_Table_Byte (Name_Terminate, Token_Type'Pos (Tok_Terminate));
205 Set_Name_Table_Byte (Name_Then, Token_Type'Pos (Tok_Then));
206 Set_Name_Table_Byte (Name_Type, Token_Type'Pos (Tok_Type));
207 Set_Name_Table_Byte (Name_Until, Token_Type'Pos (Tok_Until));
208 Set_Name_Table_Byte (Name_Use, Token_Type'Pos (Tok_Use));
209 Set_Name_Table_Byte (Name_When, Token_Type'Pos (Tok_When));
210 Set_Name_Table_Byte (Name_While, Token_Type'Pos (Tok_While));
211 Set_Name_Table_Byte (Name_With, Token_Type'Pos (Tok_With));
212 Set_Name_Table_Byte (Name_Xor, Token_Type'Pos (Tok_Xor));
214 -- Initialize scan control variables
216 Current_Source_File := Index;
217 Source := Source_Text (Current_Source_File);
218 Current_Source_Unit := Unit;
219 Scan_Ptr := Source_First (Current_Source_File);
221 Token_Ptr := Scan_Ptr;
222 Current_Line_Start := Scan_Ptr;
224 Token_Name := No_Name;
225 Start_Column := Set_Start_Column;
226 First_Non_Blank_Location := Scan_Ptr;
230 -- Do not call Scan, otherwise the License stuff does not work in Scn.
232 end Initialize_Scanner;
234 ------------------------------
235 -- Reset_Special_Characters --
236 ------------------------------
238 procedure Reset_Special_Characters is
240 Special_Characters := (others => False);
241 end Reset_Special_Characters;
249 Start_Of_Comment : Source_Ptr;
251 procedure Check_End_Of_Line;
252 -- Called when end of line encountered. Checks that line is not
253 -- too long, and that other style checks for the end of line are met.
255 function Double_Char_Token (C : Character) return Boolean;
256 -- This function is used for double character tokens like := or <>. It
257 -- checks if the character following Source (Scan_Ptr) is C, and if so
258 -- bumps Scan_Ptr past the pair of characters and returns True. A space
259 -- between the two characters is also recognized with an appropriate
260 -- error message being issued. If C is not present, False is returned.
261 -- Note that Double_Char_Token can only be used for tokens defined in
262 -- the Ada syntax (it's use for error cases like && is not appropriate
263 -- since we do not want a junk message for a case like &-space-&).
265 procedure Error_Illegal_Character;
266 -- Give illegal character error, Scan_Ptr points to character.
267 -- On return, Scan_Ptr is bumped past the illegal character.
269 procedure Error_Illegal_Wide_Character;
270 -- Give illegal wide character message. On return, Scan_Ptr is bumped
271 -- past the illegal character, which may still leave us pointing to
272 -- junk, not much we can do if the escape sequence is messed up!
274 procedure Error_Long_Line;
275 -- Signal error of excessively long line
277 procedure Error_No_Double_Underline;
278 -- Signal error of double underline character
281 -- This is the procedure for scanning out numeric literals. On entry,
282 -- Scan_Ptr points to the digit that starts the numeric literal (the
283 -- checksum for this character has not been accumulated yet). On return
284 -- Scan_Ptr points past the last character of the numeric literal, Token
285 -- and Token_Node are set appropriately, and the checksum is updated.
288 -- This is the procedure for scanning out string literals. On entry,
289 -- Scan_Ptr points to the opening string quote (the checksum for this
290 -- character has not been accumulated yet). On return Scan_Ptr points
291 -- past the closing quote of the string literal, Token and Token_Node
292 -- are set appropriately, and the checksum is upated.
294 -----------------------
295 -- Check_End_Of_Line --
296 -----------------------
298 procedure Check_End_Of_Line is
299 Len : constant Int := Int (Scan_Ptr) - Int (Current_Line_Start);
302 if Style_Check and Style_Check_Max_Line_Length then
303 Style.Check_Line_Terminator (Len);
305 elsif Len > Hostparm.Max_Line_Length then
308 end Check_End_Of_Line;
310 -----------------------
311 -- Double_Char_Token --
312 -----------------------
314 function Double_Char_Token (C : Character) return Boolean is
316 if Source (Scan_Ptr + 1) = C then
317 Accumulate_Checksum (C);
318 Scan_Ptr := Scan_Ptr + 2;
321 elsif Source (Scan_Ptr + 1) = ' '
322 and then Source (Scan_Ptr + 2) = C
324 Scan_Ptr := Scan_Ptr + 1;
325 Error_Msg_S ("no space allowed here");
326 Scan_Ptr := Scan_Ptr + 2;
332 end Double_Char_Token;
334 -----------------------------
335 -- Error_Illegal_Character --
336 -----------------------------
338 procedure Error_Illegal_Character is
340 Error_Msg_S ("illegal character");
341 Scan_Ptr := Scan_Ptr + 1;
342 end Error_Illegal_Character;
344 ----------------------------------
345 -- Error_Illegal_Wide_Character --
346 ----------------------------------
348 procedure Error_Illegal_Wide_Character is
350 Error_Msg_S ("illegal wide character, check -gnatW switch");
351 Scan_Ptr := Scan_Ptr + 1;
352 end Error_Illegal_Wide_Character;
354 ---------------------
355 -- Error_Long_Line --
356 ---------------------
358 procedure Error_Long_Line is
361 ("this line is too long",
362 Current_Line_Start + Hostparm.Max_Line_Length);
365 -------------------------------
366 -- Error_No_Double_Underline --
367 -------------------------------
369 procedure Error_No_Double_Underline is
371 Error_Msg_S ("two consecutive underlines not permitted");
372 end Error_No_Double_Underline;
381 -- Current source program character
383 Base_Char : Character;
384 -- Either # or : (character at start of based number)
390 -- Value of base in Uint format
393 -- Value of integer scanned by Scan_Integer in Uint format
396 -- Value of integer in numeric value being scanned
399 -- Scale value for real literal
402 -- Scale in Uint format
404 Exponent_Is_Negative : Boolean;
405 -- Set true for negative exponent
407 Extended_Digit_Value : Int;
408 -- Extended digit value
410 Point_Scanned : Boolean;
411 -- Flag for decimal point scanned in numeric literal
413 -----------------------
414 -- Local Subprograms --
415 -----------------------
417 procedure Error_Digit_Expected;
418 -- Signal error of bad digit, Scan_Ptr points to the location at
419 -- which the digit was expected on input, and is unchanged on return.
421 procedure Scan_Integer;
422 -- Procedure to scan integer literal. On entry, Scan_Ptr points to
423 -- a digit, on exit Scan_Ptr points past the last character of
426 -- For each digit encountered, UI_Int_Value is multiplied by 10,
427 -- and the value of the digit added to the result. In addition,
428 -- the value in Scale is decremented by one for each actual digit
431 --------------------------
432 -- Error_Digit_Expected --
433 --------------------------
435 procedure Error_Digit_Expected is
437 Error_Msg_S ("digit expected");
438 end Error_Digit_Expected;
444 procedure Scan_Integer is
446 -- Next character scanned
449 C := Source (Scan_Ptr);
451 -- Loop through digits (allowing underlines)
454 Accumulate_Checksum (C);
456 UI_Int_Value * 10 + (Character'Pos (C) - Character'Pos ('0'));
457 Scan_Ptr := Scan_Ptr + 1;
459 C := Source (Scan_Ptr);
463 -- We do not accumulate the '_' in the checksum, so that
464 -- 1_234 is equivalent to 1234, and does not trigger
465 -- compilation for "minimal recompilation" (gnatmake -m).
468 Scan_Ptr := Scan_Ptr + 1;
469 C := Source (Scan_Ptr);
471 Error_No_Double_Underline;
474 if C not in '0' .. '9' then
475 Error_Digit_Expected;
480 exit when C not in '0' .. '9';
486 ----------------------------------
487 -- Start of Processing for Nlit --
488 ----------------------------------
493 UI_Int_Value := Uint_0;
497 Point_Scanned := False;
498 UI_Num_Value := UI_Int_Value;
500 -- Various possibilities now for continuing the literal are
501 -- period, E/e (for exponent), or :/# (for based literal).
504 C := Source (Scan_Ptr);
508 -- Scan out point, but do not scan past .. which is a range
509 -- sequence, and must not be eaten up scanning a numeric literal.
511 while C = '.' and then Source (Scan_Ptr + 1) /= '.' loop
512 Accumulate_Checksum ('.');
514 if Point_Scanned then
515 Error_Msg_S ("duplicate point ignored");
518 Point_Scanned := True;
519 Scan_Ptr := Scan_Ptr + 1;
520 C := Source (Scan_Ptr);
522 if C not in '0' .. '9' then
524 ("real literal cannot end with point", Scan_Ptr - 1);
527 UI_Num_Value := UI_Int_Value;
531 -- Based literal case. The base is the value we already scanned.
532 -- In the case of colon, we insist that the following character
533 -- is indeed an extended digit or a period. This catches a number
534 -- of common errors, as well as catching the well known tricky
535 -- bug otherwise arising from "x : integer range 1 .. 10:= 6;"
538 or else (C = ':' and then
539 (Source (Scan_Ptr + 1) = '.'
541 Source (Scan_Ptr + 1) in '0' .. '9'
543 Source (Scan_Ptr + 1) in 'A' .. 'Z'
545 Source (Scan_Ptr + 1) in 'a' .. 'z'))
547 if C = ':' and then Warn_On_Obsolescent_Feature then
549 ("use of "":"" is an obsolescent feature ('R'M 'J.2(3))?");
551 ("\use ""'#"" instead?");
554 Accumulate_Checksum (C);
556 UI_Base := UI_Int_Value;
558 if UI_Base < 2 or else UI_Base > 16 then
559 Error_Msg_SC ("base not 2-16");
563 Base := UI_To_Int (UI_Base);
564 Scan_Ptr := Scan_Ptr + 1;
566 -- Scan out extended integer [. integer]
568 C := Source (Scan_Ptr);
569 UI_Int_Value := Uint_0;
573 if C in '0' .. '9' then
574 Accumulate_Checksum (C);
575 Extended_Digit_Value :=
576 Int'(Character'Pos (C)) - Int'(Character'Pos ('0'));
578 elsif C in 'A' .. 'F' then
579 Accumulate_Checksum (Character'Val (Character'Pos (C) + 32));
580 Extended_Digit_Value :=
581 Int'(Character'Pos (C)) - Int'(Character'Pos ('A')) + 10;
583 elsif C in 'a' .. 'f' then
584 Accumulate_Checksum (C);
585 Extended_Digit_Value :=
586 Int'(Character'Pos (C)) - Int'(Character'Pos ('a')) + 10;
589 Error_Msg_S ("extended digit expected");
593 if Extended_Digit_Value >= Base then
594 Error_Msg_S ("digit '>= base");
597 UI_Int_Value := UI_Int_Value * UI_Base + Extended_Digit_Value;
599 Scan_Ptr := Scan_Ptr + 1;
600 C := Source (Scan_Ptr);
604 Accumulate_Checksum ('_');
605 Scan_Ptr := Scan_Ptr + 1;
606 C := Source (Scan_Ptr);
608 Error_No_Double_Underline;
612 Accumulate_Checksum ('.');
614 if Point_Scanned then
615 Error_Msg_S ("duplicate point ignored");
618 Scan_Ptr := Scan_Ptr + 1;
619 C := Source (Scan_Ptr);
620 Point_Scanned := True;
623 elsif C = Base_Char then
624 Accumulate_Checksum (C);
625 Scan_Ptr := Scan_Ptr + 1;
628 elsif C = '#' or else C = ':' then
629 Error_Msg_S ("based number delimiters must match");
630 Scan_Ptr := Scan_Ptr + 1;
633 elsif not Identifier_Char (C) then
634 if Base_Char = '#' then
635 Error_Msg_S ("missing '#");
637 Error_Msg_S ("missing ':");
645 UI_Num_Value := UI_Int_Value;
650 if not Point_Scanned then
654 UI_Scale := UI_From_Int (Scale);
657 if Source (Scan_Ptr) = 'e' or else Source (Scan_Ptr) = 'E' then
658 Accumulate_Checksum ('e');
659 Scan_Ptr := Scan_Ptr + 1;
660 Exponent_Is_Negative := False;
662 if Source (Scan_Ptr) = '+' then
663 Accumulate_Checksum ('+');
664 Scan_Ptr := Scan_Ptr + 1;
666 elsif Source (Scan_Ptr) = '-' then
667 Accumulate_Checksum ('-');
669 if not Point_Scanned then
671 ("negative exponent not allowed for integer literal");
673 Exponent_Is_Negative := True;
676 Scan_Ptr := Scan_Ptr + 1;
679 UI_Int_Value := Uint_0;
681 if Source (Scan_Ptr) in '0' .. '9' then
684 Error_Digit_Expected;
687 if Exponent_Is_Negative then
688 UI_Scale := UI_Scale - UI_Int_Value;
690 UI_Scale := UI_Scale + UI_Int_Value;
694 -- Case of real literal to be returned
696 if Point_Scanned then
697 Token := Tok_Real_Literal;
698 Real_Literal_Value :=
704 -- Case of integer literal to be returned
707 Token := Tok_Integer_Literal;
710 Int_Literal_Value := UI_Num_Value;
712 -- Avoid doing possibly expensive calculations in cases like
713 -- parsing 163E800_000# when semantics will not be done anyway.
714 -- This is especially useful when parsing garbled input.
716 elsif Operating_Mode /= Check_Syntax
717 and then (Serious_Errors_Detected = 0 or else Try_Semantics)
719 Int_Literal_Value := UI_Num_Value * UI_Base ** UI_Scale;
722 Int_Literal_Value := No_Uint;
728 Accumulate_Token_Checksum;
740 Delimiter : Character;
741 -- Delimiter (first character of string)
744 -- Current source program character
747 -- Current character code value
750 -- Error flag for Scan_Wide call
752 procedure Error_Bad_String_Char;
753 -- Signal bad character in string/character literal. On entry
754 -- Scan_Ptr points to the improper character encountered during
755 -- the scan. Scan_Ptr is not modified, so it still points to the bad
756 -- character on return.
758 procedure Error_Unterminated_String;
759 -- Procedure called if a line terminator character is encountered
760 -- during scanning a string, meaning that the string is not properly
763 procedure Set_String;
764 -- Procedure used to distinguish between string and operator symbol.
765 -- On entry the string has been scanned out, and its characters start
766 -- at Token_Ptr and end one character before Scan_Ptr. On exit Token
767 -- is set to Tok_String_Literal or Tok_Operator_Symbol as
768 -- appropriate, and Token_Node is appropriately initialized.
769 -- In addition, in the operator symbol case, Token_Name is
770 -- appropriately set.
772 ---------------------------
773 -- Error_Bad_String_Char --
774 ---------------------------
776 procedure Error_Bad_String_Char is
777 C : constant Character := Source (Scan_Ptr);
781 Error_Msg_S ("horizontal tab not allowed in string");
783 elsif C = VT or else C = FF then
784 Error_Msg_S ("format effector not allowed in string");
786 elsif C in Upper_Half_Character then
787 Error_Msg_S ("(Ada 83) upper half character not allowed");
790 Error_Msg_S ("control character not allowed in string");
792 end Error_Bad_String_Char;
794 -------------------------------
795 -- Error_Unterminated_String --
796 -------------------------------
798 procedure Error_Unterminated_String is
800 -- An interesting little refinement. Consider the following
803 -- A := "this is an unterminated string;
804 -- A := "this is an unterminated string &
805 -- P(A, "this is a parameter that didn't get terminated);
807 -- We fiddle a little to do slightly better placement in these
808 -- cases also if there is white space at the end of the line we
809 -- place the flag at the start of this white space, not at the
810 -- end. Note that we only have to test for blanks, since tabs
811 -- aren't allowed in strings in the first place and would have
812 -- caused an error message.
814 -- Two more cases that we treat specially are:
816 -- A := "this string uses the wrong terminator'
817 -- A := "this string uses the wrong terminator' &
819 -- In these cases we give a different error message as well
821 -- We actually reposition the scan pointer to the point where we
822 -- place the flag in these cases, since it seems a better bet on
823 -- the original intention.
825 while Source (Scan_Ptr - 1) = ' '
826 or else Source (Scan_Ptr - 1) = '&'
828 Scan_Ptr := Scan_Ptr - 1;
832 -- Check for case of incorrect string terminator, but single quote
833 -- is not considered incorrect if the opening terminator misused
834 -- a single quote (error message already given).
837 and then Source (Scan_Ptr - 1) = '''
841 ("incorrect string terminator character", Scan_Ptr - 1);
845 if Source (Scan_Ptr - 1) = ';' then
846 Scan_Ptr := Scan_Ptr - 1;
849 if Source (Scan_Ptr - 1) = ')' then
850 Scan_Ptr := Scan_Ptr - 1;
855 Error_Msg_S ("missing string quote");
856 end Error_Unterminated_String;
862 procedure Set_String is
863 Slen : constant Int := Int (Scan_Ptr - Token_Ptr - 2);
869 -- Token_Name is currently set to Error_Name. The following
870 -- section of code resets Token_Name to the proper Name_Op_xx
871 -- value if the string is a valid operator symbol, otherwise it is
872 -- left set to Error_Name.
875 C1 := Source (Token_Ptr + 1);
879 Token_Name := Name_Op_Eq;
882 Token_Name := Name_Op_Gt;
885 Token_Name := Name_Op_Lt;
888 Token_Name := Name_Op_Add;
891 Token_Name := Name_Op_Subtract;
894 Token_Name := Name_Op_Concat;
897 Token_Name := Name_Op_Multiply;
900 Token_Name := Name_Op_Divide;
907 C1 := Source (Token_Ptr + 1);
908 C2 := Source (Token_Ptr + 2);
910 if C1 = '*' and then C2 = '*' then
911 Token_Name := Name_Op_Expon;
916 Token_Name := Name_Op_Ne;
918 Token_Name := Name_Op_Le;
920 Token_Name := Name_Op_Ge;
923 elsif (C1 = 'O' or else C1 = 'o') and then -- OR
924 (C2 = 'R' or else C2 = 'r')
926 Token_Name := Name_Op_Or;
930 C1 := Source (Token_Ptr + 1);
931 C2 := Source (Token_Ptr + 2);
932 C3 := Source (Token_Ptr + 3);
934 if (C1 = 'A' or else C1 = 'a') and then -- AND
935 (C2 = 'N' or else C2 = 'n') and then
936 (C3 = 'D' or else C3 = 'd')
938 Token_Name := Name_Op_And;
940 elsif (C1 = 'A' or else C1 = 'a') and then -- ABS
941 (C2 = 'B' or else C2 = 'b') and then
942 (C3 = 'S' or else C3 = 's')
944 Token_Name := Name_Op_Abs;
946 elsif (C1 = 'M' or else C1 = 'm') and then -- MOD
947 (C2 = 'O' or else C2 = 'o') and then
948 (C3 = 'D' or else C3 = 'd')
950 Token_Name := Name_Op_Mod;
952 elsif (C1 = 'N' or else C1 = 'n') and then -- NOT
953 (C2 = 'O' or else C2 = 'o') and then
954 (C3 = 'T' or else C3 = 't')
956 Token_Name := Name_Op_Not;
958 elsif (C1 = 'R' or else C1 = 'r') and then -- REM
959 (C2 = 'E' or else C2 = 'e') and then
960 (C3 = 'M' or else C3 = 'm')
962 Token_Name := Name_Op_Rem;
964 elsif (C1 = 'X' or else C1 = 'x') and then -- XOR
965 (C2 = 'O' or else C2 = 'o') and then
966 (C3 = 'R' or else C3 = 'r')
968 Token_Name := Name_Op_Xor;
973 -- If it is an operator symbol, then Token_Name is set.
974 -- If it is some other string value, then Token_Name still
975 -- contains Error_Name.
977 if Token_Name = Error_Name then
978 Token := Tok_String_Literal;
981 Token := Tok_Operator_Symbol;
991 -- On entry, Scan_Ptr points to the opening character of the string
992 -- which is either a percent, double quote, or apostrophe
993 -- (single quote). The latter case is an error detected by
994 -- the character literal circuit.
996 Delimiter := Source (Scan_Ptr);
997 Accumulate_Checksum (Delimiter);
999 Scan_Ptr := Scan_Ptr + 1;
1001 -- Loop to scan out characters of string literal
1004 C := Source (Scan_Ptr);
1006 if C = Delimiter then
1007 Accumulate_Checksum (C);
1008 Scan_Ptr := Scan_Ptr + 1;
1009 exit when Source (Scan_Ptr) /= Delimiter;
1010 Code := Get_Char_Code (C);
1011 Accumulate_Checksum (C);
1012 Scan_Ptr := Scan_Ptr + 1;
1015 if C = '"' and then Delimiter = '%' then
1017 ("quote not allowed in percent delimited string");
1018 Code := Get_Char_Code (C);
1019 Scan_Ptr := Scan_Ptr + 1;
1023 Wide_Character_Encoding_Method
1024 in WC_ESC_Encoding_Method)
1026 (C in Upper_Half_Character
1028 Upper_Half_Encoding)
1032 Source (Scan_Ptr + 1) = '"'
1034 Identifier_Char (Source (Scan_Ptr + 2)))
1036 Scan_Wide (Source, Scan_Ptr, Code, Err);
1037 Accumulate_Checksum (Code);
1040 Error_Illegal_Wide_Character;
1041 Code := Get_Char_Code (' ');
1045 Accumulate_Checksum (C);
1047 if C not in Graphic_Character then
1048 if C in Line_Terminator then
1049 Error_Unterminated_String;
1052 elsif C in Upper_Half_Character then
1054 Error_Bad_String_Char;
1058 Error_Bad_String_Char;
1062 Code := Get_Char_Code (C);
1063 Scan_Ptr := Scan_Ptr + 1;
1067 Store_String_Char (Code);
1069 if not In_Character_Range (Code) then
1070 Wide_Character_Found := True;
1074 String_Literal_Id := End_String;
1080 -- Start of body of Scan
1083 Prev_Token := Token;
1084 Prev_Token_Ptr := Token_Ptr;
1085 Token_Name := Error_Name;
1087 -- The following loop runs more than once only if a format effector
1088 -- (tab, vertical tab, form feed, line feed, carriage return) is
1089 -- encountered and skipped, or some error situation, such as an
1090 -- illegal character, is encountered.
1093 -- Skip past blanks, loop is opened up for speed
1095 while Source (Scan_Ptr) = ' ' loop
1097 if Source (Scan_Ptr + 1) /= ' ' then
1098 Scan_Ptr := Scan_Ptr + 1;
1102 if Source (Scan_Ptr + 2) /= ' ' then
1103 Scan_Ptr := Scan_Ptr + 2;
1107 if Source (Scan_Ptr + 3) /= ' ' then
1108 Scan_Ptr := Scan_Ptr + 3;
1112 if Source (Scan_Ptr + 4) /= ' ' then
1113 Scan_Ptr := Scan_Ptr + 4;
1117 if Source (Scan_Ptr + 5) /= ' ' then
1118 Scan_Ptr := Scan_Ptr + 5;
1122 if Source (Scan_Ptr + 6) /= ' ' then
1123 Scan_Ptr := Scan_Ptr + 6;
1127 if Source (Scan_Ptr + 7) /= ' ' then
1128 Scan_Ptr := Scan_Ptr + 7;
1132 Scan_Ptr := Scan_Ptr + 8;
1135 -- We are now at a non-blank character, which is the first character
1136 -- of the token we will scan, and hence the value of Token_Ptr.
1138 Token_Ptr := Scan_Ptr;
1140 -- Here begins the main case statement which transfers control on
1141 -- the basis of the non-blank character we have encountered.
1143 case Source (Scan_Ptr) is
1145 -- Line terminator characters
1147 when CR | LF | FF | VT => Line_Terminator_Case : begin
1149 -- Check line too long
1153 -- Set Token_Ptr, if End_Of_Line is a token, for the case when
1154 -- it is a physical line.
1156 if End_Of_Line_Is_Token then
1157 Token_Ptr := Scan_Ptr;
1164 Skip_Line_Terminators (Scan_Ptr, Physical);
1166 -- If we are at start of physical line, update scan pointers
1167 -- to reflect the start of the new line.
1170 Current_Line_Start := Scan_Ptr;
1171 Start_Column := Set_Start_Column;
1172 First_Non_Blank_Location := Scan_Ptr;
1174 -- If End_Of_Line is a token, we return it as it is
1177 if End_Of_Line_Is_Token then
1178 Token := Tok_End_Of_Line;
1183 end Line_Terminator_Case;
1185 -- Horizontal tab, just skip past it
1188 if Style_Check then Style.Check_HT; end if;
1189 Scan_Ptr := Scan_Ptr + 1;
1191 -- End of file character, treated as an end of file only if it
1192 -- is the last character in the buffer, otherwise it is ignored.
1195 if Scan_Ptr = Source_Last (Current_Source_File) then
1201 Scan_Ptr := Scan_Ptr + 1;
1207 Accumulate_Checksum ('&');
1209 if Source (Scan_Ptr + 1) = '&' then
1210 Error_Msg_S ("'&'& should be `AND THEN`");
1211 Scan_Ptr := Scan_Ptr + 2;
1216 Scan_Ptr := Scan_Ptr + 1;
1217 Token := Tok_Ampersand;
1221 -- Asterisk (can be multiplication operator or double asterisk
1222 -- which is the exponentiation compound delimiter).
1225 Accumulate_Checksum ('*');
1227 if Source (Scan_Ptr + 1) = '*' then
1228 Accumulate_Checksum ('*');
1229 Scan_Ptr := Scan_Ptr + 2;
1230 Token := Tok_Double_Asterisk;
1234 Scan_Ptr := Scan_Ptr + 1;
1235 Token := Tok_Asterisk;
1239 -- Colon, which can either be an isolated colon, or part of an
1240 -- assignment compound delimiter.
1243 Accumulate_Checksum (':');
1245 if Double_Char_Token ('=') then
1246 Token := Tok_Colon_Equal;
1247 if Style_Check then Style.Check_Colon_Equal; end if;
1250 elsif Source (Scan_Ptr + 1) = '-'
1251 and then Source (Scan_Ptr + 2) /= '-'
1253 Token := Tok_Colon_Equal;
1254 Error_Msg (":- should be :=", Scan_Ptr);
1255 Scan_Ptr := Scan_Ptr + 2;
1259 Scan_Ptr := Scan_Ptr + 1;
1261 if Style_Check then Style.Check_Colon; end if;
1268 Accumulate_Checksum ('(');
1269 Scan_Ptr := Scan_Ptr + 1;
1270 Token := Tok_Left_Paren;
1271 if Style_Check then Style.Check_Left_Paren; end if;
1277 if Source (Scan_Ptr + 1) = '"' then
1279 goto Scan_Identifier;
1282 Error_Msg_S ("illegal character, replaced by ""(""");
1283 Scan_Ptr := Scan_Ptr + 1;
1284 Token := Tok_Left_Paren;
1291 Error_Msg_S ("illegal character, replaced by ""(""");
1292 Scan_Ptr := Scan_Ptr + 1;
1293 Token := Tok_Left_Paren;
1299 Accumulate_Checksum (',');
1300 Scan_Ptr := Scan_Ptr + 1;
1302 if Style_Check then Style.Check_Comma; end if;
1305 -- Dot, which is either an isolated period, or part of a double
1306 -- dot compound delimiter sequence. We also check for the case of
1307 -- a digit following the period, to give a better error message.
1310 Accumulate_Checksum ('.');
1312 if Double_Char_Token ('.') then
1313 Token := Tok_Dot_Dot;
1314 if Style_Check then Style.Check_Dot_Dot; end if;
1317 elsif Source (Scan_Ptr + 1) in '0' .. '9' then
1318 Error_Msg_S ("numeric literal cannot start with point");
1319 Scan_Ptr := Scan_Ptr + 1;
1322 Scan_Ptr := Scan_Ptr + 1;
1327 -- Equal, which can either be an equality operator, or part of the
1328 -- arrow (=>) compound delimiter.
1331 Accumulate_Checksum ('=');
1333 if Double_Char_Token ('>') then
1335 if Style_Check then Style.Check_Arrow; end if;
1338 elsif Source (Scan_Ptr + 1) = '=' then
1339 Error_Msg_S ("== should be =");
1340 Scan_Ptr := Scan_Ptr + 1;
1343 Scan_Ptr := Scan_Ptr + 1;
1347 -- Greater than, which can be a greater than operator, greater than
1348 -- or equal operator, or first character of a right label bracket.
1351 Accumulate_Checksum ('>');
1353 if Double_Char_Token ('=') then
1354 Token := Tok_Greater_Equal;
1357 elsif Double_Char_Token ('>') then
1358 Token := Tok_Greater_Greater;
1362 Scan_Ptr := Scan_Ptr + 1;
1363 Token := Tok_Greater;
1367 -- Less than, which can be a less than operator, less than or equal
1368 -- operator, or the first character of a left label bracket, or the
1369 -- first character of a box (<>) compound delimiter.
1372 Accumulate_Checksum ('<');
1374 if Double_Char_Token ('=') then
1375 Token := Tok_Less_Equal;
1378 elsif Double_Char_Token ('>') then
1380 if Style_Check then Style.Check_Box; end if;
1383 elsif Double_Char_Token ('<') then
1384 Token := Tok_Less_Less;
1388 Scan_Ptr := Scan_Ptr + 1;
1393 -- Minus, which is either a subtraction operator, or the first
1394 -- character of double minus starting a comment
1396 when '-' => Minus_Case : begin
1397 if Source (Scan_Ptr + 1) = '>' then
1398 Error_Msg_S ("invalid token");
1399 Scan_Ptr := Scan_Ptr + 2;
1403 elsif Source (Scan_Ptr + 1) /= '-' then
1404 Accumulate_Checksum ('-');
1405 Scan_Ptr := Scan_Ptr + 1;
1411 else -- Source (Scan_Ptr + 1) = '-' then
1412 if Style_Check then Style.Check_Comment; end if;
1413 Scan_Ptr := Scan_Ptr + 2;
1414 Start_Of_Comment := Scan_Ptr;
1416 -- Loop to scan comment (this loop runs more than once only if
1417 -- a horizontal tab or other non-graphic character is scanned)
1420 -- Scan to non graphic character (opened up for speed)
1423 exit when Source (Scan_Ptr) not in Graphic_Character;
1424 Scan_Ptr := Scan_Ptr + 1;
1425 exit when Source (Scan_Ptr) not in Graphic_Character;
1426 Scan_Ptr := Scan_Ptr + 1;
1427 exit when Source (Scan_Ptr) not in Graphic_Character;
1428 Scan_Ptr := Scan_Ptr + 1;
1429 exit when Source (Scan_Ptr) not in Graphic_Character;
1430 Scan_Ptr := Scan_Ptr + 1;
1431 exit when Source (Scan_Ptr) not in Graphic_Character;
1432 Scan_Ptr := Scan_Ptr + 1;
1435 -- Keep going if horizontal tab
1437 if Source (Scan_Ptr) = HT then
1438 if Style_Check then Style.Check_HT; end if;
1439 Scan_Ptr := Scan_Ptr + 1;
1441 -- Terminate scan of comment if line terminator
1443 elsif Source (Scan_Ptr) in Line_Terminator then
1446 -- Terminate scan of comment if end of file encountered
1447 -- (embedded EOF character or real last character in file)
1449 elsif Source (Scan_Ptr) = EOF then
1452 -- Keep going if character in 80-FF range, or is ESC. These
1453 -- characters are allowed in comments by RM-2.1(1), 2.7(2).
1454 -- They are allowed even in Ada 83 mode according to the
1455 -- approved AI. ESC was added to the AI in June 93.
1457 elsif Source (Scan_Ptr) in Upper_Half_Character
1458 or else Source (Scan_Ptr) = ESC
1460 Scan_Ptr := Scan_Ptr + 1;
1462 -- Otherwise we have an illegal comment character
1465 Error_Illegal_Character;
1470 -- Note that, except when comments are tokens, we do NOT
1471 -- execute a return here, instead we fall through to reexecute
1472 -- the scan loop to look for a token.
1474 if Comment_Is_Token then
1475 Name_Len := Integer (Scan_Ptr - Start_Of_Comment);
1476 Name_Buffer (1 .. Name_Len) :=
1477 String (Source (Start_Of_Comment .. Scan_Ptr - 1));
1478 Comment_Id := Name_Find;
1479 Token := Tok_Comment;
1485 -- Double quote starting a string literal
1492 -- Percent starting a string literal
1495 if Warn_On_Obsolescent_Feature then
1497 ("use of ""'%"" is an obsolescent feature ('R'M 'J.2(4))?");
1499 ("\use """""" instead?");
1506 -- Apostrophe. This can either be the start of a character literal,
1507 -- or an isolated apostrophe used in a qualified expression or an
1508 -- attribute. We treat it as a character literal if it does not
1509 -- follow a right parenthesis, identifier, the keyword ALL or
1510 -- a literal. This means that we correctly treat constructs like:
1512 -- A := CHARACTER'('A');
1514 -- Note that RM-2.2(7) does not require a separator between
1515 -- "CHARACTER" and "'" in the above.
1517 when ''' => Char_Literal_Case : declare
1522 Accumulate_Checksum (''');
1523 Scan_Ptr := Scan_Ptr + 1;
1525 -- Here is where we make the test to distinguish the cases. Treat
1526 -- as apostrophe if previous token is an identifier, right paren
1527 -- or the reserved word "all" (latter case as in A.all'Address)
1528 -- (or the reserved word "project" in project files).
1529 -- Also treat it as apostrophe after a literal (this catches
1530 -- some legitimate cases, like A."abs"'Address, and also gives
1531 -- better error behavior for impossible cases like 123'xxx).
1533 if Prev_Token = Tok_Identifier
1534 or else Prev_Token = Tok_Right_Paren
1535 or else Prev_Token = Tok_All
1536 or else Prev_Token = Tok_Project
1537 or else Prev_Token in Token_Class_Literal
1539 Token := Tok_Apostrophe;
1540 if Style_Check then Style.Check_Apostrophe; end if;
1543 -- Otherwise the apostrophe starts a character literal
1546 -- Case of wide character literal with ESC or [ encoding
1548 if (Source (Scan_Ptr) = ESC
1550 Wide_Character_Encoding_Method in WC_ESC_Encoding_Method)
1552 (Source (Scan_Ptr) in Upper_Half_Character
1554 Upper_Half_Encoding)
1556 (Source (Scan_Ptr) = '['
1558 Source (Scan_Ptr + 1) = '"')
1560 Scan_Wide (Source, Scan_Ptr, Code, Err);
1561 Accumulate_Checksum (Code);
1564 Error_Illegal_Wide_Character;
1567 if Source (Scan_Ptr) /= ''' then
1568 Error_Msg_S ("missing apostrophe");
1570 Scan_Ptr := Scan_Ptr + 1;
1573 -- If we do not find a closing quote in the expected place then
1574 -- assume that we have a misguided attempt at a string literal.
1576 -- However, if previous token is RANGE, then we return an
1577 -- apostrophe instead since this gives better error recovery
1579 elsif Source (Scan_Ptr + 1) /= ''' then
1581 if Prev_Token = Tok_Range then
1582 Token := Tok_Apostrophe;
1586 Scan_Ptr := Scan_Ptr - 1;
1588 ("strings are delimited by double quote character");
1594 -- Otherwise we have a (non-wide) character literal
1597 Accumulate_Checksum (Source (Scan_Ptr));
1599 if Source (Scan_Ptr) not in Graphic_Character then
1600 if Source (Scan_Ptr) in Upper_Half_Character then
1602 Error_Illegal_Character;
1606 Error_Illegal_Character;
1610 Code := Get_Char_Code (Source (Scan_Ptr));
1611 Scan_Ptr := Scan_Ptr + 2;
1614 -- Fall through here with Scan_Ptr updated past the closing
1615 -- quote, and Code set to the Char_Code value for the literal
1617 Accumulate_Checksum (''');
1618 Token := Tok_Char_Literal;
1619 Set_Character_Literal_Name (Code);
1620 Token_Name := Name_Find;
1621 Character_Code := Code;
1625 end Char_Literal_Case;
1627 -- Right parenthesis
1630 Accumulate_Checksum (')');
1631 Scan_Ptr := Scan_Ptr + 1;
1632 Token := Tok_Right_Paren;
1633 if Style_Check then Style.Check_Right_Paren; end if;
1636 -- Right bracket or right brace, treated as right paren
1639 Error_Msg_S ("illegal character, replaced by "")""");
1640 Scan_Ptr := Scan_Ptr + 1;
1641 Token := Tok_Right_Paren;
1644 -- Slash (can be division operator or first character of not equal)
1647 Accumulate_Checksum ('/');
1649 if Double_Char_Token ('=') then
1650 Token := Tok_Not_Equal;
1653 Scan_Ptr := Scan_Ptr + 1;
1661 Accumulate_Checksum (';');
1662 Scan_Ptr := Scan_Ptr + 1;
1663 Token := Tok_Semicolon;
1664 if Style_Check then Style.Check_Semicolon; end if;
1669 when '|' => Vertical_Bar_Case : begin
1670 Accumulate_Checksum ('|');
1672 -- Special check for || to give nice message
1674 if Source (Scan_Ptr + 1) = '|' then
1675 Error_Msg_S ("""'|'|"" should be `OR ELSE`");
1676 Scan_Ptr := Scan_Ptr + 2;
1681 Scan_Ptr := Scan_Ptr + 1;
1682 Token := Tok_Vertical_Bar;
1683 if Style_Check then Style.Check_Vertical_Bar; end if;
1686 end Vertical_Bar_Case;
1688 -- Exclamation, replacement character for vertical bar
1690 when '!' => Exclamation_Case : begin
1691 Accumulate_Checksum ('!');
1693 if Warn_On_Obsolescent_Feature then
1695 ("use of ""'!"" is an obsolescent feature ('R'M 'J.2(2))?");
1697 ("\use ""'|"" instead?");
1700 if Source (Scan_Ptr + 1) = '=' then
1701 Error_Msg_S ("'!= should be /=");
1702 Scan_Ptr := Scan_Ptr + 2;
1703 Token := Tok_Not_Equal;
1707 Scan_Ptr := Scan_Ptr + 1;
1708 Token := Tok_Vertical_Bar;
1712 end Exclamation_Case;
1716 when '+' => Plus_Case : begin
1717 Accumulate_Checksum ('+');
1718 Scan_Ptr := Scan_Ptr + 1;
1723 -- Digits starting a numeric literal
1728 if Identifier_Char (Source (Scan_Ptr)) then
1730 ("delimiter required between literal and identifier");
1735 -- Lower case letters
1739 Name_Buffer (1) := Source (Scan_Ptr);
1740 Accumulate_Checksum (Name_Buffer (1));
1741 Scan_Ptr := Scan_Ptr + 1;
1742 goto Scan_Identifier;
1744 -- Upper case letters
1749 Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
1750 Accumulate_Checksum (Name_Buffer (1));
1751 Scan_Ptr := Scan_Ptr + 1;
1752 goto Scan_Identifier;
1754 -- Underline character
1757 if Special_Characters ('_') then
1758 Token_Ptr := Scan_Ptr;
1759 Scan_Ptr := Scan_Ptr + 1;
1760 Token := Tok_Special;
1761 Special_Character := '_';
1765 Error_Msg_S ("identifier cannot start with underline");
1767 Name_Buffer (1) := '_';
1768 Scan_Ptr := Scan_Ptr + 1;
1769 goto Scan_Identifier;
1771 -- Space (not possible, because we scanned past blanks)
1774 raise Program_Error;
1776 -- Characters in top half of ASCII 8-bit chart
1778 when Upper_Half_Character =>
1780 -- Wide character case. Note that Scan_Identifier will issue
1781 -- an appropriate message if wide characters are not allowed
1784 if Upper_Half_Encoding then
1786 goto Scan_Identifier;
1788 -- Otherwise we have OK Latin-1 character
1791 -- Upper half characters may possibly be identifier letters
1792 -- but can never be digits, so Identifier_Char can be used
1793 -- to test for a valid start of identifier character.
1795 if Identifier_Char (Source (Scan_Ptr)) then
1797 goto Scan_Identifier;
1799 Error_Illegal_Character;
1805 -- ESC character, possible start of identifier if wide characters
1806 -- using ESC encoding are allowed in identifiers, which we can
1807 -- tell by looking at the Identifier_Char flag for ESC, which is
1808 -- only true if these conditions are met.
1810 if Identifier_Char (ESC) then
1812 goto Scan_Identifier;
1814 Error_Illegal_Wide_Character;
1817 -- Invalid control characters
1819 when NUL | SOH | STX | ETX | EOT | ENQ | ACK | BEL | BS | SO |
1820 SI | DLE | DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN |
1821 EM | FS | GS | RS | US | DEL
1823 Error_Illegal_Character;
1825 -- Invalid graphic characters
1827 when '#' | '$' | '?' | '@' | '`' | '\' | '^' | '~' =>
1828 -- If Set_Special_Character has been called for this character,
1829 -- set Scans.Special_Character and return a Special token.
1831 if Special_Characters (Source (Scan_Ptr)) then
1832 Token_Ptr := Scan_Ptr;
1833 Token := Tok_Special;
1834 Special_Character := Source (Scan_Ptr);
1835 Scan_Ptr := Scan_Ptr + 1;
1838 -- otherwise, this is an illegal character
1841 Error_Illegal_Character;
1844 -- End switch on non-blank character
1848 -- End loop past format effectors. The exit from this loop is by
1849 -- executing a return statement following completion of token scan
1850 -- (control never falls out of this loop to the code which follows)
1854 -- Identifier scanning routine. On entry, some initial characters
1855 -- of the identifier may have already been stored in Name_Buffer.
1856 -- If so, Name_Len has the number of characters stored. otherwise
1857 -- Name_Len is set to zero on entry.
1861 -- This loop scans as fast as possible past lower half letters
1862 -- and digits, which we expect to be the most common characters.
1865 if Source (Scan_Ptr) in 'a' .. 'z'
1866 or else Source (Scan_Ptr) in '0' .. '9'
1868 Name_Buffer (Name_Len + 1) := Source (Scan_Ptr);
1869 Accumulate_Checksum (Source (Scan_Ptr));
1871 elsif Source (Scan_Ptr) in 'A' .. 'Z' then
1872 Name_Buffer (Name_Len + 1) :=
1873 Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
1874 Accumulate_Checksum (Name_Buffer (Name_Len + 1));
1879 -- Open out the loop a couple of times for speed
1881 if Source (Scan_Ptr + 1) in 'a' .. 'z'
1882 or else Source (Scan_Ptr + 1) in '0' .. '9'
1884 Name_Buffer (Name_Len + 2) := Source (Scan_Ptr + 1);
1885 Accumulate_Checksum (Source (Scan_Ptr + 1));
1887 elsif Source (Scan_Ptr + 1) in 'A' .. 'Z' then
1888 Name_Buffer (Name_Len + 2) :=
1889 Character'Val (Character'Pos (Source (Scan_Ptr + 1)) + 32);
1890 Accumulate_Checksum (Name_Buffer (Name_Len + 2));
1893 Scan_Ptr := Scan_Ptr + 1;
1894 Name_Len := Name_Len + 1;
1898 if Source (Scan_Ptr + 2) in 'a' .. 'z'
1899 or else Source (Scan_Ptr + 2) in '0' .. '9'
1901 Name_Buffer (Name_Len + 3) := Source (Scan_Ptr + 2);
1902 Accumulate_Checksum (Source (Scan_Ptr + 2));
1904 elsif Source (Scan_Ptr + 2) in 'A' .. 'Z' then
1905 Name_Buffer (Name_Len + 3) :=
1906 Character'Val (Character'Pos (Source (Scan_Ptr + 2)) + 32);
1907 Accumulate_Checksum (Name_Buffer (Name_Len + 3));
1909 Scan_Ptr := Scan_Ptr + 2;
1910 Name_Len := Name_Len + 2;
1914 if Source (Scan_Ptr + 3) in 'a' .. 'z'
1915 or else Source (Scan_Ptr + 3) in '0' .. '9'
1917 Name_Buffer (Name_Len + 4) := Source (Scan_Ptr + 3);
1918 Accumulate_Checksum (Source (Scan_Ptr + 3));
1920 elsif Source (Scan_Ptr + 3) in 'A' .. 'Z' then
1921 Name_Buffer (Name_Len + 4) :=
1922 Character'Val (Character'Pos (Source (Scan_Ptr + 3)) + 32);
1923 Accumulate_Checksum (Name_Buffer (Name_Len + 4));
1926 Scan_Ptr := Scan_Ptr + 3;
1927 Name_Len := Name_Len + 3;
1931 Scan_Ptr := Scan_Ptr + 4;
1932 Name_Len := Name_Len + 4;
1935 -- If we fall through, then we have encountered either an underline
1936 -- character, or an extended identifier character (i.e. one from the
1937 -- upper half), or a wide character, or an identifier terminator.
1938 -- The initial test speeds us up in the most common case where we
1939 -- have an identifier terminator. Note that ESC is an identifier
1940 -- character only if a wide character encoding method that uses
1941 -- ESC encoding is active, so if we find an ESC character we know
1942 -- that we have a wide character.
1944 if Identifier_Char (Source (Scan_Ptr)) then
1946 -- Case of underline
1948 if Source (Scan_Ptr) = '_' then
1949 Accumulate_Checksum ('_');
1951 -- Check error case of identifier ending with underscore
1952 -- In this case we ignore the underscore and do not store it.
1954 if not Identifier_Char (Source (Scan_Ptr + 1)) then
1955 Error_Msg_S ("identifier cannot end with underline");
1956 Scan_Ptr := Scan_Ptr + 1;
1958 -- Check error case of two underscores. In this case we do
1959 -- not store the first underscore (we will store the second)
1961 elsif Source (Scan_Ptr + 1) = '_' then
1962 Error_No_Double_Underline;
1964 -- Normal case of legal underscore
1967 Name_Len := Name_Len + 1;
1968 Name_Buffer (Name_Len) := '_';
1971 Scan_Ptr := Scan_Ptr + 1;
1972 goto Scan_Identifier;
1974 -- Upper half character
1976 elsif Source (Scan_Ptr) in Upper_Half_Character
1977 and then not Upper_Half_Encoding
1979 Accumulate_Checksum (Source (Scan_Ptr));
1980 Store_Encoded_Character
1981 (Get_Char_Code (Fold_Lower (Source (Scan_Ptr))));
1982 Scan_Ptr := Scan_Ptr + 1;
1983 goto Scan_Identifier;
1985 -- Left bracket not followed by a quote terminates an identifier.
1986 -- This is an error, but we don't want to give a junk error msg
1987 -- about wide characters in this case!
1989 elsif Source (Scan_Ptr) = '['
1990 and then Source (Scan_Ptr + 1) /= '"'
1994 -- We know we have a wide character encoding here (the current
1995 -- character is either ESC, left bracket, or an upper half
1996 -- character depending on the encoding method).
1999 -- Scan out the wide character and insert the appropriate
2000 -- encoding into the name table entry for the identifier.
2003 Sptr : constant Source_Ptr := Scan_Ptr;
2009 Scan_Wide (Source, Scan_Ptr, Code, Err);
2011 -- If error, signal error
2014 Error_Illegal_Wide_Character;
2016 -- If the character scanned is a normal identifier
2017 -- character, then we treat it that way.
2019 elsif In_Character_Range (Code)
2020 and then Identifier_Char (Get_Character (Code))
2022 Chr := Get_Character (Code);
2023 Accumulate_Checksum (Chr);
2024 Store_Encoded_Character
2025 (Get_Char_Code (Fold_Lower (Chr)));
2027 -- Character is not normal identifier character, store
2028 -- it in encoded form.
2031 Accumulate_Checksum (Code);
2032 Store_Encoded_Character (Code);
2034 -- Make sure we are allowing wide characters in
2035 -- identifiers. Note that we allow wide character
2036 -- notation for an OK identifier character. This
2037 -- in particular allows bracket or other notation
2038 -- to be used for upper half letters.
2040 if Identifier_Character_Set /= 'w' then
2042 ("wide character not allowed in identifier", Sptr);
2047 goto Scan_Identifier;
2051 -- Scan of identifier is complete. The identifier is stored in
2052 -- Name_Buffer, and Scan_Ptr points past the last character.
2054 Token_Name := Name_Find;
2056 -- Here is where we check if it was a keyword
2058 if Get_Name_Table_Byte (Token_Name) /= 0
2059 and then (Ada_95 or else Token_Name not in Ada_95_Reserved_Words)
2061 Token := Token_Type'Val (Get_Name_Table_Byte (Token_Name));
2063 -- Deal with possible style check for non-lower case keyword,
2064 -- but we don't treat ACCESS, DELTA, DIGITS, RANGE as keywords
2065 -- for this purpose if they appear as attribute designators.
2066 -- Actually we only check the first character for speed.
2069 and then Source (Token_Ptr) <= 'Z'
2070 and then (Prev_Token /= Tok_Apostrophe
2072 (Token /= Tok_Access
2073 and then Token /= Tok_Delta
2074 and then Token /= Tok_Digits
2075 and then Token /= Tok_Range))
2077 Style.Non_Lower_Case_Keyword;
2080 -- We must reset Token_Name since this is not an identifier
2081 -- and if we leave Token_Name set, the parser gets confused
2082 -- because it thinks it is dealing with an identifier instead
2083 -- of the corresponding keyword.
2085 Token_Name := No_Name;
2086 Accumulate_Token_Checksum;
2089 -- It is an identifier after all
2092 Token := Tok_Identifier;
2093 Accumulate_Token_Checksum;
2099 --------------------------
2100 -- Set_Comment_As_Token --
2101 --------------------------
2103 procedure Set_Comment_As_Token (Value : Boolean) is
2105 Comment_Is_Token := Value;
2106 end Set_Comment_As_Token;
2108 ------------------------------
2109 -- Set_End_Of_Line_As_Token --
2110 ------------------------------
2112 procedure Set_End_Of_Line_As_Token (Value : Boolean) is
2114 End_Of_Line_Is_Token := Value;
2115 end Set_End_Of_Line_As_Token;
2117 ---------------------------
2118 -- Set_Special_Character --
2119 ---------------------------
2121 procedure Set_Special_Character (C : Character) is
2124 when '#' | '$' | '_' | '?' | '@' | '`' | '\' | '^' | '~' =>
2125 Special_Characters (C) := True;
2130 end Set_Special_Character;
2132 ----------------------
2133 -- Set_Start_Column --
2134 ----------------------
2136 -- Note: it seems at first glance a little expensive to compute this value
2137 -- for every source line (since it is certainly not used for all source
2138 -- lines). On the other hand, it doesn't take much more work to skip past
2139 -- the initial white space on the line counting the columns than it would
2140 -- to scan past the white space using the standard scanning circuits.
2142 function Set_Start_Column return Column_Number is
2143 Start_Column : Column_Number := 0;
2146 -- Outer loop scans past horizontal tab characters
2150 -- Inner loop scans past blanks as fast as possible, bumping Scan_Ptr
2151 -- past the blanks and adjusting Start_Column to account for them.
2154 if Source (Scan_Ptr) = ' ' then
2155 if Source (Scan_Ptr + 1) = ' ' then
2156 if Source (Scan_Ptr + 2) = ' ' then
2157 if Source (Scan_Ptr + 3) = ' ' then
2158 if Source (Scan_Ptr + 4) = ' ' then
2159 if Source (Scan_Ptr + 5) = ' ' then
2160 if Source (Scan_Ptr + 6) = ' ' then
2161 Scan_Ptr := Scan_Ptr + 7;
2162 Start_Column := Start_Column + 7;
2164 Scan_Ptr := Scan_Ptr + 6;
2165 Start_Column := Start_Column + 6;
2169 Scan_Ptr := Scan_Ptr + 5;
2170 Start_Column := Start_Column + 5;
2174 Scan_Ptr := Scan_Ptr + 4;
2175 Start_Column := Start_Column + 4;
2179 Scan_Ptr := Scan_Ptr + 3;
2180 Start_Column := Start_Column + 3;
2184 Scan_Ptr := Scan_Ptr + 2;
2185 Start_Column := Start_Column + 2;
2189 Scan_Ptr := Scan_Ptr + 1;
2190 Start_Column := Start_Column + 1;
2196 end loop Blanks_Loop;
2198 -- Outer loop keeps going only if a horizontal tab follows
2200 if Source (Scan_Ptr) = HT then
2201 if Style_Check then Style.Check_HT; end if;
2202 Scan_Ptr := Scan_Ptr + 1;
2203 Start_Column := (Start_Column / 8) * 8 + 8;
2210 return Start_Column;
2211 end Set_Start_Column;