1 ------------------------------------------------------------------------------
3 -- GNAT COMPILER COMPONENTS --
10 -- Copyright (C) 1992-2002 Free Software Foundation, Inc. --
12 -- GNAT is free software; you can redistribute it and/or modify it under --
13 -- terms of the GNU General Public License as published by the Free Soft- --
14 -- ware Foundation; either version 2, or (at your option) any later ver- --
15 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
16 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
17 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
18 -- for more details. You should have received a copy of the GNU General --
19 -- Public License distributed with GNAT; see file COPYING. If not, write --
20 -- to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, --
21 -- MA 02111-1307, USA. --
23 -- GNAT was originally developed by the GNAT team at New York University. --
24 -- Extensive contributions were provided by Ada Core Technologies Inc. --
26 ------------------------------------------------------------------------------
28 with Atree; use Atree;
29 with Csets; use Csets;
30 with Errout; use Errout;
31 with Hostparm; use Hostparm;
32 with Namet; use Namet;
34 with Scans; use Scans;
35 with Sinput; use Sinput;
36 with Sinfo; use Sinfo;
37 with Snames; use Snames;
39 with Widechar; use Widechar;
42 with System.WCh_Con; use System.WCh_Con;
47 -- Make control characters visible
49 Used_As_Identifier : array (Token_Type) of Boolean;
50 -- Flags set True if a given keyword is used as an identifier (used to
51 -- make sure that we only post an error message for incorrect use of a
52 -- keyword as an identifier once for a given keyword).
54 -----------------------
55 -- Local Subprograms --
56 -----------------------
58 procedure Accumulate_Checksum (C : Character);
59 pragma Inline (Accumulate_Checksum);
60 -- This routine accumulates the checksum given character C. During the
61 -- scanning of a source file, this routine is called with every character
62 -- in the source, excluding blanks, and all control characters (except
63 -- that ESC is included in the checksum). Upper case letters not in string
64 -- literals are folded by the caller. See Sinput spec for the documentation
65 -- of the checksum algorithm. Note: checksum values are only used if we
66 -- generate code, so it is not necessary to worry about making the right
67 -- sequence of calls in any error situation.
69 procedure Accumulate_Checksum (C : Char_Code);
70 pragma Inline (Accumulate_Checksum);
71 -- This version is identical, except that the argument, C, is a character
72 -- code value instead of a character. This is used when wide characters
73 -- are scanned. We use the character code rather than the ASCII characters
74 -- so that the checksum is independent of wide character encoding method.
76 procedure Initialize_Checksum;
77 pragma Inline (Initialize_Checksum);
78 -- Initialize checksum value
80 procedure Check_End_Of_Line;
81 -- Called when end of line encountered. Checks that line is not
82 -- too long, and that other style checks for the end of line are met.
84 function Determine_License return License_Type;
85 -- Scan header of file and check that it has an appropriate GNAT-style
86 -- header with a proper license statement. Returns GPL, Unrestricted,
87 -- or Modified_GPL depending on header. If none of these, returns Unknown.
89 function Double_Char_Token (C : Character) return Boolean;
90 -- This function is used for double character tokens like := or <>. It
91 -- checks if the character following Source (Scan_Ptr) is C, and if so
92 -- bumps Scan_Ptr past the pair of characters and returns True. A space
93 -- between the two characters is also recognized with an appropriate
94 -- error message being issued. If C is not present, False is returned.
95 -- Note that Double_Char_Token can only be used for tokens defined in
96 -- the Ada syntax (it's use for error cases like && is not appropriate
97 -- since we do not want a junk message for a case like &-space-&).
99 procedure Error_Illegal_Character;
100 -- Give illegal character error, Scan_Ptr points to character. On return,
101 -- Scan_Ptr is bumped past the illegal character.
103 procedure Error_Illegal_Wide_Character;
104 -- Give illegal wide character message. On return, Scan_Ptr is bumped
105 -- past the illegal character, which may still leave us pointing to
106 -- junk, not much we can do if the escape sequence is messed up!
108 procedure Error_Long_Line;
109 -- Signal error of excessively long line
111 procedure Error_No_Double_Underline;
112 -- Signal error of double underline character
115 -- This is the procedure for scanning out numeric literals. On entry,
116 -- Scan_Ptr points to the digit that starts the numeric literal (the
117 -- checksum for this character has not been accumulated yet). On return
118 -- Scan_Ptr points past the last character of the numeric literal, Token
119 -- and Token_Node are set appropriately, and the checksum is updated.
121 function Set_Start_Column return Column_Number;
122 -- This routine is called with Scan_Ptr pointing to the first character
123 -- of a line. On exit, Scan_Ptr is advanced to the first non-blank
124 -- character of this line (or to the terminating format effector if the
125 -- line contains no non-blank characters), and the returned result is the
126 -- column number of this non-blank character (zero origin), which is the
127 -- value to be stored in the Start_Column scan variable.
130 -- This is the procedure for scanning out string literals. On entry,
131 -- Scan_Ptr points to the opening string quote (the checksum for this
132 -- character has not been accumulated yet). On return Scan_Ptr points
133 -- past the closing quote of the string literal, Token and Token_Node
134 -- are set appropriately, and the checksum is upated.
136 -------------------------
137 -- Accumulate_Checksum --
138 -------------------------
140 procedure Accumulate_Checksum (C : Character) is
142 System.CRC32.Update (System.CRC32.CRC32 (Checksum), C);
143 end Accumulate_Checksum;
145 procedure Accumulate_Checksum (C : Char_Code) is
147 Accumulate_Checksum (Character'Val (C / 256));
148 Accumulate_Checksum (Character'Val (C mod 256));
149 end Accumulate_Checksum;
151 -----------------------
152 -- Check_End_Of_Line --
153 -----------------------
155 procedure Check_End_Of_Line is
156 Len : constant Int := Int (Scan_Ptr) - Int (Current_Line_Start);
159 if Len > Hostparm.Max_Line_Length then
162 elsif Style_Check then
163 Style.Check_Line_Terminator (Len);
165 end Check_End_Of_Line;
167 -----------------------
168 -- Determine_License --
169 -----------------------
171 function Determine_License return License_Type is
172 GPL_Found : Boolean := False;
174 function Contains (S : String) return Boolean;
175 -- See if current comment contains successive non-blank characters
176 -- matching the contents of S. If so leave Scan_Ptr unchanged and
177 -- return True, otherwise leave Scan_Ptr unchanged and return False.
180 -- Skip to line terminator character
186 function Contains (S : String) return Boolean is
193 while Source (SP) /= CR and then Source (SP) /= LF loop
194 if Source (SP) = S (S'First) then
206 while Source (SS) = ' ' loop
210 exit when Source (SS) /= S (CP);
224 procedure Skip_EOL is
226 while Source (Scan_Ptr) /= CR
227 and then Source (Scan_Ptr) /= LF
229 Scan_Ptr := Scan_Ptr + 1;
233 -- Start of processing for Determine_License
237 if Source (Scan_Ptr) /= '-'
238 or else Source (Scan_Ptr + 1) /= '-'
246 elsif Contains ("Asaspecialexception") then
251 elsif Contains ("GNUGeneralPublicLicense") then
256 ("ThisspecificationisadaptedfromtheAdaSemanticInterface")
259 ("ThisspecificationisderivedfromtheAdaReferenceManual")
272 Skip_Line_Terminators (Scan_Ptr, Physical);
274 -- If we are at start of physical line, update scan pointers
275 -- to reflect the start of the new line.
278 Current_Line_Start := Scan_Ptr;
279 Start_Column := Set_Start_Column;
280 First_Non_Blank_Location := Scan_Ptr;
284 end Determine_License;
286 ----------------------------
287 -- Determine_Token_Casing --
288 ----------------------------
290 function Determine_Token_Casing return Casing_Type is
292 return Determine_Casing (Source (Token_Ptr .. Scan_Ptr - 1));
293 end Determine_Token_Casing;
295 -----------------------
296 -- Double_Char_Token --
297 -----------------------
299 function Double_Char_Token (C : Character) return Boolean is
301 if Source (Scan_Ptr + 1) = C then
302 Accumulate_Checksum (C);
303 Scan_Ptr := Scan_Ptr + 2;
306 elsif Source (Scan_Ptr + 1) = ' '
307 and then Source (Scan_Ptr + 2) = C
309 Scan_Ptr := Scan_Ptr + 1;
310 Error_Msg_S ("no space allowed here");
311 Scan_Ptr := Scan_Ptr + 2;
317 end Double_Char_Token;
319 -----------------------------
320 -- Error_Illegal_Character --
321 -----------------------------
323 procedure Error_Illegal_Character is
325 Error_Msg_S ("illegal character");
326 Scan_Ptr := Scan_Ptr + 1;
327 end Error_Illegal_Character;
329 ----------------------------------
330 -- Error_Illegal_Wide_Character --
331 ----------------------------------
333 procedure Error_Illegal_Wide_Character is
337 ("illegal wide character, check " &
338 "'/'W'I'D'E'_'C'H'A'R'A'C'T'E'R'_'E'N'C'O'D'I'N'G qualifer");
341 ("illegal wide character, check -gnatW switch");
344 Scan_Ptr := Scan_Ptr + 1;
345 end Error_Illegal_Wide_Character;
347 ---------------------
348 -- Error_Long_Line --
349 ---------------------
351 procedure Error_Long_Line is
354 ("this line is too long",
355 Current_Line_Start + Hostparm.Max_Line_Length);
358 -------------------------------
359 -- Error_No_Double_Underline --
360 -------------------------------
362 procedure Error_No_Double_Underline is
364 Error_Msg_S ("two consecutive underlines not permitted");
365 end Error_No_Double_Underline;
367 -------------------------
368 -- Initialize_Checksum --
369 -------------------------
371 procedure Initialize_Checksum is
373 System.CRC32.Initialize (System.CRC32.CRC32 (Checksum));
374 end Initialize_Checksum;
376 ------------------------
377 -- Initialize_Scanner --
378 ------------------------
380 procedure Initialize_Scanner
381 (Unit : Unit_Number_Type;
382 Index : Source_File_Index)
384 GNAT_Hedr : constant Text_Buffer (1 .. 78) := (others => '-');
387 -- Set up Token_Type values in Names Table entries for reserved keywords
388 -- We use the Pos value of the Token_Type value. Note we are relying on
389 -- the fact that Token_Type'Val (0) is not a reserved word!
391 Set_Name_Table_Byte (Name_Abort, Token_Type'Pos (Tok_Abort));
392 Set_Name_Table_Byte (Name_Abs, Token_Type'Pos (Tok_Abs));
393 Set_Name_Table_Byte (Name_Abstract, Token_Type'Pos (Tok_Abstract));
394 Set_Name_Table_Byte (Name_Accept, Token_Type'Pos (Tok_Accept));
395 Set_Name_Table_Byte (Name_Access, Token_Type'Pos (Tok_Access));
396 Set_Name_Table_Byte (Name_And, Token_Type'Pos (Tok_And));
397 Set_Name_Table_Byte (Name_Aliased, Token_Type'Pos (Tok_Aliased));
398 Set_Name_Table_Byte (Name_All, Token_Type'Pos (Tok_All));
399 Set_Name_Table_Byte (Name_Array, Token_Type'Pos (Tok_Array));
400 Set_Name_Table_Byte (Name_At, Token_Type'Pos (Tok_At));
401 Set_Name_Table_Byte (Name_Begin, Token_Type'Pos (Tok_Begin));
402 Set_Name_Table_Byte (Name_Body, Token_Type'Pos (Tok_Body));
403 Set_Name_Table_Byte (Name_Case, Token_Type'Pos (Tok_Case));
404 Set_Name_Table_Byte (Name_Constant, Token_Type'Pos (Tok_Constant));
405 Set_Name_Table_Byte (Name_Declare, Token_Type'Pos (Tok_Declare));
406 Set_Name_Table_Byte (Name_Delay, Token_Type'Pos (Tok_Delay));
407 Set_Name_Table_Byte (Name_Delta, Token_Type'Pos (Tok_Delta));
408 Set_Name_Table_Byte (Name_Digits, Token_Type'Pos (Tok_Digits));
409 Set_Name_Table_Byte (Name_Do, Token_Type'Pos (Tok_Do));
410 Set_Name_Table_Byte (Name_Else, Token_Type'Pos (Tok_Else));
411 Set_Name_Table_Byte (Name_Elsif, Token_Type'Pos (Tok_Elsif));
412 Set_Name_Table_Byte (Name_End, Token_Type'Pos (Tok_End));
413 Set_Name_Table_Byte (Name_Entry, Token_Type'Pos (Tok_Entry));
414 Set_Name_Table_Byte (Name_Exception, Token_Type'Pos (Tok_Exception));
415 Set_Name_Table_Byte (Name_Exit, Token_Type'Pos (Tok_Exit));
416 Set_Name_Table_Byte (Name_For, Token_Type'Pos (Tok_For));
417 Set_Name_Table_Byte (Name_Function, Token_Type'Pos (Tok_Function));
418 Set_Name_Table_Byte (Name_Generic, Token_Type'Pos (Tok_Generic));
419 Set_Name_Table_Byte (Name_Goto, Token_Type'Pos (Tok_Goto));
420 Set_Name_Table_Byte (Name_If, Token_Type'Pos (Tok_If));
421 Set_Name_Table_Byte (Name_In, Token_Type'Pos (Tok_In));
422 Set_Name_Table_Byte (Name_Is, Token_Type'Pos (Tok_Is));
423 Set_Name_Table_Byte (Name_Limited, Token_Type'Pos (Tok_Limited));
424 Set_Name_Table_Byte (Name_Loop, Token_Type'Pos (Tok_Loop));
425 Set_Name_Table_Byte (Name_Mod, Token_Type'Pos (Tok_Mod));
426 Set_Name_Table_Byte (Name_New, Token_Type'Pos (Tok_New));
427 Set_Name_Table_Byte (Name_Not, Token_Type'Pos (Tok_Not));
428 Set_Name_Table_Byte (Name_Null, Token_Type'Pos (Tok_Null));
429 Set_Name_Table_Byte (Name_Of, Token_Type'Pos (Tok_Of));
430 Set_Name_Table_Byte (Name_Or, Token_Type'Pos (Tok_Or));
431 Set_Name_Table_Byte (Name_Others, Token_Type'Pos (Tok_Others));
432 Set_Name_Table_Byte (Name_Out, Token_Type'Pos (Tok_Out));
433 Set_Name_Table_Byte (Name_Package, Token_Type'Pos (Tok_Package));
434 Set_Name_Table_Byte (Name_Pragma, Token_Type'Pos (Tok_Pragma));
435 Set_Name_Table_Byte (Name_Private, Token_Type'Pos (Tok_Private));
436 Set_Name_Table_Byte (Name_Procedure, Token_Type'Pos (Tok_Procedure));
437 Set_Name_Table_Byte (Name_Protected, Token_Type'Pos (Tok_Protected));
438 Set_Name_Table_Byte (Name_Raise, Token_Type'Pos (Tok_Raise));
439 Set_Name_Table_Byte (Name_Range, Token_Type'Pos (Tok_Range));
440 Set_Name_Table_Byte (Name_Record, Token_Type'Pos (Tok_Record));
441 Set_Name_Table_Byte (Name_Rem, Token_Type'Pos (Tok_Rem));
442 Set_Name_Table_Byte (Name_Renames, Token_Type'Pos (Tok_Renames));
443 Set_Name_Table_Byte (Name_Requeue, Token_Type'Pos (Tok_Requeue));
444 Set_Name_Table_Byte (Name_Return, Token_Type'Pos (Tok_Return));
445 Set_Name_Table_Byte (Name_Reverse, Token_Type'Pos (Tok_Reverse));
446 Set_Name_Table_Byte (Name_Select, Token_Type'Pos (Tok_Select));
447 Set_Name_Table_Byte (Name_Separate, Token_Type'Pos (Tok_Separate));
448 Set_Name_Table_Byte (Name_Subtype, Token_Type'Pos (Tok_Subtype));
449 Set_Name_Table_Byte (Name_Tagged, Token_Type'Pos (Tok_Tagged));
450 Set_Name_Table_Byte (Name_Task, Token_Type'Pos (Tok_Task));
451 Set_Name_Table_Byte (Name_Terminate, Token_Type'Pos (Tok_Terminate));
452 Set_Name_Table_Byte (Name_Then, Token_Type'Pos (Tok_Then));
453 Set_Name_Table_Byte (Name_Type, Token_Type'Pos (Tok_Type));
454 Set_Name_Table_Byte (Name_Until, Token_Type'Pos (Tok_Until));
455 Set_Name_Table_Byte (Name_Use, Token_Type'Pos (Tok_Use));
456 Set_Name_Table_Byte (Name_When, Token_Type'Pos (Tok_When));
457 Set_Name_Table_Byte (Name_While, Token_Type'Pos (Tok_While));
458 Set_Name_Table_Byte (Name_With, Token_Type'Pos (Tok_With));
459 Set_Name_Table_Byte (Name_Xor, Token_Type'Pos (Tok_Xor));
461 -- Initialize scan control variables
463 Current_Source_File := Index;
464 Source := Source_Text (Current_Source_File);
465 Current_Source_Unit := Unit;
466 Scan_Ptr := Source_First (Current_Source_File);
468 Token_Ptr := Scan_Ptr;
469 Current_Line_Start := Scan_Ptr;
471 Token_Name := No_Name;
472 Start_Column := Set_Start_Column;
473 First_Non_Blank_Location := Scan_Ptr;
477 -- Set default for Comes_From_Source. All nodes built now until we
478 -- reenter the analyzer will have Comes_From_Source set to True
480 Set_Comes_From_Source_Default (True);
482 -- Check license if GNAT type header possibly present
484 if Source_Last (Index) - Scan_Ptr > 80
485 and then Source (Scan_Ptr .. Scan_Ptr + 77) = GNAT_Hedr
487 Set_License (Current_Source_File, Determine_License);
490 -- Scan initial token (note this initializes Prev_Token, Prev_Token_Ptr)
494 -- Clear flags for reserved words used as identifiers
496 for J in Token_Type loop
497 Used_As_Identifier (J) := False;
500 end Initialize_Scanner;
506 procedure Nlit is separate;
515 Prev_Token_Ptr := Token_Ptr;
516 Token_Name := Error_Name;
518 -- The following loop runs more than once only if a format effector
519 -- (tab, vertical tab, form feed, line feed, carriage return) is
520 -- encountered and skipped, or some error situation, such as an
521 -- illegal character, is encountered.
524 -- Skip past blanks, loop is opened up for speed
526 while Source (Scan_Ptr) = ' ' loop
528 if Source (Scan_Ptr + 1) /= ' ' then
529 Scan_Ptr := Scan_Ptr + 1;
533 if Source (Scan_Ptr + 2) /= ' ' then
534 Scan_Ptr := Scan_Ptr + 2;
538 if Source (Scan_Ptr + 3) /= ' ' then
539 Scan_Ptr := Scan_Ptr + 3;
543 if Source (Scan_Ptr + 4) /= ' ' then
544 Scan_Ptr := Scan_Ptr + 4;
548 if Source (Scan_Ptr + 5) /= ' ' then
549 Scan_Ptr := Scan_Ptr + 5;
553 if Source (Scan_Ptr + 6) /= ' ' then
554 Scan_Ptr := Scan_Ptr + 6;
558 if Source (Scan_Ptr + 7) /= ' ' then
559 Scan_Ptr := Scan_Ptr + 7;
563 Scan_Ptr := Scan_Ptr + 8;
566 -- We are now at a non-blank character, which is the first character
567 -- of the token we will scan, and hence the value of Token_Ptr.
569 Token_Ptr := Scan_Ptr;
571 -- Here begins the main case statement which transfers control on
572 -- the basis of the non-blank character we have encountered.
574 case Source (Scan_Ptr) is
576 -- Line terminator characters
578 when CR | LF | FF | VT => Line_Terminator_Case : begin
580 -- Check line too long
588 Skip_Line_Terminators (Scan_Ptr, Physical);
590 -- If we are at start of physical line, update scan pointers
591 -- to reflect the start of the new line.
594 Current_Line_Start := Scan_Ptr;
595 Start_Column := Set_Start_Column;
596 First_Non_Blank_Location := Scan_Ptr;
599 end Line_Terminator_Case;
601 -- Horizontal tab, just skip past it
604 if Style_Check then Style.Check_HT; end if;
605 Scan_Ptr := Scan_Ptr + 1;
607 -- End of file character, treated as an end of file only if it
608 -- is the last character in the buffer, otherwise it is ignored.
611 if Scan_Ptr = Source_Last (Current_Source_File) then
617 Scan_Ptr := Scan_Ptr + 1;
623 Accumulate_Checksum ('&');
625 if Source (Scan_Ptr + 1) = '&' then
626 Error_Msg_S ("'&'& should be `AND THEN`");
627 Scan_Ptr := Scan_Ptr + 2;
632 Scan_Ptr := Scan_Ptr + 1;
633 Token := Tok_Ampersand;
637 -- Asterisk (can be multiplication operator or double asterisk
638 -- which is the exponentiation compound delimtier).
641 Accumulate_Checksum ('*');
643 if Source (Scan_Ptr + 1) = '*' then
644 Accumulate_Checksum ('*');
645 Scan_Ptr := Scan_Ptr + 2;
646 Token := Tok_Double_Asterisk;
650 Scan_Ptr := Scan_Ptr + 1;
651 Token := Tok_Asterisk;
655 -- Colon, which can either be an isolated colon, or part of an
656 -- assignment compound delimiter.
659 Accumulate_Checksum (':');
661 if Double_Char_Token ('=') then
662 Token := Tok_Colon_Equal;
663 if Style_Check then Style.Check_Colon_Equal; end if;
666 elsif Source (Scan_Ptr + 1) = '-'
667 and then Source (Scan_Ptr + 2) /= '-'
669 Token := Tok_Colon_Equal;
670 Error_Msg (":- should be :=", Scan_Ptr);
671 Scan_Ptr := Scan_Ptr + 2;
675 Scan_Ptr := Scan_Ptr + 1;
677 if Style_Check then Style.Check_Colon; end if;
684 Accumulate_Checksum ('(');
685 Scan_Ptr := Scan_Ptr + 1;
686 Token := Tok_Left_Paren;
687 if Style_Check then Style.Check_Left_Paren; end if;
693 if Source (Scan_Ptr + 1) = '"' then
695 goto Scan_Identifier;
698 Error_Msg_S ("illegal character, replaced by ""(""");
699 Scan_Ptr := Scan_Ptr + 1;
700 Token := Tok_Left_Paren;
707 Error_Msg_S ("illegal character, replaced by ""(""");
708 Scan_Ptr := Scan_Ptr + 1;
709 Token := Tok_Left_Paren;
715 Accumulate_Checksum (',');
716 Scan_Ptr := Scan_Ptr + 1;
718 if Style_Check then Style.Check_Comma; end if;
721 -- Dot, which is either an isolated period, or part of a double
722 -- dot compound delimiter sequence. We also check for the case of
723 -- a digit following the period, to give a better error message.
726 Accumulate_Checksum ('.');
728 if Double_Char_Token ('.') then
729 Token := Tok_Dot_Dot;
730 if Style_Check then Style.Check_Dot_Dot; end if;
733 elsif Source (Scan_Ptr + 1) in '0' .. '9' then
734 Error_Msg_S ("numeric literal cannot start with point");
735 Scan_Ptr := Scan_Ptr + 1;
738 Scan_Ptr := Scan_Ptr + 1;
743 -- Equal, which can either be an equality operator, or part of the
744 -- arrow (=>) compound delimiter.
747 Accumulate_Checksum ('=');
749 if Double_Char_Token ('>') then
751 if Style_Check then Style.Check_Arrow; end if;
754 elsif Source (Scan_Ptr + 1) = '=' then
755 Error_Msg_S ("== should be =");
756 Scan_Ptr := Scan_Ptr + 1;
759 Scan_Ptr := Scan_Ptr + 1;
763 -- Greater than, which can be a greater than operator, greater than
764 -- or equal operator, or first character of a right label bracket.
767 Accumulate_Checksum ('>');
769 if Double_Char_Token ('=') then
770 Token := Tok_Greater_Equal;
773 elsif Double_Char_Token ('>') then
774 Token := Tok_Greater_Greater;
778 Scan_Ptr := Scan_Ptr + 1;
779 Token := Tok_Greater;
783 -- Less than, which can be a less than operator, less than or equal
784 -- operator, or the first character of a left label bracket, or the
785 -- first character of a box (<>) compound delimiter.
788 Accumulate_Checksum ('<');
790 if Double_Char_Token ('=') then
791 Token := Tok_Less_Equal;
794 elsif Double_Char_Token ('>') then
796 if Style_Check then Style.Check_Box; end if;
799 elsif Double_Char_Token ('<') then
800 Token := Tok_Less_Less;
804 Scan_Ptr := Scan_Ptr + 1;
809 -- Minus, which is either a subtraction operator, or the first
810 -- character of double minus starting a comment
812 when '-' => Minus_Case : begin
813 if Source (Scan_Ptr + 1) = '>' then
814 Error_Msg_S ("invalid token");
815 Scan_Ptr := Scan_Ptr + 2;
819 elsif Source (Scan_Ptr + 1) /= '-' then
820 Accumulate_Checksum ('-');
821 Scan_Ptr := Scan_Ptr + 1;
827 else -- Source (Scan_Ptr + 1) = '-' then
828 if Style_Check then Style.Check_Comment; end if;
829 Scan_Ptr := Scan_Ptr + 2;
831 -- Loop to scan comment (this loop runs more than once only if
832 -- a horizontal tab or other non-graphic character is scanned)
835 -- Scan to non graphic character (opened up for speed)
838 exit when Source (Scan_Ptr) not in Graphic_Character;
839 Scan_Ptr := Scan_Ptr + 1;
840 exit when Source (Scan_Ptr) not in Graphic_Character;
841 Scan_Ptr := Scan_Ptr + 1;
842 exit when Source (Scan_Ptr) not in Graphic_Character;
843 Scan_Ptr := Scan_Ptr + 1;
844 exit when Source (Scan_Ptr) not in Graphic_Character;
845 Scan_Ptr := Scan_Ptr + 1;
846 exit when Source (Scan_Ptr) not in Graphic_Character;
847 Scan_Ptr := Scan_Ptr + 1;
850 -- Keep going if horizontal tab
852 if Source (Scan_Ptr) = HT then
853 if Style_Check then Style.Check_HT; end if;
854 Scan_Ptr := Scan_Ptr + 1;
856 -- Terminate scan of comment if line terminator
858 elsif Source (Scan_Ptr) in Line_Terminator then
861 -- Terminate scan of comment if end of file encountered
862 -- (embedded EOF character or real last character in file)
864 elsif Source (Scan_Ptr) = EOF then
867 -- Keep going if character in 80-FF range, or is ESC. These
868 -- characters are allowed in comments by RM-2.1(1), 2.7(2).
869 -- They are allowed even in Ada 83 mode according to the
870 -- approved AI. ESC was added to the AI in June 93.
872 elsif Source (Scan_Ptr) in Upper_Half_Character
873 or else Source (Scan_Ptr) = ESC
875 Scan_Ptr := Scan_Ptr + 1;
877 -- Otherwise we have an illegal comment character
880 Error_Illegal_Character;
885 -- Note that we do NOT execute a return here, instead we fall
886 -- through to reexecute the scan loop to look for a token.
891 -- Double quote or percent starting a string literal
897 -- Apostrophe. This can either be the start of a character literal,
898 -- or an isolated apostrophe used in a qualified expression or an
899 -- attribute. We treat it as a character literal if it does not
900 -- follow a right parenthesis, identifier, the keyword ALL or
901 -- a literal. This means that we correctly treat constructs like:
903 -- A := CHARACTER'('A');
905 -- Note that RM-2.2(7) does not require a separator between
906 -- "CHARACTER" and "'" in the above.
908 when ''' => Char_Literal_Case : declare
913 Accumulate_Checksum (''');
914 Scan_Ptr := Scan_Ptr + 1;
916 -- Here is where we make the test to distinguish the cases. Treat
917 -- as apostrophe if previous token is an identifier, right paren
918 -- or the reserved word "all" (latter case as in A.all'Address)
919 -- Also treat it as apostrophe after a literal (this catches
920 -- some legitimate cases, like A."abs"'Address, and also gives
921 -- better error behavior for impossible cases like 123'xxx).
923 if Prev_Token = Tok_Identifier
924 or else Prev_Token = Tok_Right_Paren
925 or else Prev_Token = Tok_All
926 or else Prev_Token in Token_Class_Literal
928 Token := Tok_Apostrophe;
931 -- Otherwise the apostrophe starts a character literal
934 -- Case of wide character literal with ESC or [ encoding
936 if (Source (Scan_Ptr) = ESC
938 Wide_Character_Encoding_Method in WC_ESC_Encoding_Method)
940 (Source (Scan_Ptr) in Upper_Half_Character
944 (Source (Scan_Ptr) = '['
946 Source (Scan_Ptr + 1) = '"')
948 Scan_Wide (Source, Scan_Ptr, Code, Err);
949 Accumulate_Checksum (Code);
952 Error_Illegal_Wide_Character;
955 if Source (Scan_Ptr) /= ''' then
956 Error_Msg_S ("missing apostrophe");
958 Scan_Ptr := Scan_Ptr + 1;
961 -- If we do not find a closing quote in the expected place then
962 -- assume that we have a misguided attempt at a string literal.
964 -- However, if previous token is RANGE, then we return an
965 -- apostrophe instead since this gives better error recovery
967 elsif Source (Scan_Ptr + 1) /= ''' then
969 if Prev_Token = Tok_Range then
970 Token := Tok_Apostrophe;
974 Scan_Ptr := Scan_Ptr - 1;
976 ("strings are delimited by double quote character");
981 -- Otherwise we have a (non-wide) character literal
984 Accumulate_Checksum (Source (Scan_Ptr));
986 if Source (Scan_Ptr) not in Graphic_Character then
987 if Source (Scan_Ptr) in Upper_Half_Character then
989 Error_Illegal_Character;
993 Error_Illegal_Character;
997 Code := Get_Char_Code (Source (Scan_Ptr));
998 Scan_Ptr := Scan_Ptr + 2;
1001 -- Fall through here with Scan_Ptr updated past the closing
1002 -- quote, and Code set to the Char_Code value for the literal
1004 Accumulate_Checksum (''');
1005 Token := Tok_Char_Literal;
1006 Token_Node := New_Node (N_Character_Literal, Token_Ptr);
1007 Set_Char_Literal_Value (Token_Node, Code);
1008 Set_Character_Literal_Name (Code);
1009 Token_Name := Name_Find;
1010 Set_Chars (Token_Node, Token_Name);
1013 end Char_Literal_Case;
1015 -- Right parenthesis
1018 Accumulate_Checksum (')');
1019 Scan_Ptr := Scan_Ptr + 1;
1020 Token := Tok_Right_Paren;
1021 if Style_Check then Style.Check_Right_Paren; end if;
1024 -- Right bracket or right brace, treated as right paren
1027 Error_Msg_S ("illegal character, replaced by "")""");
1028 Scan_Ptr := Scan_Ptr + 1;
1029 Token := Tok_Right_Paren;
1032 -- Slash (can be division operator or first character of not equal)
1035 Accumulate_Checksum ('/');
1037 if Double_Char_Token ('=') then
1038 Token := Tok_Not_Equal;
1041 Scan_Ptr := Scan_Ptr + 1;
1049 Accumulate_Checksum (';');
1050 Scan_Ptr := Scan_Ptr + 1;
1051 Token := Tok_Semicolon;
1052 if Style_Check then Style.Check_Semicolon; end if;
1057 when '|' => Vertical_Bar_Case : begin
1058 Accumulate_Checksum ('|');
1060 -- Special check for || to give nice message
1062 if Source (Scan_Ptr + 1) = '|' then
1063 Error_Msg_S ("""'|'|"" should be `OR ELSE`");
1064 Scan_Ptr := Scan_Ptr + 2;
1069 Scan_Ptr := Scan_Ptr + 1;
1070 Token := Tok_Vertical_Bar;
1071 if Style_Check then Style.Check_Vertical_Bar; end if;
1074 end Vertical_Bar_Case;
1076 -- Exclamation, replacement character for vertical bar
1078 when '!' => Exclamation_Case : begin
1079 Accumulate_Checksum ('!');
1081 if Source (Scan_Ptr + 1) = '=' then
1082 Error_Msg_S ("'!= should be /=");
1083 Scan_Ptr := Scan_Ptr + 2;
1084 Token := Tok_Not_Equal;
1088 Scan_Ptr := Scan_Ptr + 1;
1089 Token := Tok_Vertical_Bar;
1093 end Exclamation_Case;
1097 when '+' => Plus_Case : begin
1098 Accumulate_Checksum ('+');
1099 Scan_Ptr := Scan_Ptr + 1;
1104 -- Digits starting a numeric literal
1109 if Identifier_Char (Source (Scan_Ptr)) then
1111 ("delimiter required between literal and identifier");
1116 -- Lower case letters
1120 Name_Buffer (1) := Source (Scan_Ptr);
1121 Accumulate_Checksum (Name_Buffer (1));
1122 Scan_Ptr := Scan_Ptr + 1;
1123 goto Scan_Identifier;
1125 -- Upper case letters
1130 Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
1131 Accumulate_Checksum (Name_Buffer (1));
1132 Scan_Ptr := Scan_Ptr + 1;
1133 goto Scan_Identifier;
1135 -- Underline character
1138 Error_Msg_S ("identifier cannot start with underline");
1140 Name_Buffer (1) := '_';
1141 Scan_Ptr := Scan_Ptr + 1;
1142 goto Scan_Identifier;
1144 -- Space (not possible, because we scanned past blanks)
1147 raise Program_Error;
1149 -- Characters in top half of ASCII 8-bit chart
1151 when Upper_Half_Character =>
1153 -- Wide character case. Note that Scan_Identifier will issue
1154 -- an appropriate message if wide characters are not allowed
1157 if Upper_Half_Encoding then
1159 goto Scan_Identifier;
1161 -- Otherwise we have OK Latin-1 character
1164 -- Upper half characters may possibly be identifier letters
1165 -- but can never be digits, so Identifier_Char can be used
1166 -- to test for a valid start of identifier character.
1168 if Identifier_Char (Source (Scan_Ptr)) then
1170 goto Scan_Identifier;
1172 Error_Illegal_Character;
1178 -- ESC character, possible start of identifier if wide characters
1179 -- using ESC encoding are allowed in identifiers, which we can
1180 -- tell by looking at the Identifier_Char flag for ESC, which is
1181 -- only true if these conditions are met.
1183 if Identifier_Char (ESC) then
1185 goto Scan_Identifier;
1187 Error_Illegal_Wide_Character;
1190 -- Invalid control characters
1192 when NUL | SOH | STX | ETX | EOT | ENQ | ACK | BEL | BS | SO |
1193 SI | DLE | DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN |
1194 EM | FS | GS | RS | US | DEL
1196 Error_Illegal_Character;
1198 -- Invalid graphic characters
1200 when '#' | '$' | '?' | '@' | '`' | '\' | '^' | '~' =>
1201 Error_Illegal_Character;
1203 -- End switch on non-blank character
1207 -- End loop past format effectors. The exit from this loop is by
1208 -- executing a return statement following completion of token scan
1209 -- (control never falls out of this loop to the code which follows)
1213 -- Identifier scanning routine. On entry, some initial characters
1214 -- of the identifier may have already been stored in Name_Buffer.
1215 -- If so, Name_Len has the number of characters stored. otherwise
1216 -- Name_Len is set to zero on entry.
1220 -- This loop scans as fast as possible past lower half letters
1221 -- and digits, which we expect to be the most common characters.
1224 if Source (Scan_Ptr) in 'a' .. 'z'
1225 or else Source (Scan_Ptr) in '0' .. '9'
1227 Name_Buffer (Name_Len + 1) := Source (Scan_Ptr);
1228 Accumulate_Checksum (Source (Scan_Ptr));
1230 elsif Source (Scan_Ptr) in 'A' .. 'Z' then
1231 Name_Buffer (Name_Len + 1) :=
1232 Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
1233 Accumulate_Checksum (Name_Buffer (Name_Len + 1));
1238 -- Open out the loop a couple of times for speed
1240 if Source (Scan_Ptr + 1) in 'a' .. 'z'
1241 or else Source (Scan_Ptr + 1) in '0' .. '9'
1243 Name_Buffer (Name_Len + 2) := Source (Scan_Ptr + 1);
1244 Accumulate_Checksum (Source (Scan_Ptr + 1));
1246 elsif Source (Scan_Ptr + 1) in 'A' .. 'Z' then
1247 Name_Buffer (Name_Len + 2) :=
1248 Character'Val (Character'Pos (Source (Scan_Ptr + 1)) + 32);
1249 Accumulate_Checksum (Name_Buffer (Name_Len + 2));
1252 Scan_Ptr := Scan_Ptr + 1;
1253 Name_Len := Name_Len + 1;
1257 if Source (Scan_Ptr + 2) in 'a' .. 'z'
1258 or else Source (Scan_Ptr + 2) in '0' .. '9'
1260 Name_Buffer (Name_Len + 3) := Source (Scan_Ptr + 2);
1261 Accumulate_Checksum (Source (Scan_Ptr + 2));
1263 elsif Source (Scan_Ptr + 2) in 'A' .. 'Z' then
1264 Name_Buffer (Name_Len + 3) :=
1265 Character'Val (Character'Pos (Source (Scan_Ptr + 2)) + 32);
1266 Accumulate_Checksum (Name_Buffer (Name_Len + 3));
1268 Scan_Ptr := Scan_Ptr + 2;
1269 Name_Len := Name_Len + 2;
1273 if Source (Scan_Ptr + 3) in 'a' .. 'z'
1274 or else Source (Scan_Ptr + 3) in '0' .. '9'
1276 Name_Buffer (Name_Len + 4) := Source (Scan_Ptr + 3);
1277 Accumulate_Checksum (Source (Scan_Ptr + 3));
1279 elsif Source (Scan_Ptr + 3) in 'A' .. 'Z' then
1280 Name_Buffer (Name_Len + 4) :=
1281 Character'Val (Character'Pos (Source (Scan_Ptr + 3)) + 32);
1282 Accumulate_Checksum (Name_Buffer (Name_Len + 4));
1285 Scan_Ptr := Scan_Ptr + 3;
1286 Name_Len := Name_Len + 3;
1290 Scan_Ptr := Scan_Ptr + 4;
1291 Name_Len := Name_Len + 4;
1294 -- If we fall through, then we have encountered either an underline
1295 -- character, or an extended identifier character (i.e. one from the
1296 -- upper half), or a wide character, or an identifier terminator.
1297 -- The initial test speeds us up in the most common case where we
1298 -- have an identifier terminator. Note that ESC is an identifier
1299 -- character only if a wide character encoding method that uses
1300 -- ESC encoding is active, so if we find an ESC character we know
1301 -- that we have a wide character.
1303 if Identifier_Char (Source (Scan_Ptr)) then
1305 -- Case of underline, check for error cases of double underline,
1306 -- and for a trailing underline character
1308 if Source (Scan_Ptr) = '_' then
1309 Accumulate_Checksum ('_');
1310 Name_Len := Name_Len + 1;
1311 Name_Buffer (Name_Len) := '_';
1313 if Identifier_Char (Source (Scan_Ptr + 1)) then
1314 Scan_Ptr := Scan_Ptr + 1;
1316 if Source (Scan_Ptr) = '_' then
1317 Error_No_Double_Underline;
1321 Error_Msg_S ("identifier cannot end with underline");
1322 Scan_Ptr := Scan_Ptr + 1;
1325 goto Scan_Identifier;
1327 -- Upper half character
1329 elsif Source (Scan_Ptr) in Upper_Half_Character
1330 and then not Upper_Half_Encoding
1332 Accumulate_Checksum (Source (Scan_Ptr));
1333 Store_Encoded_Character
1334 (Get_Char_Code (Fold_Lower (Source (Scan_Ptr))));
1335 Scan_Ptr := Scan_Ptr + 1;
1336 goto Scan_Identifier;
1338 -- Left bracket not followed by a quote terminates an identifier.
1339 -- This is an error, but we don't want to give a junk error msg
1340 -- about wide characters in this case!
1342 elsif Source (Scan_Ptr) = '['
1343 and then Source (Scan_Ptr + 1) /= '"'
1347 -- We know we have a wide character encoding here (the current
1348 -- character is either ESC, left bracket, or an upper half
1349 -- character depending on the encoding method).
1352 -- Scan out the wide character and insert the appropriate
1353 -- encoding into the name table entry for the identifier.
1356 Sptr : constant Source_Ptr := Scan_Ptr;
1362 Scan_Wide (Source, Scan_Ptr, Code, Err);
1364 -- If error, signal error
1367 Error_Illegal_Wide_Character;
1369 -- If the character scanned is a normal identifier
1370 -- character, then we treat it that way.
1372 elsif In_Character_Range (Code)
1373 and then Identifier_Char (Get_Character (Code))
1375 Chr := Get_Character (Code);
1376 Accumulate_Checksum (Chr);
1377 Store_Encoded_Character
1378 (Get_Char_Code (Fold_Lower (Chr)));
1380 -- Character is not normal identifier character, store
1381 -- it in encoded form.
1384 Accumulate_Checksum (Code);
1385 Store_Encoded_Character (Code);
1387 -- Make sure we are allowing wide characters in
1388 -- identifiers. Note that we allow wide character
1389 -- notation for an OK identifier character. This
1390 -- in particular allows bracket or other notation
1391 -- to be used for upper half letters.
1393 if Identifier_Character_Set /= 'w' then
1395 ("wide character not allowed in identifier", Sptr);
1400 goto Scan_Identifier;
1404 -- Scan of identifier is complete. The identifier is stored in
1405 -- Name_Buffer, and Scan_Ptr points past the last character.
1407 Token_Name := Name_Find;
1409 -- Here is where we check if it was a keyword
1411 if Get_Name_Table_Byte (Token_Name) /= 0
1412 and then (Ada_95 or else Token_Name not in Ada_95_Reserved_Words)
1414 Token := Token_Type'Val (Get_Name_Table_Byte (Token_Name));
1416 -- Deal with possible style check for non-lower case keyword,
1417 -- but we don't treat ACCESS, DELTA, DIGITS, RANGE as keywords
1418 -- for this purpose if they appear as attribute designators.
1419 -- Actually we only check the first character for speed.
1422 and then Source (Token_Ptr) <= 'Z'
1423 and then (Prev_Token /= Tok_Apostrophe
1425 (Token /= Tok_Access
1426 and then Token /= Tok_Delta
1427 and then Token /= Tok_Digits
1428 and then Token /= Tok_Range))
1430 Style.Non_Lower_Case_Keyword;
1433 -- We must reset Token_Name since this is not an identifier
1434 -- and if we leave Token_Name set, the parser gets confused
1435 -- because it thinks it is dealing with an identifier instead
1436 -- of the corresponding keyword.
1438 Token_Name := No_Name;
1441 -- It is an identifier after all
1444 Token_Node := New_Node (N_Identifier, Token_Ptr);
1445 Set_Chars (Token_Node, Token_Name);
1446 Token := Tok_Identifier;
1451 ---------------------
1452 -- Scan_First_Char --
1453 ---------------------
1455 function Scan_First_Char return Source_Ptr is
1456 Ptr : Source_Ptr := Current_Line_Start;
1460 if Source (Ptr) = ' ' then
1463 elsif Source (Ptr) = HT then
1464 if Style_Check then Style.Check_HT; end if;
1471 end Scan_First_Char;
1473 ------------------------------
1474 -- Scan_Reserved_Identifier --
1475 ------------------------------
1477 procedure Scan_Reserved_Identifier (Force_Msg : Boolean) is
1478 Token_Chars : constant String := Token_Type'Image (Token);
1481 -- We have in Token_Chars the image of the Token name, i.e. Tok_xxx.
1482 -- This code extracts the xxx and makes an identifier out of it.
1486 for J in 5 .. Token_Chars'Length loop
1487 Name_Len := Name_Len + 1;
1488 Name_Buffer (Name_Len) := Fold_Lower (Token_Chars (J));
1491 Token_Name := Name_Find;
1493 if not Used_As_Identifier (Token) or else Force_Msg then
1494 Error_Msg_Name_1 := Token_Name;
1495 Error_Msg_SC ("reserved word* cannot be used as identifier!");
1496 Used_As_Identifier (Token) := True;
1499 Token := Tok_Identifier;
1500 Token_Node := New_Node (N_Identifier, Token_Ptr);
1501 Set_Chars (Token_Node, Token_Name);
1502 end Scan_Reserved_Identifier;
1504 ----------------------
1505 -- Set_Start_Column --
1506 ----------------------
1508 -- Note: it seems at first glance a little expensive to compute this value
1509 -- for every source line (since it is certainly not used for all source
1510 -- lines). On the other hand, it doesn't take much more work to skip past
1511 -- the initial white space on the line counting the columns than it would
1512 -- to scan past the white space using the standard scanning circuits.
1514 function Set_Start_Column return Column_Number is
1515 Start_Column : Column_Number := 0;
1518 -- Outer loop scans past horizontal tab characters
1522 -- Inner loop scans past blanks as fast as possible, bumping Scan_Ptr
1523 -- past the blanks and adjusting Start_Column to account for them.
1526 if Source (Scan_Ptr) = ' ' then
1527 if Source (Scan_Ptr + 1) = ' ' then
1528 if Source (Scan_Ptr + 2) = ' ' then
1529 if Source (Scan_Ptr + 3) = ' ' then
1530 if Source (Scan_Ptr + 4) = ' ' then
1531 if Source (Scan_Ptr + 5) = ' ' then
1532 if Source (Scan_Ptr + 6) = ' ' then
1533 Scan_Ptr := Scan_Ptr + 7;
1534 Start_Column := Start_Column + 7;
1536 Scan_Ptr := Scan_Ptr + 6;
1537 Start_Column := Start_Column + 6;
1541 Scan_Ptr := Scan_Ptr + 5;
1542 Start_Column := Start_Column + 5;
1546 Scan_Ptr := Scan_Ptr + 4;
1547 Start_Column := Start_Column + 4;
1551 Scan_Ptr := Scan_Ptr + 3;
1552 Start_Column := Start_Column + 3;
1556 Scan_Ptr := Scan_Ptr + 2;
1557 Start_Column := Start_Column + 2;
1561 Scan_Ptr := Scan_Ptr + 1;
1562 Start_Column := Start_Column + 1;
1568 end loop Blanks_Loop;
1570 -- Outer loop keeps going only if a horizontal tab follows
1572 if Source (Scan_Ptr) = HT then
1573 if Style_Check then Style.Check_HT; end if;
1574 Scan_Ptr := Scan_Ptr + 1;
1575 Start_Column := (Start_Column / 8) * 8 + 8;
1582 return Start_Column;
1583 end Set_Start_Column;
1589 procedure Slit is separate;