1 ------------------------------------------------------------------------------
3 -- GNAT COMPILER COMPONENTS --
11 -- Copyright (C) 1992-2001 Free Software Foundation, Inc. --
13 -- GNAT is free software; you can redistribute it and/or modify it under --
14 -- terms of the GNU General Public License as published by the Free Soft- --
15 -- ware Foundation; either version 2, or (at your option) any later ver- --
16 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
17 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
18 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
19 -- for more details. You should have received a copy of the GNU General --
20 -- Public License distributed with GNAT; see file COPYING. If not, write --
21 -- to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, --
22 -- MA 02111-1307, USA. --
24 -- GNAT was originally developed by the GNAT team at New York University. --
25 -- It is now maintained by Ada Core Technologies Inc (http://www.gnat.com). --
27 ------------------------------------------------------------------------------
29 with Atree; use Atree;
30 with Csets; use Csets;
31 with Errout; use Errout;
32 with Hostparm; use Hostparm;
33 with Namet; use Namet;
35 with Scans; use Scans;
36 with Sinput; use Sinput;
37 with Sinfo; use Sinfo;
38 with Snames; use Snames;
40 with Widechar; use Widechar;
43 with System.WCh_Con; use System.WCh_Con;
48 -- Make control characters visible
50 Used_As_Identifier : array (Token_Type) of Boolean;
51 -- Flags set True if a given keyword is used as an identifier (used to
52 -- make sure that we only post an error message for incorrect use of a
53 -- keyword as an identifier once for a given keyword).
55 -----------------------
56 -- Local Subprograms --
57 -----------------------
59 procedure Accumulate_Checksum (C : Character);
60 pragma Inline (Accumulate_Checksum);
61 -- This routine accumulates the checksum given character C. During the
62 -- scanning of a source file, this routine is called with every character
63 -- in the source, excluding blanks, and all control characters (except
64 -- that ESC is included in the checksum). Upper case letters not in string
65 -- literals are folded by the caller. See Sinput spec for the documentation
66 -- of the checksum algorithm. Note: checksum values are only used if we
67 -- generate code, so it is not necessary to worry about making the right
68 -- sequence of calls in any error situation.
70 procedure Accumulate_Checksum (C : Char_Code);
71 pragma Inline (Accumulate_Checksum);
72 -- This version is identical, except that the argument, C, is a character
73 -- code value instead of a character. This is used when wide characters
74 -- are scanned. We use the character code rather than the ASCII characters
75 -- so that the checksum is independent of wide character encoding method.
77 procedure Initialize_Checksum;
78 pragma Inline (Initialize_Checksum);
79 -- Initialize checksum value
81 procedure Check_End_Of_Line;
82 -- Called when end of line encountered. Checks that line is not
83 -- too long, and that other style checks for the end of line are met.
85 function Determine_License return License_Type;
86 -- Scan header of file and check that it has an appropriate GNAT-style
87 -- header with a proper license statement. Returns GPL, Unrestricted,
88 -- or Modified_GPL depending on header. If none of these, returns Unknown.
90 function Double_Char_Token (C : Character) return Boolean;
91 -- This function is used for double character tokens like := or <>. It
92 -- checks if the character following Source (Scan_Ptr) is C, and if so
93 -- bumps Scan_Ptr past the pair of characters and returns True. A space
94 -- between the two characters is also recognized with an appropriate
95 -- error message being issued. If C is not present, False is returned.
96 -- Note that Double_Char_Token can only be used for tokens defined in
97 -- the Ada syntax (it's use for error cases like && is not appropriate
98 -- since we do not want a junk message for a case like &-space-&).
100 procedure Error_Illegal_Character;
101 -- Give illegal character error, Scan_Ptr points to character. On return,
102 -- Scan_Ptr is bumped past the illegal character.
104 procedure Error_Illegal_Wide_Character;
105 -- Give illegal wide character message. On return, Scan_Ptr is bumped
106 -- past the illegal character, which may still leave us pointing to
107 -- junk, not much we can do if the escape sequence is messed up!
109 procedure Error_Long_Line;
110 -- Signal error of excessively long line
112 procedure Error_No_Double_Underline;
113 -- Signal error of double underline character
116 -- This is the procedure for scanning out numeric literals. On entry,
117 -- Scan_Ptr points to the digit that starts the numeric literal (the
118 -- checksum for this character has not been accumulated yet). On return
119 -- Scan_Ptr points past the last character of the numeric literal, Token
120 -- and Token_Node are set appropriately, and the checksum is updated.
122 function Set_Start_Column return Column_Number;
123 -- This routine is called with Scan_Ptr pointing to the first character
124 -- of a line. On exit, Scan_Ptr is advanced to the first non-blank
125 -- character of this line (or to the terminating format effector if the
126 -- line contains no non-blank characters), and the returned result is the
127 -- column number of this non-blank character (zero origin), which is the
128 -- value to be stored in the Start_Column scan variable.
131 -- This is the procedure for scanning out string literals. On entry,
132 -- Scan_Ptr points to the opening string quote (the checksum for this
133 -- character has not been accumulated yet). On return Scan_Ptr points
134 -- past the closing quote of the string literal, Token and Token_Node
135 -- are set appropriately, and the checksum is upated.
137 -------------------------
138 -- Accumulate_Checksum --
139 -------------------------
141 procedure Accumulate_Checksum (C : Character) is
143 System.CRC32.Update (System.CRC32.CRC32 (Checksum), C);
144 end Accumulate_Checksum;
146 procedure Accumulate_Checksum (C : Char_Code) is
148 Accumulate_Checksum (Character'Val (C / 256));
149 Accumulate_Checksum (Character'Val (C mod 256));
150 end Accumulate_Checksum;
152 -----------------------
153 -- Check_End_Of_Line --
154 -----------------------
156 procedure Check_End_Of_Line is
157 Len : constant Int := Int (Scan_Ptr) - Int (Current_Line_Start);
160 if Len > Hostparm.Max_Line_Length then
163 elsif Style_Check then
164 Style.Check_Line_Terminator (Len);
166 end Check_End_Of_Line;
168 -----------------------
169 -- Determine_License --
170 -----------------------
172 function Determine_License return License_Type is
173 GPL_Found : Boolean := False;
175 function Contains (S : String) return Boolean;
176 -- See if current comment contains successive non-blank characters
177 -- matching the contents of S. If so leave Scan_Ptr unchanged and
178 -- return True, otherwise leave Scan_Ptr unchanged and return False.
181 -- Skip to line terminator character
187 function Contains (S : String) return Boolean is
194 while Source (SP) /= CR and then Source (SP) /= LF loop
195 if Source (SP) = S (S'First) then
207 while Source (SS) = ' ' loop
211 exit when Source (SS) /= S (CP);
225 procedure Skip_EOL is
227 while Source (Scan_Ptr) /= CR
228 and then Source (Scan_Ptr) /= LF
230 Scan_Ptr := Scan_Ptr + 1;
234 -- Start of processing for Determine_License
238 if Source (Scan_Ptr) /= '-'
239 or else Source (Scan_Ptr + 1) /= '-'
247 elsif Contains ("Asaspecialexception") then
252 elsif Contains ("GNUGeneralPublicLicense") then
257 ("ThisspecificationisadaptedfromtheAdaSemanticInterface")
260 ("ThisspecificationisderivedfromtheAdaReferenceManual")
273 Skip_Line_Terminators (Scan_Ptr, Physical);
275 -- If we are at start of physical line, update scan pointers
276 -- to reflect the start of the new line.
279 Current_Line_Start := Scan_Ptr;
280 Start_Column := Set_Start_Column;
281 First_Non_Blank_Location := Scan_Ptr;
285 end Determine_License;
287 ----------------------------
288 -- Determine_Token_Casing --
289 ----------------------------
291 function Determine_Token_Casing return Casing_Type is
293 return Determine_Casing (Source (Token_Ptr .. Scan_Ptr - 1));
294 end Determine_Token_Casing;
296 -----------------------
297 -- Double_Char_Token --
298 -----------------------
300 function Double_Char_Token (C : Character) return Boolean is
302 if Source (Scan_Ptr + 1) = C then
303 Accumulate_Checksum (C);
304 Scan_Ptr := Scan_Ptr + 2;
307 elsif Source (Scan_Ptr + 1) = ' '
308 and then Source (Scan_Ptr + 2) = C
310 Scan_Ptr := Scan_Ptr + 1;
311 Error_Msg_S ("no space allowed here");
312 Scan_Ptr := Scan_Ptr + 2;
318 end Double_Char_Token;
320 -----------------------------
321 -- Error_Illegal_Character --
322 -----------------------------
324 procedure Error_Illegal_Character is
326 Error_Msg_S ("illegal character");
327 Scan_Ptr := Scan_Ptr + 1;
328 end Error_Illegal_Character;
330 ----------------------------------
331 -- Error_Illegal_Wide_Character --
332 ----------------------------------
334 procedure Error_Illegal_Wide_Character is
338 ("illegal wide character, check " &
339 "'/'W'I'D'E'_'C'H'A'R'A'C'T'E'R'_'E'N'C'O'D'I'N'G qualifer");
342 ("illegal wide character, check -gnatW switch");
345 Scan_Ptr := Scan_Ptr + 1;
346 end Error_Illegal_Wide_Character;
348 ---------------------
349 -- Error_Long_Line --
350 ---------------------
352 procedure Error_Long_Line is
355 ("this line is too long",
356 Current_Line_Start + Hostparm.Max_Line_Length);
359 -------------------------------
360 -- Error_No_Double_Underline --
361 -------------------------------
363 procedure Error_No_Double_Underline is
365 Error_Msg_S ("two consecutive underlines not permitted");
366 end Error_No_Double_Underline;
368 -------------------------
369 -- Initialize_Checksum --
370 -------------------------
372 procedure Initialize_Checksum is
374 System.CRC32.Initialize (System.CRC32.CRC32 (Checksum));
375 end Initialize_Checksum;
377 ------------------------
378 -- Initialize_Scanner --
379 ------------------------
381 procedure Initialize_Scanner
382 (Unit : Unit_Number_Type;
383 Index : Source_File_Index)
385 GNAT_Hedr : constant Text_Buffer (1 .. 78) := (others => '-');
388 -- Set up Token_Type values in Names Table entries for reserved keywords
389 -- We use the Pos value of the Token_Type value. Note we are relying on
390 -- the fact that Token_Type'Val (0) is not a reserved word!
392 Set_Name_Table_Byte (Name_Abort, Token_Type'Pos (Tok_Abort));
393 Set_Name_Table_Byte (Name_Abs, Token_Type'Pos (Tok_Abs));
394 Set_Name_Table_Byte (Name_Abstract, Token_Type'Pos (Tok_Abstract));
395 Set_Name_Table_Byte (Name_Accept, Token_Type'Pos (Tok_Accept));
396 Set_Name_Table_Byte (Name_Access, Token_Type'Pos (Tok_Access));
397 Set_Name_Table_Byte (Name_And, Token_Type'Pos (Tok_And));
398 Set_Name_Table_Byte (Name_Aliased, Token_Type'Pos (Tok_Aliased));
399 Set_Name_Table_Byte (Name_All, Token_Type'Pos (Tok_All));
400 Set_Name_Table_Byte (Name_Array, Token_Type'Pos (Tok_Array));
401 Set_Name_Table_Byte (Name_At, Token_Type'Pos (Tok_At));
402 Set_Name_Table_Byte (Name_Begin, Token_Type'Pos (Tok_Begin));
403 Set_Name_Table_Byte (Name_Body, Token_Type'Pos (Tok_Body));
404 Set_Name_Table_Byte (Name_Case, Token_Type'Pos (Tok_Case));
405 Set_Name_Table_Byte (Name_Constant, Token_Type'Pos (Tok_Constant));
406 Set_Name_Table_Byte (Name_Declare, Token_Type'Pos (Tok_Declare));
407 Set_Name_Table_Byte (Name_Delay, Token_Type'Pos (Tok_Delay));
408 Set_Name_Table_Byte (Name_Delta, Token_Type'Pos (Tok_Delta));
409 Set_Name_Table_Byte (Name_Digits, Token_Type'Pos (Tok_Digits));
410 Set_Name_Table_Byte (Name_Do, Token_Type'Pos (Tok_Do));
411 Set_Name_Table_Byte (Name_Else, Token_Type'Pos (Tok_Else));
412 Set_Name_Table_Byte (Name_Elsif, Token_Type'Pos (Tok_Elsif));
413 Set_Name_Table_Byte (Name_End, Token_Type'Pos (Tok_End));
414 Set_Name_Table_Byte (Name_Entry, Token_Type'Pos (Tok_Entry));
415 Set_Name_Table_Byte (Name_Exception, Token_Type'Pos (Tok_Exception));
416 Set_Name_Table_Byte (Name_Exit, Token_Type'Pos (Tok_Exit));
417 Set_Name_Table_Byte (Name_For, Token_Type'Pos (Tok_For));
418 Set_Name_Table_Byte (Name_Function, Token_Type'Pos (Tok_Function));
419 Set_Name_Table_Byte (Name_Generic, Token_Type'Pos (Tok_Generic));
420 Set_Name_Table_Byte (Name_Goto, Token_Type'Pos (Tok_Goto));
421 Set_Name_Table_Byte (Name_If, Token_Type'Pos (Tok_If));
422 Set_Name_Table_Byte (Name_In, Token_Type'Pos (Tok_In));
423 Set_Name_Table_Byte (Name_Is, Token_Type'Pos (Tok_Is));
424 Set_Name_Table_Byte (Name_Limited, Token_Type'Pos (Tok_Limited));
425 Set_Name_Table_Byte (Name_Loop, Token_Type'Pos (Tok_Loop));
426 Set_Name_Table_Byte (Name_Mod, Token_Type'Pos (Tok_Mod));
427 Set_Name_Table_Byte (Name_New, Token_Type'Pos (Tok_New));
428 Set_Name_Table_Byte (Name_Not, Token_Type'Pos (Tok_Not));
429 Set_Name_Table_Byte (Name_Null, Token_Type'Pos (Tok_Null));
430 Set_Name_Table_Byte (Name_Of, Token_Type'Pos (Tok_Of));
431 Set_Name_Table_Byte (Name_Or, Token_Type'Pos (Tok_Or));
432 Set_Name_Table_Byte (Name_Others, Token_Type'Pos (Tok_Others));
433 Set_Name_Table_Byte (Name_Out, Token_Type'Pos (Tok_Out));
434 Set_Name_Table_Byte (Name_Package, Token_Type'Pos (Tok_Package));
435 Set_Name_Table_Byte (Name_Pragma, Token_Type'Pos (Tok_Pragma));
436 Set_Name_Table_Byte (Name_Private, Token_Type'Pos (Tok_Private));
437 Set_Name_Table_Byte (Name_Procedure, Token_Type'Pos (Tok_Procedure));
438 Set_Name_Table_Byte (Name_Protected, Token_Type'Pos (Tok_Protected));
439 Set_Name_Table_Byte (Name_Raise, Token_Type'Pos (Tok_Raise));
440 Set_Name_Table_Byte (Name_Range, Token_Type'Pos (Tok_Range));
441 Set_Name_Table_Byte (Name_Record, Token_Type'Pos (Tok_Record));
442 Set_Name_Table_Byte (Name_Rem, Token_Type'Pos (Tok_Rem));
443 Set_Name_Table_Byte (Name_Renames, Token_Type'Pos (Tok_Renames));
444 Set_Name_Table_Byte (Name_Requeue, Token_Type'Pos (Tok_Requeue));
445 Set_Name_Table_Byte (Name_Return, Token_Type'Pos (Tok_Return));
446 Set_Name_Table_Byte (Name_Reverse, Token_Type'Pos (Tok_Reverse));
447 Set_Name_Table_Byte (Name_Select, Token_Type'Pos (Tok_Select));
448 Set_Name_Table_Byte (Name_Separate, Token_Type'Pos (Tok_Separate));
449 Set_Name_Table_Byte (Name_Subtype, Token_Type'Pos (Tok_Subtype));
450 Set_Name_Table_Byte (Name_Tagged, Token_Type'Pos (Tok_Tagged));
451 Set_Name_Table_Byte (Name_Task, Token_Type'Pos (Tok_Task));
452 Set_Name_Table_Byte (Name_Terminate, Token_Type'Pos (Tok_Terminate));
453 Set_Name_Table_Byte (Name_Then, Token_Type'Pos (Tok_Then));
454 Set_Name_Table_Byte (Name_Type, Token_Type'Pos (Tok_Type));
455 Set_Name_Table_Byte (Name_Until, Token_Type'Pos (Tok_Until));
456 Set_Name_Table_Byte (Name_Use, Token_Type'Pos (Tok_Use));
457 Set_Name_Table_Byte (Name_When, Token_Type'Pos (Tok_When));
458 Set_Name_Table_Byte (Name_While, Token_Type'Pos (Tok_While));
459 Set_Name_Table_Byte (Name_With, Token_Type'Pos (Tok_With));
460 Set_Name_Table_Byte (Name_Xor, Token_Type'Pos (Tok_Xor));
462 -- Initialize scan control variables
464 Current_Source_File := Index;
465 Source := Source_Text (Current_Source_File);
466 Current_Source_Unit := Unit;
467 Scan_Ptr := Source_First (Current_Source_File);
469 Token_Ptr := Scan_Ptr;
470 Current_Line_Start := Scan_Ptr;
472 Token_Name := No_Name;
473 Start_Column := Set_Start_Column;
474 First_Non_Blank_Location := Scan_Ptr;
478 -- Set default for Comes_From_Source. All nodes built now until we
479 -- reenter the analyzer will have Comes_From_Source set to True
481 Set_Comes_From_Source_Default (True);
483 -- Check license if GNAT type header possibly present
485 if Source_Last (Index) - Scan_Ptr > 80
486 and then Source (Scan_Ptr .. Scan_Ptr + 77) = GNAT_Hedr
488 Set_License (Current_Source_File, Determine_License);
491 -- Scan initial token (note this initializes Prev_Token, Prev_Token_Ptr)
495 -- Clear flags for reserved words used as identifiers
497 for J in Token_Type loop
498 Used_As_Identifier (J) := False;
501 end Initialize_Scanner;
507 procedure Nlit is separate;
516 Prev_Token_Ptr := Token_Ptr;
517 Token_Name := Error_Name;
519 -- The following loop runs more than once only if a format effector
520 -- (tab, vertical tab, form feed, line feed, carriage return) is
521 -- encountered and skipped, or some error situation, such as an
522 -- illegal character, is encountered.
525 -- Skip past blanks, loop is opened up for speed
527 while Source (Scan_Ptr) = ' ' loop
529 if Source (Scan_Ptr + 1) /= ' ' then
530 Scan_Ptr := Scan_Ptr + 1;
534 if Source (Scan_Ptr + 2) /= ' ' then
535 Scan_Ptr := Scan_Ptr + 2;
539 if Source (Scan_Ptr + 3) /= ' ' then
540 Scan_Ptr := Scan_Ptr + 3;
544 if Source (Scan_Ptr + 4) /= ' ' then
545 Scan_Ptr := Scan_Ptr + 4;
549 if Source (Scan_Ptr + 5) /= ' ' then
550 Scan_Ptr := Scan_Ptr + 5;
554 if Source (Scan_Ptr + 6) /= ' ' then
555 Scan_Ptr := Scan_Ptr + 6;
559 if Source (Scan_Ptr + 7) /= ' ' then
560 Scan_Ptr := Scan_Ptr + 7;
564 Scan_Ptr := Scan_Ptr + 8;
567 -- We are now at a non-blank character, which is the first character
568 -- of the token we will scan, and hence the value of Token_Ptr.
570 Token_Ptr := Scan_Ptr;
572 -- Here begins the main case statement which transfers control on
573 -- the basis of the non-blank character we have encountered.
575 case Source (Scan_Ptr) is
577 -- Line terminator characters
579 when CR | LF | FF | VT => Line_Terminator_Case : begin
581 -- Check line too long
589 Skip_Line_Terminators (Scan_Ptr, Physical);
591 -- If we are at start of physical line, update scan pointers
592 -- to reflect the start of the new line.
595 Current_Line_Start := Scan_Ptr;
596 Start_Column := Set_Start_Column;
597 First_Non_Blank_Location := Scan_Ptr;
600 end Line_Terminator_Case;
602 -- Horizontal tab, just skip past it
605 if Style_Check then Style.Check_HT; end if;
606 Scan_Ptr := Scan_Ptr + 1;
608 -- End of file character, treated as an end of file only if it
609 -- is the last character in the buffer, otherwise it is ignored.
612 if Scan_Ptr = Source_Last (Current_Source_File) then
618 Scan_Ptr := Scan_Ptr + 1;
624 Accumulate_Checksum ('&');
626 if Source (Scan_Ptr + 1) = '&' then
627 Error_Msg_S ("'&'& should be `AND THEN`");
628 Scan_Ptr := Scan_Ptr + 2;
633 Scan_Ptr := Scan_Ptr + 1;
634 Token := Tok_Ampersand;
638 -- Asterisk (can be multiplication operator or double asterisk
639 -- which is the exponentiation compound delimtier).
642 Accumulate_Checksum ('*');
644 if Source (Scan_Ptr + 1) = '*' then
645 Accumulate_Checksum ('*');
646 Scan_Ptr := Scan_Ptr + 2;
647 Token := Tok_Double_Asterisk;
651 Scan_Ptr := Scan_Ptr + 1;
652 Token := Tok_Asterisk;
656 -- Colon, which can either be an isolated colon, or part of an
657 -- assignment compound delimiter.
660 Accumulate_Checksum (':');
662 if Double_Char_Token ('=') then
663 Token := Tok_Colon_Equal;
664 if Style_Check then Style.Check_Colon_Equal; end if;
667 elsif Source (Scan_Ptr + 1) = '-'
668 and then Source (Scan_Ptr + 2) /= '-'
670 Token := Tok_Colon_Equal;
671 Error_Msg (":- should be :=", Scan_Ptr);
672 Scan_Ptr := Scan_Ptr + 2;
676 Scan_Ptr := Scan_Ptr + 1;
678 if Style_Check then Style.Check_Colon; end if;
685 Accumulate_Checksum ('(');
686 Scan_Ptr := Scan_Ptr + 1;
687 Token := Tok_Left_Paren;
688 if Style_Check then Style.Check_Left_Paren; end if;
694 if Source (Scan_Ptr + 1) = '"' then
696 goto Scan_Identifier;
699 Error_Msg_S ("illegal character, replaced by ""(""");
700 Scan_Ptr := Scan_Ptr + 1;
701 Token := Tok_Left_Paren;
708 Error_Msg_S ("illegal character, replaced by ""(""");
709 Scan_Ptr := Scan_Ptr + 1;
710 Token := Tok_Left_Paren;
716 Accumulate_Checksum (',');
717 Scan_Ptr := Scan_Ptr + 1;
719 if Style_Check then Style.Check_Comma; end if;
722 -- Dot, which is either an isolated period, or part of a double
723 -- dot compound delimiter sequence. We also check for the case of
724 -- a digit following the period, to give a better error message.
727 Accumulate_Checksum ('.');
729 if Double_Char_Token ('.') then
730 Token := Tok_Dot_Dot;
731 if Style_Check then Style.Check_Dot_Dot; end if;
734 elsif Source (Scan_Ptr + 1) in '0' .. '9' then
735 Error_Msg_S ("numeric literal cannot start with point");
736 Scan_Ptr := Scan_Ptr + 1;
739 Scan_Ptr := Scan_Ptr + 1;
744 -- Equal, which can either be an equality operator, or part of the
745 -- arrow (=>) compound delimiter.
748 Accumulate_Checksum ('=');
750 if Double_Char_Token ('>') then
752 if Style_Check then Style.Check_Arrow; end if;
755 elsif Source (Scan_Ptr + 1) = '=' then
756 Error_Msg_S ("== should be =");
757 Scan_Ptr := Scan_Ptr + 1;
760 Scan_Ptr := Scan_Ptr + 1;
764 -- Greater than, which can be a greater than operator, greater than
765 -- or equal operator, or first character of a right label bracket.
768 Accumulate_Checksum ('>');
770 if Double_Char_Token ('=') then
771 Token := Tok_Greater_Equal;
774 elsif Double_Char_Token ('>') then
775 Token := Tok_Greater_Greater;
779 Scan_Ptr := Scan_Ptr + 1;
780 Token := Tok_Greater;
784 -- Less than, which can be a less than operator, less than or equal
785 -- operator, or the first character of a left label bracket, or the
786 -- first character of a box (<>) compound delimiter.
789 Accumulate_Checksum ('<');
791 if Double_Char_Token ('=') then
792 Token := Tok_Less_Equal;
795 elsif Double_Char_Token ('>') then
797 if Style_Check then Style.Check_Box; end if;
800 elsif Double_Char_Token ('<') then
801 Token := Tok_Less_Less;
805 Scan_Ptr := Scan_Ptr + 1;
810 -- Minus, which is either a subtraction operator, or the first
811 -- character of double minus starting a comment
813 when '-' => Minus_Case : begin
814 if Source (Scan_Ptr + 1) = '>' then
815 Error_Msg_S ("invalid token");
816 Scan_Ptr := Scan_Ptr + 2;
820 elsif Source (Scan_Ptr + 1) /= '-' then
821 Accumulate_Checksum ('-');
822 Scan_Ptr := Scan_Ptr + 1;
828 else -- Source (Scan_Ptr + 1) = '-' then
829 if Style_Check then Style.Check_Comment; end if;
830 Scan_Ptr := Scan_Ptr + 2;
832 -- Loop to scan comment (this loop runs more than once only if
833 -- a horizontal tab or other non-graphic character is scanned)
836 -- Scan to non graphic character (opened up for speed)
839 exit when Source (Scan_Ptr) not in Graphic_Character;
840 Scan_Ptr := Scan_Ptr + 1;
841 exit when Source (Scan_Ptr) not in Graphic_Character;
842 Scan_Ptr := Scan_Ptr + 1;
843 exit when Source (Scan_Ptr) not in Graphic_Character;
844 Scan_Ptr := Scan_Ptr + 1;
845 exit when Source (Scan_Ptr) not in Graphic_Character;
846 Scan_Ptr := Scan_Ptr + 1;
847 exit when Source (Scan_Ptr) not in Graphic_Character;
848 Scan_Ptr := Scan_Ptr + 1;
851 -- Keep going if horizontal tab
853 if Source (Scan_Ptr) = HT then
854 if Style_Check then Style.Check_HT; end if;
855 Scan_Ptr := Scan_Ptr + 1;
857 -- Terminate scan of comment if line terminator
859 elsif Source (Scan_Ptr) in Line_Terminator then
862 -- Terminate scan of comment if end of file encountered
863 -- (embedded EOF character or real last character in file)
865 elsif Source (Scan_Ptr) = EOF then
868 -- Keep going if character in 80-FF range, or is ESC. These
869 -- characters are allowed in comments by RM-2.1(1), 2.7(2).
870 -- They are allowed even in Ada 83 mode according to the
871 -- approved AI. ESC was added to the AI in June 93.
873 elsif Source (Scan_Ptr) in Upper_Half_Character
874 or else Source (Scan_Ptr) = ESC
876 Scan_Ptr := Scan_Ptr + 1;
878 -- Otherwise we have an illegal comment character
881 Error_Illegal_Character;
886 -- Note that we do NOT execute a return here, instead we fall
887 -- through to reexecute the scan loop to look for a token.
892 -- Double quote or percent starting a string literal
898 -- Apostrophe. This can either be the start of a character literal,
899 -- or an isolated apostrophe used in a qualified expression or an
900 -- attribute. We treat it as a character literal if it does not
901 -- follow a right parenthesis, identifier, the keyword ALL or
902 -- a literal. This means that we correctly treat constructs like:
904 -- A := CHARACTER'('A');
906 -- Note that RM-2.2(7) does not require a separator between
907 -- "CHARACTER" and "'" in the above.
909 when ''' => Char_Literal_Case : declare
914 Accumulate_Checksum (''');
915 Scan_Ptr := Scan_Ptr + 1;
917 -- Here is where we make the test to distinguish the cases. Treat
918 -- as apostrophe if previous token is an identifier, right paren
919 -- or the reserved word "all" (latter case as in A.all'Address)
920 -- Also treat it as apostrophe after a literal (this catches
921 -- some legitimate cases, like A."abs"'Address, and also gives
922 -- better error behavior for impossible cases like 123'xxx).
924 if Prev_Token = Tok_Identifier
925 or else Prev_Token = Tok_Right_Paren
926 or else Prev_Token = Tok_All
927 or else Prev_Token in Token_Class_Literal
929 Token := Tok_Apostrophe;
932 -- Otherwise the apostrophe starts a character literal
935 -- Case of wide character literal with ESC or [ encoding
937 if (Source (Scan_Ptr) = ESC
939 Wide_Character_Encoding_Method in WC_ESC_Encoding_Method)
941 (Source (Scan_Ptr) in Upper_Half_Character
945 (Source (Scan_Ptr) = '['
947 Source (Scan_Ptr + 1) = '"')
949 Scan_Wide (Source, Scan_Ptr, Code, Err);
950 Accumulate_Checksum (Code);
953 Error_Illegal_Wide_Character;
956 if Source (Scan_Ptr) /= ''' then
957 Error_Msg_S ("missing apostrophe");
959 Scan_Ptr := Scan_Ptr + 1;
962 -- If we do not find a closing quote in the expected place then
963 -- assume that we have a misguided attempt at a string literal.
965 -- However, if previous token is RANGE, then we return an
966 -- apostrophe instead since this gives better error recovery
968 elsif Source (Scan_Ptr + 1) /= ''' then
970 if Prev_Token = Tok_Range then
971 Token := Tok_Apostrophe;
975 Scan_Ptr := Scan_Ptr - 1;
977 ("strings are delimited by double quote character");
982 -- Otherwise we have a (non-wide) character literal
985 Accumulate_Checksum (Source (Scan_Ptr));
987 if Source (Scan_Ptr) not in Graphic_Character then
988 if Source (Scan_Ptr) in Upper_Half_Character then
990 Error_Illegal_Character;
994 Error_Illegal_Character;
998 Code := Get_Char_Code (Source (Scan_Ptr));
999 Scan_Ptr := Scan_Ptr + 2;
1002 -- Fall through here with Scan_Ptr updated past the closing
1003 -- quote, and Code set to the Char_Code value for the literal
1005 Accumulate_Checksum (''');
1006 Token := Tok_Char_Literal;
1007 Token_Node := New_Node (N_Character_Literal, Token_Ptr);
1008 Set_Char_Literal_Value (Token_Node, Code);
1009 Set_Character_Literal_Name (Code);
1010 Token_Name := Name_Find;
1011 Set_Chars (Token_Node, Token_Name);
1014 end Char_Literal_Case;
1016 -- Right parenthesis
1019 Accumulate_Checksum (')');
1020 Scan_Ptr := Scan_Ptr + 1;
1021 Token := Tok_Right_Paren;
1022 if Style_Check then Style.Check_Right_Paren; end if;
1025 -- Right bracket or right brace, treated as right paren
1028 Error_Msg_S ("illegal character, replaced by "")""");
1029 Scan_Ptr := Scan_Ptr + 1;
1030 Token := Tok_Right_Paren;
1033 -- Slash (can be division operator or first character of not equal)
1036 Accumulate_Checksum ('/');
1038 if Double_Char_Token ('=') then
1039 Token := Tok_Not_Equal;
1042 Scan_Ptr := Scan_Ptr + 1;
1050 Accumulate_Checksum (';');
1051 Scan_Ptr := Scan_Ptr + 1;
1052 Token := Tok_Semicolon;
1053 if Style_Check then Style.Check_Semicolon; end if;
1058 when '|' => Vertical_Bar_Case : begin
1059 Accumulate_Checksum ('|');
1061 -- Special check for || to give nice message
1063 if Source (Scan_Ptr + 1) = '|' then
1064 Error_Msg_S ("""||"" should be `OR ELSE`");
1065 Scan_Ptr := Scan_Ptr + 2;
1070 Scan_Ptr := Scan_Ptr + 1;
1071 Token := Tok_Vertical_Bar;
1072 if Style_Check then Style.Check_Vertical_Bar; end if;
1075 end Vertical_Bar_Case;
1077 -- Exclamation, replacement character for vertical bar
1079 when '!' => Exclamation_Case : begin
1080 Accumulate_Checksum ('!');
1082 if Source (Scan_Ptr + 1) = '=' then
1083 Error_Msg_S ("'!= should be /=");
1084 Scan_Ptr := Scan_Ptr + 2;
1085 Token := Tok_Not_Equal;
1089 Scan_Ptr := Scan_Ptr + 1;
1090 Token := Tok_Vertical_Bar;
1094 end Exclamation_Case;
1098 when '+' => Plus_Case : begin
1099 Accumulate_Checksum ('+');
1100 Scan_Ptr := Scan_Ptr + 1;
1105 -- Digits starting a numeric literal
1110 if Identifier_Char (Source (Scan_Ptr)) then
1112 ("delimiter required between literal and identifier");
1117 -- Lower case letters
1121 Name_Buffer (1) := Source (Scan_Ptr);
1122 Accumulate_Checksum (Name_Buffer (1));
1123 Scan_Ptr := Scan_Ptr + 1;
1124 goto Scan_Identifier;
1126 -- Upper case letters
1131 Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
1132 Accumulate_Checksum (Name_Buffer (1));
1133 Scan_Ptr := Scan_Ptr + 1;
1134 goto Scan_Identifier;
1136 -- Underline character
1139 Error_Msg_S ("identifier cannot start with underline");
1141 Name_Buffer (1) := '_';
1142 Scan_Ptr := Scan_Ptr + 1;
1143 goto Scan_Identifier;
1145 -- Space (not possible, because we scanned past blanks)
1148 raise Program_Error;
1150 -- Characters in top half of ASCII 8-bit chart
1152 when Upper_Half_Character =>
1154 -- Wide character case. Note that Scan_Identifier will issue
1155 -- an appropriate message if wide characters are not allowed
1158 if Upper_Half_Encoding then
1160 goto Scan_Identifier;
1162 -- Otherwise we have OK Latin-1 character
1165 -- Upper half characters may possibly be identifier letters
1166 -- but can never be digits, so Identifier_Character can be
1167 -- used to test for a valid start of identifier character.
1169 if Identifier_Char (Source (Scan_Ptr)) then
1171 goto Scan_Identifier;
1173 Error_Illegal_Character;
1179 -- ESC character, possible start of identifier if wide characters
1180 -- using ESC encoding are allowed in identifiers, which we can
1181 -- tell by looking at the Identifier_Char flag for ESC, which is
1182 -- only true if these conditions are met.
1184 if Identifier_Char (ESC) then
1186 goto Scan_Identifier;
1188 Error_Illegal_Wide_Character;
1191 -- Invalid control characters
1193 when NUL | SOH | STX | ETX | EOT | ENQ | ACK | BEL | BS | SO |
1194 SI | DLE | DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN |
1195 EM | FS | GS | RS | US | DEL
1197 Error_Illegal_Character;
1199 -- Invalid graphic characters
1201 when '#' | '$' | '?' | '@' | '`' | '\' | '^' | '~' =>
1202 Error_Illegal_Character;
1204 -- End switch on non-blank character
1208 -- End loop past format effectors. The exit from this loop is by
1209 -- executing a return statement following completion of token scan
1210 -- (control never falls out of this loop to the code which follows)
1214 -- Identifier scanning routine. On entry, some initial characters
1215 -- of the identifier may have already been stored in Name_Buffer.
1216 -- If so, Name_Len has the number of characters stored. otherwise
1217 -- Name_Len is set to zero on entry.
1221 -- This loop scans as fast as possible past lower half letters
1222 -- and digits, which we expect to be the most common characters.
1225 if Source (Scan_Ptr) in 'a' .. 'z'
1226 or else Source (Scan_Ptr) in '0' .. '9'
1228 Name_Buffer (Name_Len + 1) := Source (Scan_Ptr);
1229 Accumulate_Checksum (Source (Scan_Ptr));
1231 elsif Source (Scan_Ptr) in 'A' .. 'Z' then
1232 Name_Buffer (Name_Len + 1) :=
1233 Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
1234 Accumulate_Checksum (Name_Buffer (Name_Len + 1));
1239 -- Open out the loop a couple of times for speed
1241 if Source (Scan_Ptr + 1) in 'a' .. 'z'
1242 or else Source (Scan_Ptr + 1) in '0' .. '9'
1244 Name_Buffer (Name_Len + 2) := Source (Scan_Ptr + 1);
1245 Accumulate_Checksum (Source (Scan_Ptr + 1));
1247 elsif Source (Scan_Ptr + 1) in 'A' .. 'Z' then
1248 Name_Buffer (Name_Len + 2) :=
1249 Character'Val (Character'Pos (Source (Scan_Ptr + 1)) + 32);
1250 Accumulate_Checksum (Name_Buffer (Name_Len + 2));
1253 Scan_Ptr := Scan_Ptr + 1;
1254 Name_Len := Name_Len + 1;
1258 if Source (Scan_Ptr + 2) in 'a' .. 'z'
1259 or else Source (Scan_Ptr + 2) in '0' .. '9'
1261 Name_Buffer (Name_Len + 3) := Source (Scan_Ptr + 2);
1262 Accumulate_Checksum (Source (Scan_Ptr + 2));
1264 elsif Source (Scan_Ptr + 2) in 'A' .. 'Z' then
1265 Name_Buffer (Name_Len + 3) :=
1266 Character'Val (Character'Pos (Source (Scan_Ptr + 2)) + 32);
1267 Accumulate_Checksum (Name_Buffer (Name_Len + 3));
1269 Scan_Ptr := Scan_Ptr + 2;
1270 Name_Len := Name_Len + 2;
1274 if Source (Scan_Ptr + 3) in 'a' .. 'z'
1275 or else Source (Scan_Ptr + 3) in '0' .. '9'
1277 Name_Buffer (Name_Len + 4) := Source (Scan_Ptr + 3);
1278 Accumulate_Checksum (Source (Scan_Ptr + 3));
1280 elsif Source (Scan_Ptr + 3) in 'A' .. 'Z' then
1281 Name_Buffer (Name_Len + 4) :=
1282 Character'Val (Character'Pos (Source (Scan_Ptr + 3)) + 32);
1283 Accumulate_Checksum (Name_Buffer (Name_Len + 4));
1286 Scan_Ptr := Scan_Ptr + 3;
1287 Name_Len := Name_Len + 3;
1291 Scan_Ptr := Scan_Ptr + 4;
1292 Name_Len := Name_Len + 4;
1295 -- If we fall through, then we have encountered either an underline
1296 -- character, or an extended identifier character (i.e. one from the
1297 -- upper half), or a wide character, or an identifier terminator.
1298 -- The initial test speeds us up in the most common case where we
1299 -- have an identifier terminator. Note that ESC is an identifier
1300 -- character only if a wide character encoding method that uses
1301 -- ESC encoding is active, so if we find an ESC character we know
1302 -- that we have a wide character.
1304 if Identifier_Char (Source (Scan_Ptr)) then
1306 -- Case of underline, check for error cases of double underline,
1307 -- and for a trailing underline character
1309 if Source (Scan_Ptr) = '_' then
1310 Accumulate_Checksum ('_');
1311 Name_Len := Name_Len + 1;
1312 Name_Buffer (Name_Len) := '_';
1314 if Identifier_Char (Source (Scan_Ptr + 1)) then
1315 Scan_Ptr := Scan_Ptr + 1;
1317 if Source (Scan_Ptr) = '_' then
1318 Error_No_Double_Underline;
1322 Error_Msg_S ("identifier cannot end with underline");
1323 Scan_Ptr := Scan_Ptr + 1;
1326 goto Scan_Identifier;
1328 -- Upper half character
1330 elsif Source (Scan_Ptr) in Upper_Half_Character
1331 and then not Upper_Half_Encoding
1333 Accumulate_Checksum (Source (Scan_Ptr));
1334 Store_Encoded_Character
1335 (Get_Char_Code (Fold_Lower (Source (Scan_Ptr))));
1336 Scan_Ptr := Scan_Ptr + 1;
1337 goto Scan_Identifier;
1339 -- Left bracket not followed by a quote terminates an identifier.
1340 -- This is an error, but we don't want to give a junk error msg
1341 -- about wide characters in this case!
1343 elsif Source (Scan_Ptr) = '['
1344 and then Source (Scan_Ptr + 1) /= '"'
1348 -- We know we have a wide character encoding here (the current
1349 -- character is either ESC, left bracket, or an upper half
1350 -- character depending on the encoding method).
1353 -- Scan out the wide character and insert the appropriate
1354 -- encoding into the name table entry for the identifier.
1357 Sptr : constant Source_Ptr := Scan_Ptr;
1362 Scan_Wide (Source, Scan_Ptr, Code, Err);
1363 Accumulate_Checksum (Code);
1366 Error_Illegal_Wide_Character;
1368 Store_Encoded_Character (Code);
1371 -- Make sure we are allowing wide characters in identifiers.
1372 -- Note that we allow wide character notation for an OK
1373 -- identifier character. This in particular allows bracket
1374 -- or other notation to be used for upper half letters.
1376 if Identifier_Character_Set /= 'w'
1378 (not In_Character_Range (Code)
1380 not Identifier_Char (Get_Character (Code)))
1383 ("wide character not allowed in identifier", Sptr);
1387 goto Scan_Identifier;
1391 -- Scan of identifier is complete. The identifier is stored in
1392 -- Name_Buffer, and Scan_Ptr points past the last character.
1394 Token_Name := Name_Find;
1396 -- Here is where we check if it was a keyword
1398 if Get_Name_Table_Byte (Token_Name) /= 0
1399 and then (Ada_95 or else Token_Name not in Ada_95_Reserved_Words)
1401 Token := Token_Type'Val (Get_Name_Table_Byte (Token_Name));
1403 -- Deal with possible style check for non-lower case keyword,
1404 -- but we don't treat ACCESS, DELTA, DIGITS, RANGE as keywords
1405 -- for this purpose if they appear as attribute designators.
1406 -- Actually we only check the first character for speed.
1409 and then Source (Token_Ptr) <= 'Z'
1410 and then (Prev_Token /= Tok_Apostrophe
1412 (Token /= Tok_Access
1413 and then Token /= Tok_Delta
1414 and then Token /= Tok_Digits
1415 and then Token /= Tok_Range))
1417 Style.Non_Lower_Case_Keyword;
1420 -- We must reset Token_Name since this is not an identifier
1421 -- and if we leave Token_Name set, the parser gets confused
1422 -- because it thinks it is dealing with an identifier instead
1423 -- of the corresponding keyword.
1425 Token_Name := No_Name;
1428 -- It is an identifier after all
1431 Token_Node := New_Node (N_Identifier, Token_Ptr);
1432 Set_Chars (Token_Node, Token_Name);
1433 Token := Tok_Identifier;
1438 ---------------------
1439 -- Scan_First_Char --
1440 ---------------------
1442 function Scan_First_Char return Source_Ptr is
1443 Ptr : Source_Ptr := Current_Line_Start;
1447 if Source (Ptr) = ' ' then
1450 elsif Source (Ptr) = HT then
1451 if Style_Check then Style.Check_HT; end if;
1458 end Scan_First_Char;
1460 ------------------------------
1461 -- Scan_Reserved_Identifier --
1462 ------------------------------
1464 procedure Scan_Reserved_Identifier (Force_Msg : Boolean) is
1465 Token_Chars : constant String := Token_Type'Image (Token);
1468 -- We have in Token_Chars the image of the Token name, i.e. Tok_xxx.
1469 -- This code extracts the xxx and makes an identifier out of it.
1473 for J in 5 .. Token_Chars'Length loop
1474 Name_Len := Name_Len + 1;
1475 Name_Buffer (Name_Len) := Fold_Lower (Token_Chars (J));
1478 Token_Name := Name_Find;
1480 if not Used_As_Identifier (Token) or else Force_Msg then
1481 Error_Msg_Name_1 := Token_Name;
1482 Error_Msg_SC ("reserved word* cannot be used as identifier!");
1483 Used_As_Identifier (Token) := True;
1486 Token := Tok_Identifier;
1487 Token_Node := New_Node (N_Identifier, Token_Ptr);
1488 Set_Chars (Token_Node, Token_Name);
1489 end Scan_Reserved_Identifier;
1491 ----------------------
1492 -- Set_Start_Column --
1493 ----------------------
1495 -- Note: it seems at first glance a little expensive to compute this value
1496 -- for every source line (since it is certainly not used for all source
1497 -- lines). On the other hand, it doesn't take much more work to skip past
1498 -- the initial white space on the line counting the columns than it would
1499 -- to scan past the white space using the standard scanning circuits.
1501 function Set_Start_Column return Column_Number is
1502 Start_Column : Column_Number := 0;
1505 -- Outer loop scans past horizontal tab characters
1509 -- Inner loop scans past blanks as fast as possible, bumping Scan_Ptr
1510 -- past the blanks and adjusting Start_Column to account for them.
1513 if Source (Scan_Ptr) = ' ' then
1514 if Source (Scan_Ptr + 1) = ' ' then
1515 if Source (Scan_Ptr + 2) = ' ' then
1516 if Source (Scan_Ptr + 3) = ' ' then
1517 if Source (Scan_Ptr + 4) = ' ' then
1518 if Source (Scan_Ptr + 5) = ' ' then
1519 if Source (Scan_Ptr + 6) = ' ' then
1520 Scan_Ptr := Scan_Ptr + 7;
1521 Start_Column := Start_Column + 7;
1523 Scan_Ptr := Scan_Ptr + 6;
1524 Start_Column := Start_Column + 6;
1528 Scan_Ptr := Scan_Ptr + 5;
1529 Start_Column := Start_Column + 5;
1533 Scan_Ptr := Scan_Ptr + 4;
1534 Start_Column := Start_Column + 4;
1538 Scan_Ptr := Scan_Ptr + 3;
1539 Start_Column := Start_Column + 3;
1543 Scan_Ptr := Scan_Ptr + 2;
1544 Start_Column := Start_Column + 2;
1548 Scan_Ptr := Scan_Ptr + 1;
1549 Start_Column := Start_Column + 1;
1555 end loop Blanks_Loop;
1557 -- Outer loop keeps going only if a horizontal tab follows
1559 if Source (Scan_Ptr) = HT then
1560 if Style_Check then Style.Check_HT; end if;
1561 Scan_Ptr := Scan_Ptr + 1;
1562 Start_Column := (Start_Column / 8) * 8 + 8;
1569 return Start_Column;
1570 end Set_Start_Column;
1576 procedure Slit is separate;