1 ------------------------------------------------------------------------------
3 -- GNAT COMPILER COMPONENTS --
11 -- Copyright (C) 1992-2001 Free Software Foundation, Inc. --
13 -- GNAT is free software; you can redistribute it and/or modify it under --
14 -- terms of the GNU General Public License as published by the Free Soft- --
15 -- ware Foundation; either version 2, or (at your option) any later ver- --
16 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
17 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
18 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
19 -- for more details. You should have received a copy of the GNU General --
20 -- Public License distributed with GNAT; see file COPYING. If not, write --
21 -- to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, --
22 -- MA 02111-1307, USA. --
24 -- GNAT was originally developed by the GNAT team at New York University. --
25 -- It is now maintained by Ada Core Technologies Inc (http://www.gnat.com). --
27 ------------------------------------------------------------------------------
29 with Atree; use Atree;
30 with Csets; use Csets;
31 with Errout; use Errout;
32 with Hostparm; use Hostparm;
33 with Namet; use Namet;
35 with Scans; use Scans;
36 with Sinput; use Sinput;
37 with Sinfo; use Sinfo;
38 with Snames; use Snames;
40 with Widechar; use Widechar;
42 with System.WCh_Con; use System.WCh_Con;
47 -- Make control characters visible
49 Used_As_Identifier : array (Token_Type) of Boolean;
50 -- Flags set True if a given keyword is used as an identifier (used to
51 -- make sure that we only post an error message for incorrect use of a
52 -- keyword as an identifier once for a given keyword).
54 -----------------------
55 -- Local Subprograms --
56 -----------------------
58 procedure Accumulate_Checksum (C : Character);
59 pragma Inline (Accumulate_Checksum);
60 -- This routine accumulates the checksum given character C. During the
61 -- scanning of a source file, this routine is called with every character
62 -- in the source, excluding blanks, and all control characters (except
63 -- that ESC is included in the checksum). Upper case letters not in string
64 -- literals are folded by the caller. See Sinput spec for the documentation
65 -- of the checksum algorithm. Note: checksum values are only used if we
66 -- generate code, so it is not necessary to worry about making the right
67 -- sequence of calls in any error situation.
69 procedure Accumulate_Checksum (C : Char_Code);
70 pragma Inline (Accumulate_Checksum);
71 -- This version is identical, except that the argument, C, is a character
72 -- code value instead of a character. This is used when wide characters
73 -- are scanned. We use the character code rather than the ASCII characters
74 -- so that the checksum is independent of wide character encoding method.
76 procedure Check_End_Of_Line;
77 -- Called when end of line encountered. Checks that line is not
78 -- too long, and that other style checks for the end of line are met.
80 function Determine_License return License_Type;
81 -- Scan header of file and check that it has an appropriate GNAT-style
82 -- header with a proper license statement. Returns GPL, Unrestricted,
83 -- or Modified_GPL depending on header. If none of these, returns Unknown.
85 function Double_Char_Token (C : Character) return Boolean;
86 -- This function is used for double character tokens like := or <>. It
87 -- checks if the character following Source (Scan_Ptr) is C, and if so
88 -- bumps Scan_Ptr past the pair of characters and returns True. A space
89 -- between the two characters is also recognized with an appropriate
90 -- error message being issued. If C is not present, False is returned.
91 -- Note that Double_Char_Token can only be used for tokens defined in
92 -- the Ada syntax (it's use for error cases like && is not appropriate
93 -- since we do not want a junk message for a case like &-space-&).
95 procedure Error_Illegal_Character;
96 -- Give illegal character error, Scan_Ptr points to character. On return,
97 -- Scan_Ptr is bumped past the illegal character.
99 procedure Error_Illegal_Wide_Character;
100 -- Give illegal wide character message. On return, Scan_Ptr is bumped
101 -- past the illegal character, which may still leave us pointing to
102 -- junk, not much we can do if the escape sequence is messed up!
104 procedure Error_Long_Line;
105 -- Signal error of excessively long line
107 procedure Error_No_Double_Underline;
108 -- Signal error of double underline character
111 -- This is the procedure for scanning out numeric literals. On entry,
112 -- Scan_Ptr points to the digit that starts the numeric literal (the
113 -- checksum for this character has not been accumulated yet). On return
114 -- Scan_Ptr points past the last character of the numeric literal, Token
115 -- and Token_Node are set appropriately, and the checksum is updated.
117 function Set_Start_Column return Column_Number;
118 -- This routine is called with Scan_Ptr pointing to the first character
119 -- of a line. On exit, Scan_Ptr is advanced to the first non-blank
120 -- character of this line (or to the terminating format effector if the
121 -- line contains no non-blank characters), and the returned result is the
122 -- column number of this non-blank character (zero origin), which is the
123 -- value to be stored in the Start_Column scan variable.
126 -- This is the procedure for scanning out string literals. On entry,
127 -- Scan_Ptr points to the opening string quote (the checksum for this
128 -- character has not been accumulated yet). On return Scan_Ptr points
129 -- past the closing quote of the string literal, Token and Token_Node
130 -- are set appropriately, and the checksum is upated.
132 -------------------------
133 -- Accumulate_Checksum --
134 -------------------------
136 procedure Accumulate_Checksum (C : Character) is
138 Checksum := Checksum + Checksum + Character'Pos (C);
140 if Checksum > 16#8000_0000# then
141 Checksum := (Checksum + 1) and 16#7FFF_FFFF#;
143 end Accumulate_Checksum;
145 procedure Accumulate_Checksum (C : Char_Code) is
147 Checksum := Checksum + Checksum + Char_Code'Pos (C);
149 if Checksum > 16#8000_0000# then
150 Checksum := (Checksum + 1) and 16#7FFF_FFFF#;
152 end Accumulate_Checksum;
154 -----------------------
155 -- Check_End_Of_Line --
156 -----------------------
158 procedure Check_End_Of_Line is
159 Len : constant Int := Int (Scan_Ptr) - Int (Current_Line_Start);
162 if Len > Hostparm.Max_Line_Length then
165 elsif Style_Check then
166 Style.Check_Line_Terminator (Len);
168 end Check_End_Of_Line;
170 -----------------------
171 -- Determine_License --
172 -----------------------
174 function Determine_License return License_Type is
175 GPL_Found : Boolean := False;
177 function Contains (S : String) return Boolean;
178 -- See if current comment contains successive non-blank characters
179 -- matching the contents of S. If so leave Scan_Ptr unchanged and
180 -- return True, otherwise leave Scan_Ptr unchanged and return False.
183 -- Skip to line terminator character
189 function Contains (S : String) return Boolean is
196 while Source (SP) /= CR and then Source (SP) /= LF loop
197 if Source (SP) = S (S'First) then
209 while Source (SS) = ' ' loop
213 exit when Source (SS) /= S (CP);
227 procedure Skip_EOL is
229 while Source (Scan_Ptr) /= CR
230 and then Source (Scan_Ptr) /= LF
232 Scan_Ptr := Scan_Ptr + 1;
236 -- Start of processing for Determine_License
240 if Source (Scan_Ptr) /= '-'
241 or else Source (Scan_Ptr + 1) /= '-'
249 elsif Contains ("Asaspecialexception") then
254 elsif Contains ("GNUGeneralPublicLicense") then
259 ("ThisspecificationisadaptedfromtheAdaSemanticInterface")
262 ("ThisspecificationisderivedfromtheAdaReferenceManual")
275 Skip_Line_Terminators (Scan_Ptr, Physical);
277 -- If we are at start of physical line, update scan pointers
278 -- to reflect the start of the new line.
281 Current_Line_Start := Scan_Ptr;
282 Start_Column := Set_Start_Column;
283 First_Non_Blank_Location := Scan_Ptr;
287 end Determine_License;
289 ----------------------------
290 -- Determine_Token_Casing --
291 ----------------------------
293 function Determine_Token_Casing return Casing_Type is
295 return Determine_Casing (Source (Token_Ptr .. Scan_Ptr - 1));
296 end Determine_Token_Casing;
298 -----------------------
299 -- Double_Char_Token --
300 -----------------------
302 function Double_Char_Token (C : Character) return Boolean is
304 if Source (Scan_Ptr + 1) = C then
305 Accumulate_Checksum (C);
306 Scan_Ptr := Scan_Ptr + 2;
309 elsif Source (Scan_Ptr + 1) = ' '
310 and then Source (Scan_Ptr + 2) = C
312 Scan_Ptr := Scan_Ptr + 1;
313 Error_Msg_S ("no space allowed here");
314 Scan_Ptr := Scan_Ptr + 2;
320 end Double_Char_Token;
322 -----------------------------
323 -- Error_Illegal_Character --
324 -----------------------------
326 procedure Error_Illegal_Character is
328 Error_Msg_S ("illegal character");
329 Scan_Ptr := Scan_Ptr + 1;
330 end Error_Illegal_Character;
332 ----------------------------------
333 -- Error_Illegal_Wide_Character --
334 ----------------------------------
336 procedure Error_Illegal_Wide_Character is
340 ("illegal wide character, check " &
341 "'/'W'I'D'E'_'C'H'A'R'A'C'T'E'R'_'E'N'C'O'D'I'N'G qualifer");
344 ("illegal wide character, check -gnatW switch");
347 Scan_Ptr := Scan_Ptr + 1;
348 end Error_Illegal_Wide_Character;
350 ---------------------
351 -- Error_Long_Line --
352 ---------------------
354 procedure Error_Long_Line is
357 ("this line is too long",
358 Current_Line_Start + Hostparm.Max_Line_Length);
361 -------------------------------
362 -- Error_No_Double_Underline --
363 -------------------------------
365 procedure Error_No_Double_Underline is
367 Error_Msg_S ("two consecutive underlines not permitted");
368 end Error_No_Double_Underline;
370 ------------------------
371 -- Initialize_Scanner --
372 ------------------------
374 procedure Initialize_Scanner
375 (Unit : Unit_Number_Type;
376 Index : Source_File_Index)
378 GNAT_Hedr : constant Text_Buffer (1 .. 78) := (others => '-');
381 -- Set up Token_Type values in Names Table entries for reserved keywords
382 -- We use the Pos value of the Token_Type value. Note we are relying on
383 -- the fact that Token_Type'Val (0) is not a reserved word!
385 Set_Name_Table_Byte (Name_Abort, Token_Type'Pos (Tok_Abort));
386 Set_Name_Table_Byte (Name_Abs, Token_Type'Pos (Tok_Abs));
387 Set_Name_Table_Byte (Name_Abstract, Token_Type'Pos (Tok_Abstract));
388 Set_Name_Table_Byte (Name_Accept, Token_Type'Pos (Tok_Accept));
389 Set_Name_Table_Byte (Name_Access, Token_Type'Pos (Tok_Access));
390 Set_Name_Table_Byte (Name_And, Token_Type'Pos (Tok_And));
391 Set_Name_Table_Byte (Name_Aliased, Token_Type'Pos (Tok_Aliased));
392 Set_Name_Table_Byte (Name_All, Token_Type'Pos (Tok_All));
393 Set_Name_Table_Byte (Name_Array, Token_Type'Pos (Tok_Array));
394 Set_Name_Table_Byte (Name_At, Token_Type'Pos (Tok_At));
395 Set_Name_Table_Byte (Name_Begin, Token_Type'Pos (Tok_Begin));
396 Set_Name_Table_Byte (Name_Body, Token_Type'Pos (Tok_Body));
397 Set_Name_Table_Byte (Name_Case, Token_Type'Pos (Tok_Case));
398 Set_Name_Table_Byte (Name_Constant, Token_Type'Pos (Tok_Constant));
399 Set_Name_Table_Byte (Name_Declare, Token_Type'Pos (Tok_Declare));
400 Set_Name_Table_Byte (Name_Delay, Token_Type'Pos (Tok_Delay));
401 Set_Name_Table_Byte (Name_Delta, Token_Type'Pos (Tok_Delta));
402 Set_Name_Table_Byte (Name_Digits, Token_Type'Pos (Tok_Digits));
403 Set_Name_Table_Byte (Name_Do, Token_Type'Pos (Tok_Do));
404 Set_Name_Table_Byte (Name_Else, Token_Type'Pos (Tok_Else));
405 Set_Name_Table_Byte (Name_Elsif, Token_Type'Pos (Tok_Elsif));
406 Set_Name_Table_Byte (Name_End, Token_Type'Pos (Tok_End));
407 Set_Name_Table_Byte (Name_Entry, Token_Type'Pos (Tok_Entry));
408 Set_Name_Table_Byte (Name_Exception, Token_Type'Pos (Tok_Exception));
409 Set_Name_Table_Byte (Name_Exit, Token_Type'Pos (Tok_Exit));
410 Set_Name_Table_Byte (Name_For, Token_Type'Pos (Tok_For));
411 Set_Name_Table_Byte (Name_Function, Token_Type'Pos (Tok_Function));
412 Set_Name_Table_Byte (Name_Generic, Token_Type'Pos (Tok_Generic));
413 Set_Name_Table_Byte (Name_Goto, Token_Type'Pos (Tok_Goto));
414 Set_Name_Table_Byte (Name_If, Token_Type'Pos (Tok_If));
415 Set_Name_Table_Byte (Name_In, Token_Type'Pos (Tok_In));
416 Set_Name_Table_Byte (Name_Is, Token_Type'Pos (Tok_Is));
417 Set_Name_Table_Byte (Name_Limited, Token_Type'Pos (Tok_Limited));
418 Set_Name_Table_Byte (Name_Loop, Token_Type'Pos (Tok_Loop));
419 Set_Name_Table_Byte (Name_Mod, Token_Type'Pos (Tok_Mod));
420 Set_Name_Table_Byte (Name_New, Token_Type'Pos (Tok_New));
421 Set_Name_Table_Byte (Name_Not, Token_Type'Pos (Tok_Not));
422 Set_Name_Table_Byte (Name_Null, Token_Type'Pos (Tok_Null));
423 Set_Name_Table_Byte (Name_Of, Token_Type'Pos (Tok_Of));
424 Set_Name_Table_Byte (Name_Or, Token_Type'Pos (Tok_Or));
425 Set_Name_Table_Byte (Name_Others, Token_Type'Pos (Tok_Others));
426 Set_Name_Table_Byte (Name_Out, Token_Type'Pos (Tok_Out));
427 Set_Name_Table_Byte (Name_Package, Token_Type'Pos (Tok_Package));
428 Set_Name_Table_Byte (Name_Pragma, Token_Type'Pos (Tok_Pragma));
429 Set_Name_Table_Byte (Name_Private, Token_Type'Pos (Tok_Private));
430 Set_Name_Table_Byte (Name_Procedure, Token_Type'Pos (Tok_Procedure));
431 Set_Name_Table_Byte (Name_Protected, Token_Type'Pos (Tok_Protected));
432 Set_Name_Table_Byte (Name_Raise, Token_Type'Pos (Tok_Raise));
433 Set_Name_Table_Byte (Name_Range, Token_Type'Pos (Tok_Range));
434 Set_Name_Table_Byte (Name_Record, Token_Type'Pos (Tok_Record));
435 Set_Name_Table_Byte (Name_Rem, Token_Type'Pos (Tok_Rem));
436 Set_Name_Table_Byte (Name_Renames, Token_Type'Pos (Tok_Renames));
437 Set_Name_Table_Byte (Name_Requeue, Token_Type'Pos (Tok_Requeue));
438 Set_Name_Table_Byte (Name_Return, Token_Type'Pos (Tok_Return));
439 Set_Name_Table_Byte (Name_Reverse, Token_Type'Pos (Tok_Reverse));
440 Set_Name_Table_Byte (Name_Select, Token_Type'Pos (Tok_Select));
441 Set_Name_Table_Byte (Name_Separate, Token_Type'Pos (Tok_Separate));
442 Set_Name_Table_Byte (Name_Subtype, Token_Type'Pos (Tok_Subtype));
443 Set_Name_Table_Byte (Name_Tagged, Token_Type'Pos (Tok_Tagged));
444 Set_Name_Table_Byte (Name_Task, Token_Type'Pos (Tok_Task));
445 Set_Name_Table_Byte (Name_Terminate, Token_Type'Pos (Tok_Terminate));
446 Set_Name_Table_Byte (Name_Then, Token_Type'Pos (Tok_Then));
447 Set_Name_Table_Byte (Name_Type, Token_Type'Pos (Tok_Type));
448 Set_Name_Table_Byte (Name_Until, Token_Type'Pos (Tok_Until));
449 Set_Name_Table_Byte (Name_Use, Token_Type'Pos (Tok_Use));
450 Set_Name_Table_Byte (Name_When, Token_Type'Pos (Tok_When));
451 Set_Name_Table_Byte (Name_While, Token_Type'Pos (Tok_While));
452 Set_Name_Table_Byte (Name_With, Token_Type'Pos (Tok_With));
453 Set_Name_Table_Byte (Name_Xor, Token_Type'Pos (Tok_Xor));
455 -- Initialize scan control variables
457 Current_Source_File := Index;
458 Source := Source_Text (Current_Source_File);
459 Current_Source_Unit := Unit;
460 Scan_Ptr := Source_First (Current_Source_File);
462 Token_Ptr := Scan_Ptr;
463 Current_Line_Start := Scan_Ptr;
465 Token_Name := No_Name;
466 Start_Column := Set_Start_Column;
467 First_Non_Blank_Location := Scan_Ptr;
470 -- Set default for Comes_From_Source. All nodes built now until we
471 -- reenter the analyzer will have Comes_From_Source set to True
473 Set_Comes_From_Source_Default (True);
475 -- Check license if GNAT type header possibly present
477 if Source_Last (Index) - Scan_Ptr > 80
478 and then Source (Scan_Ptr .. Scan_Ptr + 77) = GNAT_Hedr
480 Set_License (Current_Source_File, Determine_License);
483 -- Scan initial token (note this initializes Prev_Token, Prev_Token_Ptr)
487 -- Clear flags for reserved words used as indentifiers
489 for J in Token_Type loop
490 Used_As_Identifier (J) := False;
493 end Initialize_Scanner;
499 procedure Nlit is separate;
508 Prev_Token_Ptr := Token_Ptr;
509 Token_Name := Error_Name;
511 -- The following loop runs more than once only if a format effector
512 -- (tab, vertical tab, form feed, line feed, carriage return) is
513 -- encountered and skipped, or some error situation, such as an
514 -- illegal character, is encountered.
517 -- Skip past blanks, loop is opened up for speed
519 while Source (Scan_Ptr) = ' ' loop
521 if Source (Scan_Ptr + 1) /= ' ' then
522 Scan_Ptr := Scan_Ptr + 1;
526 if Source (Scan_Ptr + 2) /= ' ' then
527 Scan_Ptr := Scan_Ptr + 2;
531 if Source (Scan_Ptr + 3) /= ' ' then
532 Scan_Ptr := Scan_Ptr + 3;
536 if Source (Scan_Ptr + 4) /= ' ' then
537 Scan_Ptr := Scan_Ptr + 4;
541 if Source (Scan_Ptr + 5) /= ' ' then
542 Scan_Ptr := Scan_Ptr + 5;
546 if Source (Scan_Ptr + 6) /= ' ' then
547 Scan_Ptr := Scan_Ptr + 6;
551 if Source (Scan_Ptr + 7) /= ' ' then
552 Scan_Ptr := Scan_Ptr + 7;
556 Scan_Ptr := Scan_Ptr + 8;
559 -- We are now at a non-blank character, which is the first character
560 -- of the token we will scan, and hence the value of Token_Ptr.
562 Token_Ptr := Scan_Ptr;
564 -- Here begins the main case statement which transfers control on
565 -- the basis of the non-blank character we have encountered.
567 case Source (Scan_Ptr) is
569 -- Line terminator characters
571 when CR | LF | FF | VT => Line_Terminator_Case : begin
573 -- Check line too long
581 Skip_Line_Terminators (Scan_Ptr, Physical);
583 -- If we are at start of physical line, update scan pointers
584 -- to reflect the start of the new line.
587 Current_Line_Start := Scan_Ptr;
588 Start_Column := Set_Start_Column;
589 First_Non_Blank_Location := Scan_Ptr;
592 end Line_Terminator_Case;
594 -- Horizontal tab, just skip past it
597 if Style_Check then Style.Check_HT; end if;
598 Scan_Ptr := Scan_Ptr + 1;
600 -- End of file character, treated as an end of file only if it
601 -- is the last character in the buffer, otherwise it is ignored.
604 if Scan_Ptr = Source_Last (Current_Source_File) then
610 Scan_Ptr := Scan_Ptr + 1;
616 Accumulate_Checksum ('&');
618 if Source (Scan_Ptr + 1) = '&' then
619 Error_Msg_S ("'&'& should be `AND THEN`");
620 Scan_Ptr := Scan_Ptr + 2;
625 Scan_Ptr := Scan_Ptr + 1;
626 Token := Tok_Ampersand;
630 -- Asterisk (can be multiplication operator or double asterisk
631 -- which is the exponentiation compound delimtier).
634 Accumulate_Checksum ('*');
636 if Source (Scan_Ptr + 1) = '*' then
637 Accumulate_Checksum ('*');
638 Scan_Ptr := Scan_Ptr + 2;
639 Token := Tok_Double_Asterisk;
643 Scan_Ptr := Scan_Ptr + 1;
644 Token := Tok_Asterisk;
648 -- Colon, which can either be an isolated colon, or part of an
649 -- assignment compound delimiter.
652 Accumulate_Checksum (':');
654 if Double_Char_Token ('=') then
655 Token := Tok_Colon_Equal;
656 if Style_Check then Style.Check_Colon_Equal; end if;
659 elsif Source (Scan_Ptr + 1) = '-'
660 and then Source (Scan_Ptr + 2) /= '-'
662 Token := Tok_Colon_Equal;
663 Error_Msg (":- should be :=", Scan_Ptr);
664 Scan_Ptr := Scan_Ptr + 2;
668 Scan_Ptr := Scan_Ptr + 1;
670 if Style_Check then Style.Check_Colon; end if;
677 Accumulate_Checksum ('(');
678 Scan_Ptr := Scan_Ptr + 1;
679 Token := Tok_Left_Paren;
680 if Style_Check then Style.Check_Left_Paren; end if;
686 if Source (Scan_Ptr + 1) = '"' then
688 goto Scan_Identifier;
691 Error_Msg_S ("illegal character, replaced by ""(""");
692 Scan_Ptr := Scan_Ptr + 1;
693 Token := Tok_Left_Paren;
700 Error_Msg_S ("illegal character, replaced by ""(""");
701 Scan_Ptr := Scan_Ptr + 1;
702 Token := Tok_Left_Paren;
708 Accumulate_Checksum (',');
709 Scan_Ptr := Scan_Ptr + 1;
711 if Style_Check then Style.Check_Comma; end if;
714 -- Dot, which is either an isolated period, or part of a double
715 -- dot compound delimiter sequence. We also check for the case of
716 -- a digit following the period, to give a better error message.
719 Accumulate_Checksum ('.');
721 if Double_Char_Token ('.') then
722 Token := Tok_Dot_Dot;
723 if Style_Check then Style.Check_Dot_Dot; end if;
726 elsif Source (Scan_Ptr + 1) in '0' .. '9' then
727 Error_Msg_S ("numeric literal cannot start with point");
728 Scan_Ptr := Scan_Ptr + 1;
731 Scan_Ptr := Scan_Ptr + 1;
736 -- Equal, which can either be an equality operator, or part of the
737 -- arrow (=>) compound delimiter.
740 Accumulate_Checksum ('=');
742 if Double_Char_Token ('>') then
744 if Style_Check then Style.Check_Arrow; end if;
747 elsif Source (Scan_Ptr + 1) = '=' then
748 Error_Msg_S ("== should be =");
749 Scan_Ptr := Scan_Ptr + 1;
752 Scan_Ptr := Scan_Ptr + 1;
756 -- Greater than, which can be a greater than operator, greater than
757 -- or equal operator, or first character of a right label bracket.
760 Accumulate_Checksum ('>');
762 if Double_Char_Token ('=') then
763 Token := Tok_Greater_Equal;
766 elsif Double_Char_Token ('>') then
767 Token := Tok_Greater_Greater;
771 Scan_Ptr := Scan_Ptr + 1;
772 Token := Tok_Greater;
776 -- Less than, which can be a less than operator, less than or equal
777 -- operator, or the first character of a left label bracket, or the
778 -- first character of a box (<>) compound delimiter.
781 Accumulate_Checksum ('<');
783 if Double_Char_Token ('=') then
784 Token := Tok_Less_Equal;
787 elsif Double_Char_Token ('>') then
789 if Style_Check then Style.Check_Box; end if;
792 elsif Double_Char_Token ('<') then
793 Token := Tok_Less_Less;
797 Scan_Ptr := Scan_Ptr + 1;
802 -- Minus, which is either a subtraction operator, or the first
803 -- character of double minus starting a comment
805 when '-' => Minus_Case : begin
806 if Source (Scan_Ptr + 1) = '>' then
807 Error_Msg_S ("invalid token");
808 Scan_Ptr := Scan_Ptr + 2;
812 elsif Source (Scan_Ptr + 1) /= '-' then
813 Accumulate_Checksum ('-');
814 Scan_Ptr := Scan_Ptr + 1;
820 else -- Source (Scan_Ptr + 1) = '-' then
821 if Style_Check then Style.Check_Comment; end if;
822 Scan_Ptr := Scan_Ptr + 2;
824 -- Loop to scan comment (this loop runs more than once only if
825 -- a horizontal tab or other non-graphic character is scanned)
828 -- Scan to non graphic character (opened up for speed)
831 exit when Source (Scan_Ptr) not in Graphic_Character;
832 Scan_Ptr := Scan_Ptr + 1;
833 exit when Source (Scan_Ptr) not in Graphic_Character;
834 Scan_Ptr := Scan_Ptr + 1;
835 exit when Source (Scan_Ptr) not in Graphic_Character;
836 Scan_Ptr := Scan_Ptr + 1;
837 exit when Source (Scan_Ptr) not in Graphic_Character;
838 Scan_Ptr := Scan_Ptr + 1;
839 exit when Source (Scan_Ptr) not in Graphic_Character;
840 Scan_Ptr := Scan_Ptr + 1;
843 -- Keep going if horizontal tab
845 if Source (Scan_Ptr) = HT then
846 if Style_Check then Style.Check_HT; end if;
847 Scan_Ptr := Scan_Ptr + 1;
849 -- Terminate scan of comment if line terminator
851 elsif Source (Scan_Ptr) in Line_Terminator then
854 -- Terminate scan of comment if end of file encountered
855 -- (embedded EOF character or real last character in file)
857 elsif Source (Scan_Ptr) = EOF then
860 -- Keep going if character in 80-FF range, or is ESC. These
861 -- characters are allowed in comments by RM-2.1(1), 2.7(2).
862 -- They are allowed even in Ada 83 mode according to the
863 -- approved AI. ESC was added to the AI in June 93.
865 elsif Source (Scan_Ptr) in Upper_Half_Character
866 or else Source (Scan_Ptr) = ESC
868 Scan_Ptr := Scan_Ptr + 1;
870 -- Otherwise we have an illegal comment character
873 Error_Illegal_Character;
878 -- Note that we do NOT execute a return here, instead we fall
879 -- through to reexecute the scan loop to look for a token.
884 -- Double quote or percent starting a string literal
890 -- Apostrophe. This can either be the start of a character literal,
891 -- or an isolated apostrophe used in a qualified expression or an
892 -- attribute. We treat it as a character literal if it does not
893 -- follow a right parenthesis, identifier, the keyword ALL or
894 -- a literal. This means that we correctly treat constructs like:
896 -- A := CHARACTER'('A');
898 -- Note that RM-2.2(7) does not require a separator between
899 -- "CHARACTER" and "'" in the above.
901 when ''' => Char_Literal_Case : declare
906 Accumulate_Checksum (''');
907 Scan_Ptr := Scan_Ptr + 1;
909 -- Here is where we make the test to distinguish the cases. Treat
910 -- as apostrophe if previous token is an identifier, right paren
911 -- or the reserved word "all" (latter case as in A.all'Address)
912 -- Also treat it as apostrophe after a literal (this catches
913 -- some legitimate cases, like A."abs"'Address, and also gives
914 -- better error behavior for impossible cases like 123'xxx).
916 if Prev_Token = Tok_Identifier
917 or else Prev_Token = Tok_Right_Paren
918 or else Prev_Token = Tok_All
919 or else Prev_Token in Token_Class_Literal
921 Token := Tok_Apostrophe;
924 -- Otherwise the apostrophe starts a character literal
927 -- Case of wide character literal with ESC or [ encoding
929 if (Source (Scan_Ptr) = ESC
931 Wide_Character_Encoding_Method in WC_ESC_Encoding_Method)
933 (Source (Scan_Ptr) in Upper_Half_Character
937 (Source (Scan_Ptr) = '['
939 Source (Scan_Ptr + 1) = '"')
941 Scan_Wide (Source, Scan_Ptr, Code, Err);
942 Accumulate_Checksum (Code);
945 Error_Illegal_Wide_Character;
948 if Source (Scan_Ptr) /= ''' then
949 Error_Msg_S ("missing apostrophe");
951 Scan_Ptr := Scan_Ptr + 1;
954 -- If we do not find a closing quote in the expected place then
955 -- assume that we have a misguided attempt at a string literal.
957 -- However, if previous token is RANGE, then we return an
958 -- apostrophe instead since this gives better error recovery
960 elsif Source (Scan_Ptr + 1) /= ''' then
962 if Prev_Token = Tok_Range then
963 Token := Tok_Apostrophe;
967 Scan_Ptr := Scan_Ptr - 1;
969 ("strings are delimited by double quote character");
974 -- Otherwise we have a (non-wide) character literal
977 Accumulate_Checksum (Source (Scan_Ptr));
979 if Source (Scan_Ptr) not in Graphic_Character then
980 if Source (Scan_Ptr) in Upper_Half_Character then
982 Error_Illegal_Character;
986 Error_Illegal_Character;
990 Code := Get_Char_Code (Source (Scan_Ptr));
991 Scan_Ptr := Scan_Ptr + 2;
994 -- Fall through here with Scan_Ptr updated past the closing
995 -- quote, and Code set to the Char_Code value for the literal
997 Accumulate_Checksum (''');
998 Token := Tok_Char_Literal;
999 Token_Node := New_Node (N_Character_Literal, Token_Ptr);
1000 Set_Char_Literal_Value (Token_Node, Code);
1001 Set_Character_Literal_Name (Code);
1002 Token_Name := Name_Find;
1003 Set_Chars (Token_Node, Token_Name);
1006 end Char_Literal_Case;
1008 -- Right parenthesis
1011 Accumulate_Checksum (')');
1012 Scan_Ptr := Scan_Ptr + 1;
1013 Token := Tok_Right_Paren;
1014 if Style_Check then Style.Check_Right_Paren; end if;
1017 -- Right bracket or right brace, treated as right paren
1020 Error_Msg_S ("illegal character, replaced by "")""");
1021 Scan_Ptr := Scan_Ptr + 1;
1022 Token := Tok_Right_Paren;
1025 -- Slash (can be division operator or first character of not equal)
1028 Accumulate_Checksum ('/');
1030 if Double_Char_Token ('=') then
1031 Token := Tok_Not_Equal;
1034 Scan_Ptr := Scan_Ptr + 1;
1042 Accumulate_Checksum (';');
1043 Scan_Ptr := Scan_Ptr + 1;
1044 Token := Tok_Semicolon;
1045 if Style_Check then Style.Check_Semicolon; end if;
1050 when '|' => Vertical_Bar_Case : begin
1051 Accumulate_Checksum ('|');
1053 -- Special check for || to give nice message
1055 if Source (Scan_Ptr + 1) = '|' then
1056 Error_Msg_S ("""||"" should be `OR ELSE`");
1057 Scan_Ptr := Scan_Ptr + 2;
1062 Scan_Ptr := Scan_Ptr + 1;
1063 Token := Tok_Vertical_Bar;
1064 if Style_Check then Style.Check_Vertical_Bar; end if;
1067 end Vertical_Bar_Case;
1069 -- Exclamation, replacement character for vertical bar
1071 when '!' => Exclamation_Case : begin
1072 Accumulate_Checksum ('!');
1074 if Source (Scan_Ptr + 1) = '=' then
1075 Error_Msg_S ("'!= should be /=");
1076 Scan_Ptr := Scan_Ptr + 2;
1077 Token := Tok_Not_Equal;
1081 Scan_Ptr := Scan_Ptr + 1;
1082 Token := Tok_Vertical_Bar;
1086 end Exclamation_Case;
1090 when '+' => Plus_Case : begin
1091 Accumulate_Checksum ('+');
1092 Scan_Ptr := Scan_Ptr + 1;
1097 -- Digits starting a numeric literal
1102 if Identifier_Char (Source (Scan_Ptr)) then
1104 ("delimiter required between literal and identifier");
1109 -- Lower case letters
1113 Name_Buffer (1) := Source (Scan_Ptr);
1114 Accumulate_Checksum (Name_Buffer (1));
1115 Scan_Ptr := Scan_Ptr + 1;
1116 goto Scan_Identifier;
1118 -- Upper case letters
1123 Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
1124 Accumulate_Checksum (Name_Buffer (1));
1125 Scan_Ptr := Scan_Ptr + 1;
1126 goto Scan_Identifier;
1128 -- Underline character
1131 Error_Msg_S ("identifier cannot start with underline");
1133 Name_Buffer (1) := '_';
1134 Scan_Ptr := Scan_Ptr + 1;
1135 goto Scan_Identifier;
1137 -- Space (not possible, because we scanned past blanks)
1140 raise Program_Error;
1142 -- Characters in top half of ASCII 8-bit chart
1144 when Upper_Half_Character =>
1146 -- Wide character case. Note that Scan_Identifier will issue
1147 -- an appropriate message if wide characters are not allowed
1150 if Upper_Half_Encoding then
1152 goto Scan_Identifier;
1154 -- Otherwise we have OK Latin-1 character
1157 -- Upper half characters may possibly be identifier letters
1158 -- but can never be digits, so Identifier_Character can be
1159 -- used to test for a valid start of identifier character.
1161 if Identifier_Char (Source (Scan_Ptr)) then
1163 goto Scan_Identifier;
1165 Error_Illegal_Character;
1171 -- ESC character, possible start of identifier if wide characters
1172 -- using ESC encoding are allowed in identifiers, which we can
1173 -- tell by looking at the Identifier_Char flag for ESC, which is
1174 -- only true if these conditions are met.
1176 if Identifier_Char (ESC) then
1178 goto Scan_Identifier;
1180 Error_Illegal_Wide_Character;
1183 -- Invalid control characters
1185 when NUL | SOH | STX | ETX | EOT | ENQ | ACK | BEL | BS | SO |
1186 SI | DLE | DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN |
1187 EM | FS | GS | RS | US | DEL
1189 Error_Illegal_Character;
1191 -- Invalid graphic characters
1193 when '#' | '$' | '?' | '@' | '`' | '\' | '^' | '~' =>
1194 Error_Illegal_Character;
1196 -- End switch on non-blank character
1200 -- End loop past format effectors. The exit from this loop is by
1201 -- executing a return statement following completion of token scan
1202 -- (control never falls out of this loop to the code which follows)
1206 -- Identifier scanning routine. On entry, some initial characters
1207 -- of the identifier may have already been stored in Name_Buffer.
1208 -- If so, Name_Len has the number of characters stored. otherwise
1209 -- Name_Len is set to zero on entry.
1213 -- This loop scans as fast as possible past lower half letters
1214 -- and digits, which we expect to be the most common characters.
1217 if Source (Scan_Ptr) in 'a' .. 'z'
1218 or else Source (Scan_Ptr) in '0' .. '9'
1220 Name_Buffer (Name_Len + 1) := Source (Scan_Ptr);
1221 Accumulate_Checksum (Source (Scan_Ptr));
1223 elsif Source (Scan_Ptr) in 'A' .. 'Z' then
1224 Name_Buffer (Name_Len + 1) :=
1225 Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
1226 Accumulate_Checksum (Name_Buffer (Name_Len + 1));
1231 -- Open out the loop a couple of times for speed
1233 if Source (Scan_Ptr + 1) in 'a' .. 'z'
1234 or else Source (Scan_Ptr + 1) in '0' .. '9'
1236 Name_Buffer (Name_Len + 2) := Source (Scan_Ptr + 1);
1237 Accumulate_Checksum (Source (Scan_Ptr + 1));
1239 elsif Source (Scan_Ptr + 1) in 'A' .. 'Z' then
1240 Name_Buffer (Name_Len + 2) :=
1241 Character'Val (Character'Pos (Source (Scan_Ptr + 1)) + 32);
1242 Accumulate_Checksum (Name_Buffer (Name_Len + 2));
1245 Scan_Ptr := Scan_Ptr + 1;
1246 Name_Len := Name_Len + 1;
1250 if Source (Scan_Ptr + 2) in 'a' .. 'z'
1251 or else Source (Scan_Ptr + 2) in '0' .. '9'
1253 Name_Buffer (Name_Len + 3) := Source (Scan_Ptr + 2);
1254 Accumulate_Checksum (Source (Scan_Ptr + 2));
1256 elsif Source (Scan_Ptr + 2) in 'A' .. 'Z' then
1257 Name_Buffer (Name_Len + 3) :=
1258 Character'Val (Character'Pos (Source (Scan_Ptr + 2)) + 32);
1259 Accumulate_Checksum (Name_Buffer (Name_Len + 3));
1261 Scan_Ptr := Scan_Ptr + 2;
1262 Name_Len := Name_Len + 2;
1266 if Source (Scan_Ptr + 3) in 'a' .. 'z'
1267 or else Source (Scan_Ptr + 3) in '0' .. '9'
1269 Name_Buffer (Name_Len + 4) := Source (Scan_Ptr + 3);
1270 Accumulate_Checksum (Source (Scan_Ptr + 3));
1272 elsif Source (Scan_Ptr + 3) in 'A' .. 'Z' then
1273 Name_Buffer (Name_Len + 4) :=
1274 Character'Val (Character'Pos (Source (Scan_Ptr + 3)) + 32);
1275 Accumulate_Checksum (Name_Buffer (Name_Len + 4));
1278 Scan_Ptr := Scan_Ptr + 3;
1279 Name_Len := Name_Len + 3;
1283 Scan_Ptr := Scan_Ptr + 4;
1284 Name_Len := Name_Len + 4;
1287 -- If we fall through, then we have encountered either an underline
1288 -- character, or an extended identifier character (i.e. one from the
1289 -- upper half), or a wide character, or an identifier terminator.
1290 -- The initial test speeds us up in the most common case where we
1291 -- have an identifier terminator. Note that ESC is an identifier
1292 -- character only if a wide character encoding method that uses
1293 -- ESC encoding is active, so if we find an ESC character we know
1294 -- that we have a wide character.
1296 if Identifier_Char (Source (Scan_Ptr)) then
1298 -- Case of underline, check for error cases of double underline,
1299 -- and for a trailing underline character
1301 if Source (Scan_Ptr) = '_' then
1302 Accumulate_Checksum ('_');
1303 Name_Len := Name_Len + 1;
1304 Name_Buffer (Name_Len) := '_';
1306 if Identifier_Char (Source (Scan_Ptr + 1)) then
1307 Scan_Ptr := Scan_Ptr + 1;
1309 if Source (Scan_Ptr) = '_' then
1310 Error_No_Double_Underline;
1314 Error_Msg_S ("identifier cannot end with underline");
1315 Scan_Ptr := Scan_Ptr + 1;
1318 goto Scan_Identifier;
1320 -- Upper half character
1322 elsif Source (Scan_Ptr) in Upper_Half_Character
1323 and then not Upper_Half_Encoding
1325 Accumulate_Checksum (Source (Scan_Ptr));
1326 Store_Encoded_Character
1327 (Get_Char_Code (Fold_Lower (Source (Scan_Ptr))));
1328 Scan_Ptr := Scan_Ptr + 1;
1329 goto Scan_Identifier;
1331 -- Left bracket not followed by a quote terminates an identifier.
1332 -- This is an error, but we don't want to give a junk error msg
1333 -- about wide characters in this case!
1335 elsif Source (Scan_Ptr) = '['
1336 and then Source (Scan_Ptr + 1) /= '"'
1340 -- We know we have a wide character encoding here (the current
1341 -- character is either ESC, left bracket, or an upper half
1342 -- character depending on the encoding method).
1345 -- Scan out the wide character and insert the appropriate
1346 -- encoding into the name table entry for the identifier.
1349 Sptr : constant Source_Ptr := Scan_Ptr;
1354 Scan_Wide (Source, Scan_Ptr, Code, Err);
1355 Accumulate_Checksum (Code);
1358 Error_Illegal_Wide_Character;
1360 Store_Encoded_Character (Code);
1363 -- Make sure we are allowing wide characters in identifiers.
1364 -- Note that we allow wide character notation for an OK
1365 -- identifier character. This in particular allows bracket
1366 -- or other notation to be used for upper half letters.
1368 if Identifier_Character_Set /= 'w'
1370 (not In_Character_Range (Code)
1372 not Identifier_Char (Get_Character (Code)))
1375 ("wide character not allowed in identifier", Sptr);
1379 goto Scan_Identifier;
1383 -- Scan of identifier is complete. The identifier is stored in
1384 -- Name_Buffer, and Scan_Ptr points past the last character.
1386 Token_Name := Name_Find;
1388 -- Here is where we check if it was a keyword
1390 if Get_Name_Table_Byte (Token_Name) /= 0
1391 and then (Ada_95 or else Token_Name not in Ada_95_Reserved_Words)
1393 Token := Token_Type'Val (Get_Name_Table_Byte (Token_Name));
1395 -- Deal with possible style check for non-lower case keyword,
1396 -- but we don't treat ACCESS, DELTA, DIGITS, RANGE as keywords
1397 -- for this purpose if they appear as attribute designators.
1398 -- Actually we only check the first character for speed.
1401 and then Source (Token_Ptr) <= 'Z'
1402 and then (Prev_Token /= Tok_Apostrophe
1404 (Token /= Tok_Access
1405 and then Token /= Tok_Delta
1406 and then Token /= Tok_Digits
1407 and then Token /= Tok_Range))
1409 Style.Non_Lower_Case_Keyword;
1412 -- We must reset Token_Name since this is not an identifier
1413 -- and if we leave Token_Name set, the parser gets confused
1414 -- because it thinks it is dealing with an identifier instead
1415 -- of the corresponding keyword.
1417 Token_Name := No_Name;
1420 -- It is an identifier after all
1423 Token_Node := New_Node (N_Identifier, Token_Ptr);
1424 Set_Chars (Token_Node, Token_Name);
1425 Token := Tok_Identifier;
1430 ---------------------
1431 -- Scan_First_Char --
1432 ---------------------
1434 function Scan_First_Char return Source_Ptr is
1435 Ptr : Source_Ptr := Current_Line_Start;
1439 if Source (Ptr) = ' ' then
1442 elsif Source (Ptr) = HT then
1443 if Style_Check then Style.Check_HT; end if;
1450 end Scan_First_Char;
1452 ------------------------------
1453 -- Scan_Reserved_Identifier --
1454 ------------------------------
1456 procedure Scan_Reserved_Identifier (Force_Msg : Boolean) is
1457 Token_Chars : constant String := Token_Type'Image (Token);
1460 -- We have in Token_Chars the image of the Token name, i.e. Tok_xxx.
1461 -- This code extracts the xxx and makes an identifier out of it.
1465 for J in 5 .. Token_Chars'Length loop
1466 Name_Len := Name_Len + 1;
1467 Name_Buffer (Name_Len) := Fold_Lower (Token_Chars (J));
1470 Token_Name := Name_Find;
1472 if not Used_As_Identifier (Token) or else Force_Msg then
1473 Error_Msg_Name_1 := Token_Name;
1474 Error_Msg_SC ("reserved word* cannot be used as identifier!");
1475 Used_As_Identifier (Token) := True;
1478 Token := Tok_Identifier;
1479 Token_Node := New_Node (N_Identifier, Token_Ptr);
1480 Set_Chars (Token_Node, Token_Name);
1481 end Scan_Reserved_Identifier;
1483 ----------------------
1484 -- Set_Start_Column --
1485 ----------------------
1487 -- Note: it seems at first glance a little expensive to compute this value
1488 -- for every source line (since it is certainly not used for all source
1489 -- lines). On the other hand, it doesn't take much more work to skip past
1490 -- the initial white space on the line counting the columns than it would
1491 -- to scan past the white space using the standard scanning circuits.
1493 function Set_Start_Column return Column_Number is
1494 Start_Column : Column_Number := 0;
1497 -- Outer loop scans past horizontal tab characters
1501 -- Inner loop scans past blanks as fast as possible, bumping Scan_Ptr
1502 -- past the blanks and adjusting Start_Column to account for them.
1505 if Source (Scan_Ptr) = ' ' then
1506 if Source (Scan_Ptr + 1) = ' ' then
1507 if Source (Scan_Ptr + 2) = ' ' then
1508 if Source (Scan_Ptr + 3) = ' ' then
1509 if Source (Scan_Ptr + 4) = ' ' then
1510 if Source (Scan_Ptr + 5) = ' ' then
1511 if Source (Scan_Ptr + 6) = ' ' then
1512 Scan_Ptr := Scan_Ptr + 7;
1513 Start_Column := Start_Column + 7;
1515 Scan_Ptr := Scan_Ptr + 6;
1516 Start_Column := Start_Column + 6;
1520 Scan_Ptr := Scan_Ptr + 5;
1521 Start_Column := Start_Column + 5;
1525 Scan_Ptr := Scan_Ptr + 4;
1526 Start_Column := Start_Column + 4;
1530 Scan_Ptr := Scan_Ptr + 3;
1531 Start_Column := Start_Column + 3;
1535 Scan_Ptr := Scan_Ptr + 2;
1536 Start_Column := Start_Column + 2;
1540 Scan_Ptr := Scan_Ptr + 1;
1541 Start_Column := Start_Column + 1;
1547 end loop Blanks_Loop;
1549 -- Outer loop keeps going only if a horizontal tab follows
1551 if Source (Scan_Ptr) = HT then
1552 if Style_Check then Style.Check_HT; end if;
1553 Scan_Ptr := Scan_Ptr + 1;
1554 Start_Column := (Start_Column / 8) * 8 + 8;
1561 return Start_Column;
1562 end Set_Start_Column;
1568 procedure Slit is separate;