gcc/ada/scn.adb

   1 ------------------------------------------------------------------------------
   2 --                                                                          --
   3 --                         GNAT COMPILER COMPONENTS                         --
   4 --                                                                          --
   5 --                                  S C N                                   --
   6 --                                                                          --
   7 --                                 B o d y                                  --
   8 --                                                                          --
   9 --                            $Revision: 1.111 $
  10 --                                                                          --
  11 --          Copyright (C) 1992-2001 Free Software Foundation, Inc.          --
  12 --                                                                          --
  13 -- GNAT is free software;  you can  redistribute it  and/or modify it under --
  14 -- terms of the  GNU General Public License as published  by the Free Soft- --
  15 -- ware  Foundation;  either version 2,  or (at your option) any later ver- --
  16 -- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
  17 -- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
  18 -- or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License --
  19 -- for  more details.  You should have  received  a copy of the GNU General --
  20 -- Public License  distributed with GNAT;  see file COPYING.  If not, write --
  21 -- to  the Free Software Foundation,  59 Temple Place - Suite 330,  Boston, --
  22 -- MA 02111-1307, USA.                                                      --
  23 --                                                                          --
  24 -- GNAT was originally developed  by the GNAT team at  New York University. --
  25 -- It is now maintained by Ada Core Technologies Inc (http://www.gnat.com). --
  26 --                                                                          --
  27 ------------------------------------------------------------------------------
  28
  29 with Atree;    use Atree;
  30 with Csets;    use Csets;
  31 with Errout;   use Errout;
  32 with Hostparm; use Hostparm;
  33 with Namet;    use Namet;
  34 with Opt;      use Opt;
  35 with Scans;    use Scans;
  36 with Sinput;   use Sinput;
  37 with Sinfo;    use Sinfo;
  38 with Snames;   use Snames;
  39 with Style;
  40 with Widechar; use Widechar;
  41
  42 with System.WCh_Con; use System.WCh_Con;
  43
  44 package body Scn is
  45
  46    use ASCII;
  47    --  Make control characters visible
  48
  49    Used_As_Identifier : array (Token_Type) of Boolean;
  50    --  Flags set True if a given keyword is used as an identifier (used to
  51    --  make sure that we only post an error message for incorrect use of a
  52    --  keyword as an identifier once for a given keyword).
  53
  54    -----------------------
  55    -- Local Subprograms --
  56    -----------------------
  57
  58    procedure Accumulate_Checksum (C : Character);
  59    pragma Inline (Accumulate_Checksum);
  60    --  This routine accumulates the checksum given character C. During the
  61    --  scanning of a source file, this routine is called with every character
  62    --  in the source, excluding blanks, and all control characters (except
  63    --  that ESC is included in the checksum). Upper case letters not in string
  64    --  literals are folded by the caller. See Sinput spec for the documentation
  65    --  of the checksum algorithm. Note: checksum values are only used if we
  66    --  generate code, so it is not necessary to worry about making the right
  67    --  sequence of calls in any error situation.
  68
  69    procedure Accumulate_Checksum (C : Char_Code);
  70    pragma Inline (Accumulate_Checksum);
  71    --  This version is identical, except that the argument, C, is a character
  72    --  code value instead of a character. This is used when wide characters
  73    --  are scanned. We use the character code rather than the ASCII characters
  74    --  so that the checksum is independent of wide character encoding method.
  75
  76    procedure Check_End_Of_Line;
  77    --  Called when end of line encountered. Checks that line is not
  78    --  too long, and that other style checks for the end of line are met.
  79
  80    function Determine_License return License_Type;
  81    --  Scan header of file and check that it has an appropriate GNAT-style
  82    --  header with a proper license statement. Returns GPL, Unrestricted,
  83    --  or Modified_GPL depending on header. If none of these, returns Unknown.
  84
  85    function Double_Char_Token (C : Character) return Boolean;
  86    --  This function is used for double character tokens like := or <>. It
  87    --  checks if the character following Source (Scan_Ptr) is C, and if so
  88    --  bumps Scan_Ptr past the pair of characters and returns True. A space
  89    --  between the two characters is also recognized with an appropriate
  90    --  error message being issued. If C is not present, False is returned.
  91    --  Note that Double_Char_Token can only be used for tokens defined in
  92    --  the Ada syntax (it's use for error cases like && is not appropriate
  93    --  since we do not want a junk message for a case like &-space-&).
  94
  95    procedure Error_Illegal_Character;
  96    --  Give illegal character error, Scan_Ptr points to character. On return,
  97    --  Scan_Ptr is bumped past the illegal character.
  98
  99    procedure Error_Illegal_Wide_Character;
 100    --  Give illegal wide character message. On return, Scan_Ptr is bumped
 101    --  past the illegal character, which may still leave us pointing to
 102    --  junk, not much we can do if the escape sequence is messed up!
 103
 104    procedure Error_Long_Line;
 105    --  Signal error of excessively long line
 106
 107    procedure Error_No_Double_Underline;
 108    --  Signal error of double underline character
 109
 110    procedure Nlit;
 111    --  This is the procedure for scanning out numeric literals. On entry,
 112    --  Scan_Ptr points to the digit that starts the numeric literal (the
 113    --  checksum for this character has not been accumulated yet). On return
 114    --  Scan_Ptr points past the last character of the numeric literal, Token
 115    --  and Token_Node are set appropriately, and the checksum is updated.
 116
 117    function Set_Start_Column return Column_Number;
 118    --  This routine is called with Scan_Ptr pointing to the first character
 119    --  of a line. On exit, Scan_Ptr is advanced to the first non-blank
 120    --  character of this line (or to the terminating format effector if the
 121    --  line contains no non-blank characters), and the returned result is the
 122    --  column number of this non-blank character (zero origin), which is the
 123    --  value to be stored in the Start_Column scan variable.
 124
 125    procedure Slit;
 126    --  This is the procedure for scanning out string literals. On entry,
 127    --  Scan_Ptr points to the opening string quote (the checksum for this
 128    --  character has not been accumulated yet). On return Scan_Ptr points
 129    --  past the closing quote of the string literal, Token and Token_Node
 130    --  are set appropriately, and the checksum is upated.
 131
 132    -------------------------
 133    -- Accumulate_Checksum --
 134    -------------------------
 135
 136    procedure Accumulate_Checksum (C : Character) is
 137    begin
 138       Checksum := Checksum + Checksum + Character'Pos (C);
 139
 140       if Checksum > 16#8000_0000# then
 141          Checksum := (Checksum + 1) and 16#7FFF_FFFF#;
 142       end if;
 143    end Accumulate_Checksum;
 144
 145    procedure Accumulate_Checksum (C : Char_Code) is
 146    begin
 147       Checksum := Checksum + Checksum + Char_Code'Pos (C);
 148
 149       if Checksum > 16#8000_0000# then
 150          Checksum := (Checksum + 1) and 16#7FFF_FFFF#;
 151       end if;
 152    end Accumulate_Checksum;
 153
 154    -----------------------
 155    -- Check_End_Of_Line --
 156    -----------------------
 157
 158    procedure Check_End_Of_Line is
 159       Len : constant Int := Int (Scan_Ptr) - Int (Current_Line_Start);
 160
 161    begin
 162       if Len > Hostparm.Max_Line_Length then
 163          Error_Long_Line;
 164
 165       elsif Style_Check then
 166          Style.Check_Line_Terminator (Len);
 167       end if;
 168    end Check_End_Of_Line;
 169
 170    -----------------------
 171    -- Determine_License --
 172    -----------------------
 173
 174    function Determine_License return License_Type is
 175       GPL_Found : Boolean := False;
 176
 177       function Contains (S : String) return Boolean;
 178       --  See if current comment contains successive non-blank characters
 179       --  matching the contents of S. If so leave Scan_Ptr unchanged and
 180       --  return True, otherwise leave Scan_Ptr unchanged and return False.
 181
 182       procedure Skip_EOL;
 183       --  Skip to line terminator character
 184
 185       --------------
 186       -- Contains --
 187       --------------
 188
 189       function Contains (S : String) return Boolean is
 190          CP : Natural;
 191          SP : Source_Ptr;
 192          SS : Source_Ptr;
 193
 194       begin
 195          SP := Scan_Ptr;
 196          while Source (SP) /= CR and then Source (SP) /= LF loop
 197             if Source (SP) = S (S'First) then
 198                SS := SP;
 199                CP := S'First;
 200
 201                loop
 202                   SS := SS + 1;
 203                   CP := CP + 1;
 204
 205                   if CP > S'Last then
 206                      return True;
 207                   end if;
 208
 209                   while Source (SS) = ' ' loop
 210                      SS := SS + 1;
 211                   end loop;
 212
 213                   exit when Source (SS) /= S (CP);
 214                end loop;
 215             end if;
 216
 217             SP := SP + 1;
 218          end loop;
 219
 220          return False;
 221       end Contains;
 222
 223       --------------
 224       -- Skip_EOL --
 225       --------------
 226
 227       procedure Skip_EOL is
 228       begin
 229          while Source (Scan_Ptr) /= CR
 230            and then Source (Scan_Ptr) /= LF
 231          loop
 232             Scan_Ptr := Scan_Ptr + 1;
 233          end loop;
 234       end Skip_EOL;
 235
 236    --  Start of processing for Determine_License
 237
 238    begin
 239       loop
 240          if Source (Scan_Ptr) /= '-'
 241            or else Source (Scan_Ptr + 1) /= '-'
 242          then
 243             if GPL_Found then
 244                return GPL;
 245             else
 246                return Unknown;
 247             end if;
 248
 249          elsif Contains ("Asaspecialexception") then
 250             if GPL_Found then
 251                return Modified_GPL;
 252             end if;
 253
 254          elsif Contains ("GNUGeneralPublicLicense") then
 255             GPL_Found := True;
 256
 257          elsif
 258              Contains
 259                ("ThisspecificationisadaptedfromtheAdaSemanticInterface")
 260            or else
 261              Contains
 262               ("ThisspecificationisderivedfromtheAdaReferenceManual")
 263          then
 264             return Unrestricted;
 265          end if;
 266
 267          Skip_EOL;
 268
 269          Check_End_Of_Line;
 270
 271          declare
 272             Physical : Boolean;
 273
 274          begin
 275             Skip_Line_Terminators (Scan_Ptr, Physical);
 276
 277             --  If we are at start of physical line, update scan pointers
 278             --  to reflect the start of the new line.
 279
 280             if Physical then
 281                Current_Line_Start       := Scan_Ptr;
 282                Start_Column             := Set_Start_Column;
 283                First_Non_Blank_Location := Scan_Ptr;
 284             end if;
 285          end;
 286       end loop;
 287    end Determine_License;
 288
 289    ----------------------------
 290    -- Determine_Token_Casing --
 291    ----------------------------
 292
 293    function Determine_Token_Casing return Casing_Type is
 294    begin
 295       return Determine_Casing (Source (Token_Ptr .. Scan_Ptr - 1));
 296    end Determine_Token_Casing;
 297
 298    -----------------------
 299    -- Double_Char_Token --
 300    -----------------------
 301
 302    function Double_Char_Token (C : Character) return Boolean is
 303    begin
 304       if Source (Scan_Ptr + 1) = C then
 305          Accumulate_Checksum (C);
 306          Scan_Ptr := Scan_Ptr + 2;
 307          return True;
 308
 309       elsif Source (Scan_Ptr + 1) = ' '
 310         and then Source (Scan_Ptr + 2) = C
 311       then
 312          Scan_Ptr := Scan_Ptr + 1;
 313          Error_Msg_S ("no space allowed here");
 314          Scan_Ptr := Scan_Ptr + 2;
 315          return True;
 316
 317       else
 318          return False;
 319       end if;
 320    end Double_Char_Token;
 321
 322    -----------------------------
 323    -- Error_Illegal_Character --
 324    -----------------------------
 325
 326    procedure Error_Illegal_Character is
 327    begin
 328       Error_Msg_S ("illegal character");
 329       Scan_Ptr := Scan_Ptr + 1;
 330    end Error_Illegal_Character;
 331
 332    ----------------------------------
 333    -- Error_Illegal_Wide_Character --
 334    ----------------------------------
 335
 336    procedure Error_Illegal_Wide_Character is
 337    begin
 338       if OpenVMS then
 339          Error_Msg_S
 340            ("illegal wide character, check " &
 341             "'/'W'I'D'E'_'C'H'A'R'A'C'T'E'R'_'E'N'C'O'D'I'N'G qualifer");
 342       else
 343          Error_Msg_S
 344            ("illegal wide character, check -gnatW switch");
 345       end if;
 346
 347       Scan_Ptr := Scan_Ptr + 1;
 348    end Error_Illegal_Wide_Character;
 349
 350    ---------------------
 351    -- Error_Long_Line --
 352    ---------------------
 353
 354    procedure Error_Long_Line is
 355    begin
 356       Error_Msg
 357         ("this line is too long",
 358          Current_Line_Start + Hostparm.Max_Line_Length);
 359    end Error_Long_Line;
 360
 361    -------------------------------
 362    -- Error_No_Double_Underline --
 363    -------------------------------
 364
 365    procedure Error_No_Double_Underline is
 366    begin
 367       Error_Msg_S ("two consecutive underlines not permitted");
 368    end Error_No_Double_Underline;
 369
 370    ------------------------
 371    -- Initialize_Scanner --
 372    ------------------------
 373
 374    procedure Initialize_Scanner
 375      (Unit  : Unit_Number_Type;
 376       Index : Source_File_Index)
 377    is
 378       GNAT_Hedr : constant Text_Buffer (1 .. 78) := (others => '-');
 379
 380    begin
 381       --  Set up Token_Type values in Names Table entries for reserved keywords
 382       --  We use the Pos value of the Token_Type value. Note we are relying on
 383       --  the fact that Token_Type'Val (0) is not a reserved word!
 384
 385       Set_Name_Table_Byte (Name_Abort,      Token_Type'Pos (Tok_Abort));
 386       Set_Name_Table_Byte (Name_Abs,        Token_Type'Pos (Tok_Abs));
 387       Set_Name_Table_Byte (Name_Abstract,   Token_Type'Pos (Tok_Abstract));
 388       Set_Name_Table_Byte (Name_Accept,     Token_Type'Pos (Tok_Accept));
 389       Set_Name_Table_Byte (Name_Access,     Token_Type'Pos (Tok_Access));
 390       Set_Name_Table_Byte (Name_And,        Token_Type'Pos (Tok_And));
 391       Set_Name_Table_Byte (Name_Aliased,    Token_Type'Pos (Tok_Aliased));
 392       Set_Name_Table_Byte (Name_All,        Token_Type'Pos (Tok_All));
 393       Set_Name_Table_Byte (Name_Array,      Token_Type'Pos (Tok_Array));
 394       Set_Name_Table_Byte (Name_At,         Token_Type'Pos (Tok_At));
 395       Set_Name_Table_Byte (Name_Begin,      Token_Type'Pos (Tok_Begin));
 396       Set_Name_Table_Byte (Name_Body,       Token_Type'Pos (Tok_Body));
 397       Set_Name_Table_Byte (Name_Case,       Token_Type'Pos (Tok_Case));
 398       Set_Name_Table_Byte (Name_Constant,   Token_Type'Pos (Tok_Constant));
 399       Set_Name_Table_Byte (Name_Declare,    Token_Type'Pos (Tok_Declare));
 400       Set_Name_Table_Byte (Name_Delay,      Token_Type'Pos (Tok_Delay));
 401       Set_Name_Table_Byte (Name_Delta,      Token_Type'Pos (Tok_Delta));
 402       Set_Name_Table_Byte (Name_Digits,     Token_Type'Pos (Tok_Digits));
 403       Set_Name_Table_Byte (Name_Do,         Token_Type'Pos (Tok_Do));
 404       Set_Name_Table_Byte (Name_Else,       Token_Type'Pos (Tok_Else));
 405       Set_Name_Table_Byte (Name_Elsif,      Token_Type'Pos (Tok_Elsif));
 406       Set_Name_Table_Byte (Name_End,        Token_Type'Pos (Tok_End));
 407       Set_Name_Table_Byte (Name_Entry,      Token_Type'Pos (Tok_Entry));
 408       Set_Name_Table_Byte (Name_Exception,  Token_Type'Pos (Tok_Exception));
 409       Set_Name_Table_Byte (Name_Exit,       Token_Type'Pos (Tok_Exit));
 410       Set_Name_Table_Byte (Name_For,        Token_Type'Pos (Tok_For));
 411       Set_Name_Table_Byte (Name_Function,   Token_Type'Pos (Tok_Function));
 412       Set_Name_Table_Byte (Name_Generic,    Token_Type'Pos (Tok_Generic));
 413       Set_Name_Table_Byte (Name_Goto,       Token_Type'Pos (Tok_Goto));
 414       Set_Name_Table_Byte (Name_If,         Token_Type'Pos (Tok_If));
 415       Set_Name_Table_Byte (Name_In,         Token_Type'Pos (Tok_In));
 416       Set_Name_Table_Byte (Name_Is,         Token_Type'Pos (Tok_Is));
 417       Set_Name_Table_Byte (Name_Limited,    Token_Type'Pos (Tok_Limited));
 418       Set_Name_Table_Byte (Name_Loop,       Token_Type'Pos (Tok_Loop));
 419       Set_Name_Table_Byte (Name_Mod,        Token_Type'Pos (Tok_Mod));
 420       Set_Name_Table_Byte (Name_New,        Token_Type'Pos (Tok_New));
 421       Set_Name_Table_Byte (Name_Not,        Token_Type'Pos (Tok_Not));
 422       Set_Name_Table_Byte (Name_Null,       Token_Type'Pos (Tok_Null));
 423       Set_Name_Table_Byte (Name_Of,         Token_Type'Pos (Tok_Of));
 424       Set_Name_Table_Byte (Name_Or,         Token_Type'Pos (Tok_Or));
 425       Set_Name_Table_Byte (Name_Others,     Token_Type'Pos (Tok_Others));
 426       Set_Name_Table_Byte (Name_Out,        Token_Type'Pos (Tok_Out));
 427       Set_Name_Table_Byte (Name_Package,    Token_Type'Pos (Tok_Package));
 428       Set_Name_Table_Byte (Name_Pragma,     Token_Type'Pos (Tok_Pragma));
 429       Set_Name_Table_Byte (Name_Private,    Token_Type'Pos (Tok_Private));
 430       Set_Name_Table_Byte (Name_Procedure,  Token_Type'Pos (Tok_Procedure));
 431       Set_Name_Table_Byte (Name_Protected,  Token_Type'Pos (Tok_Protected));
 432       Set_Name_Table_Byte (Name_Raise,      Token_Type'Pos (Tok_Raise));
 433       Set_Name_Table_Byte (Name_Range,      Token_Type'Pos (Tok_Range));
 434       Set_Name_Table_Byte (Name_Record,     Token_Type'Pos (Tok_Record));
 435       Set_Name_Table_Byte (Name_Rem,        Token_Type'Pos (Tok_Rem));
 436       Set_Name_Table_Byte (Name_Renames,    Token_Type'Pos (Tok_Renames));
 437       Set_Name_Table_Byte (Name_Requeue,    Token_Type'Pos (Tok_Requeue));
 438       Set_Name_Table_Byte (Name_Return,     Token_Type'Pos (Tok_Return));
 439       Set_Name_Table_Byte (Name_Reverse,    Token_Type'Pos (Tok_Reverse));
 440       Set_Name_Table_Byte (Name_Select,     Token_Type'Pos (Tok_Select));
 441       Set_Name_Table_Byte (Name_Separate,   Token_Type'Pos (Tok_Separate));
 442       Set_Name_Table_Byte (Name_Subtype,    Token_Type'Pos (Tok_Subtype));
 443       Set_Name_Table_Byte (Name_Tagged,     Token_Type'Pos (Tok_Tagged));
 444       Set_Name_Table_Byte (Name_Task,       Token_Type'Pos (Tok_Task));
 445       Set_Name_Table_Byte (Name_Terminate,  Token_Type'Pos (Tok_Terminate));
 446       Set_Name_Table_Byte (Name_Then,       Token_Type'Pos (Tok_Then));
 447       Set_Name_Table_Byte (Name_Type,       Token_Type'Pos (Tok_Type));
 448       Set_Name_Table_Byte (Name_Until,      Token_Type'Pos (Tok_Until));
 449       Set_Name_Table_Byte (Name_Use,        Token_Type'Pos (Tok_Use));
 450       Set_Name_Table_Byte (Name_When,       Token_Type'Pos (Tok_When));
 451       Set_Name_Table_Byte (Name_While,      Token_Type'Pos (Tok_While));
 452       Set_Name_Table_Byte (Name_With,       Token_Type'Pos (Tok_With));
 453       Set_Name_Table_Byte (Name_Xor,        Token_Type'Pos (Tok_Xor));
 454
 455       --  Initialize scan control variables
 456
 457       Current_Source_File       := Index;
 458       Source                    := Source_Text (Current_Source_File);
 459       Current_Source_Unit       := Unit;
 460       Scan_Ptr                  := Source_First (Current_Source_File);
 461       Token                     := No_Token;
 462       Token_Ptr                 := Scan_Ptr;
 463       Current_Line_Start        := Scan_Ptr;
 464       Token_Node                := Empty;
 465       Token_Name                := No_Name;
 466       Start_Column              := Set_Start_Column;
 467       First_Non_Blank_Location  := Scan_Ptr;
 468       Checksum                  := 0;
 469
 470       --  Set default for Comes_From_Source. All nodes built now until we
 471       --  reenter the analyzer will have Comes_From_Source set to True
 472
 473       Set_Comes_From_Source_Default (True);
 474
 475       --  Check license if GNAT type header possibly present
 476
 477       if Source_Last (Index) - Scan_Ptr > 80
 478         and then Source (Scan_Ptr .. Scan_Ptr + 77) = GNAT_Hedr
 479       then
 480          Set_License (Current_Source_File, Determine_License);
 481       end if;
 482
 483       --  Scan initial token (note this initializes Prev_Token, Prev_Token_Ptr)
 484
 485       Scan;
 486
 487       --  Clear flags for reserved words used as indentifiers
 488
 489       for J in Token_Type loop
 490          Used_As_Identifier (J) := False;
 491       end loop;
 492
 493    end Initialize_Scanner;
 494
 495    ----------
 496    -- Nlit --
 497    ----------
 498
 499    procedure Nlit is separate;
 500
 501    ----------
 502    -- Scan --
 503    ----------
 504
 505    procedure Scan is
 506    begin
 507       Prev_Token := Token;
 508       Prev_Token_Ptr := Token_Ptr;
 509       Token_Name := Error_Name;
 510
 511       --  The following loop runs more than once only if a format effector
 512       --  (tab, vertical tab, form  feed, line feed, carriage return) is
 513       --  encountered and skipped, or some error situation, such as an
 514       --  illegal character, is encountered.
 515
 516       loop
 517          --  Skip past blanks, loop is opened up for speed
 518
 519          while Source (Scan_Ptr) = ' ' loop
 520
 521             if Source (Scan_Ptr + 1) /= ' ' then
 522                Scan_Ptr := Scan_Ptr + 1;
 523                exit;
 524             end if;
 525
 526             if Source (Scan_Ptr + 2) /= ' ' then
 527                Scan_Ptr := Scan_Ptr + 2;
 528                exit;
 529             end if;
 530
 531             if Source (Scan_Ptr + 3) /= ' ' then
 532                Scan_Ptr := Scan_Ptr + 3;
 533                exit;
 534             end if;
 535
 536             if Source (Scan_Ptr + 4) /= ' ' then
 537                Scan_Ptr := Scan_Ptr + 4;
 538                exit;
 539             end if;
 540
 541             if Source (Scan_Ptr + 5) /= ' ' then
 542                Scan_Ptr := Scan_Ptr + 5;
 543                exit;
 544             end if;
 545
 546             if Source (Scan_Ptr + 6) /= ' ' then
 547                Scan_Ptr := Scan_Ptr + 6;
 548                exit;
 549             end if;
 550
 551             if Source (Scan_Ptr + 7) /= ' ' then
 552                Scan_Ptr := Scan_Ptr + 7;
 553                exit;
 554             end if;
 555
 556             Scan_Ptr := Scan_Ptr + 8;
 557          end loop;
 558
 559          --  We are now at a non-blank character, which is the first character
 560          --  of the token we will scan, and hence the value of Token_Ptr.
 561
 562          Token_Ptr := Scan_Ptr;
 563
 564          --  Here begins the main case statement which transfers control on
 565          --  the basis of the non-blank character we have encountered.
 566
 567          case Source (Scan_Ptr) is
 568
 569          --  Line terminator characters
 570
 571          when CR | LF | FF | VT => Line_Terminator_Case : begin
 572
 573             --  Check line too long
 574
 575             Check_End_Of_Line;
 576
 577             declare
 578                Physical : Boolean;
 579
 580             begin
 581                Skip_Line_Terminators (Scan_Ptr, Physical);
 582
 583                --  If we are at start of physical line, update scan pointers
 584                --  to reflect the start of the new line.
 585
 586                if Physical then
 587                   Current_Line_Start       := Scan_Ptr;
 588                   Start_Column             := Set_Start_Column;
 589                   First_Non_Blank_Location := Scan_Ptr;
 590                end if;
 591             end;
 592          end Line_Terminator_Case;
 593
 594          --  Horizontal tab, just skip past it
 595
 596          when HT =>
 597             if Style_Check then Style.Check_HT; end if;
 598             Scan_Ptr := Scan_Ptr + 1;
 599
 600          --  End of file character, treated as an end of file only if it
 601          --  is the last character in the buffer, otherwise it is ignored.
 602
 603          when EOF =>
 604             if Scan_Ptr = Source_Last (Current_Source_File) then
 605                Check_End_Of_Line;
 606                Token := Tok_EOF;
 607                return;
 608
 609             else
 610                Scan_Ptr := Scan_Ptr + 1;
 611             end if;
 612
 613          --  Ampersand
 614
 615          when '&' =>
 616             Accumulate_Checksum ('&');
 617
 618             if Source (Scan_Ptr + 1) = '&' then
 619                Error_Msg_S ("'&'& should be `AND THEN`");
 620                Scan_Ptr := Scan_Ptr + 2;
 621                Token := Tok_And;
 622                return;
 623
 624             else
 625                Scan_Ptr := Scan_Ptr + 1;
 626                Token := Tok_Ampersand;
 627                return;
 628             end if;
 629
 630          --  Asterisk (can be multiplication operator or double asterisk
 631          --  which is the exponentiation compound delimtier).
 632
 633          when '*' =>
 634             Accumulate_Checksum ('*');
 635
 636             if Source (Scan_Ptr + 1) = '*' then
 637                Accumulate_Checksum ('*');
 638                Scan_Ptr := Scan_Ptr + 2;
 639                Token := Tok_Double_Asterisk;
 640                return;
 641
 642             else
 643                Scan_Ptr := Scan_Ptr + 1;
 644                Token := Tok_Asterisk;
 645                return;
 646             end if;
 647
 648          --  Colon, which can either be an isolated colon, or part of an
 649          --  assignment compound delimiter.
 650
 651          when ':' =>
 652             Accumulate_Checksum (':');
 653
 654             if Double_Char_Token ('=') then
 655                Token := Tok_Colon_Equal;
 656                if Style_Check then Style.Check_Colon_Equal; end if;
 657                return;
 658
 659             elsif Source (Scan_Ptr + 1) = '-'
 660               and then Source (Scan_Ptr + 2) /= '-'
 661             then
 662                Token := Tok_Colon_Equal;
 663                Error_Msg (":- should be :=", Scan_Ptr);
 664                Scan_Ptr := Scan_Ptr + 2;
 665                return;
 666
 667             else
 668                Scan_Ptr := Scan_Ptr + 1;
 669                Token := Tok_Colon;
 670                if Style_Check then Style.Check_Colon; end if;
 671                return;
 672             end if;
 673
 674          --  Left parenthesis
 675
 676          when '(' =>
 677             Accumulate_Checksum ('(');
 678             Scan_Ptr := Scan_Ptr + 1;
 679             Token := Tok_Left_Paren;
 680             if Style_Check then Style.Check_Left_Paren; end if;
 681             return;
 682
 683          --  Left bracket
 684
 685          when '[' =>
 686             if Source (Scan_Ptr + 1) = '"' then
 687                Name_Len := 0;
 688                goto Scan_Identifier;
 689
 690             else
 691                Error_Msg_S ("illegal character, replaced by ""(""");
 692                Scan_Ptr := Scan_Ptr + 1;
 693                Token := Tok_Left_Paren;
 694                return;
 695             end if;
 696
 697          --  Left brace
 698
 699          when '{' =>
 700             Error_Msg_S ("illegal character, replaced by ""(""");
 701             Scan_Ptr := Scan_Ptr + 1;
 702             Token := Tok_Left_Paren;
 703             return;
 704
 705          --  Comma
 706
 707          when ',' =>
 708             Accumulate_Checksum (',');
 709             Scan_Ptr := Scan_Ptr + 1;
 710             Token := Tok_Comma;
 711             if Style_Check then Style.Check_Comma; end if;
 712             return;
 713
 714          --  Dot, which is either an isolated period, or part of a double
 715          --  dot compound delimiter sequence. We also check for the case of
 716          --  a digit following the period, to give a better error message.
 717
 718          when '.' =>
 719             Accumulate_Checksum ('.');
 720
 721             if Double_Char_Token ('.') then
 722                Token := Tok_Dot_Dot;
 723                if Style_Check then Style.Check_Dot_Dot; end if;
 724                return;
 725
 726             elsif Source (Scan_Ptr + 1) in '0' .. '9' then
 727                Error_Msg_S ("numeric literal cannot start with point");
 728                Scan_Ptr := Scan_Ptr + 1;
 729
 730             else
 731                Scan_Ptr := Scan_Ptr + 1;
 732                Token := Tok_Dot;
 733                return;
 734             end if;
 735
 736          --  Equal, which can either be an equality operator, or part of the
 737          --  arrow (=>) compound delimiter.
 738
 739          when '=' =>
 740             Accumulate_Checksum ('=');
 741
 742             if Double_Char_Token ('>') then
 743                Token := Tok_Arrow;
 744                if Style_Check then Style.Check_Arrow; end if;
 745                return;
 746
 747             elsif Source (Scan_Ptr + 1) = '=' then
 748                Error_Msg_S ("== should be =");
 749                Scan_Ptr := Scan_Ptr + 1;
 750             end if;
 751
 752             Scan_Ptr := Scan_Ptr + 1;
 753             Token := Tok_Equal;
 754             return;
 755
 756          --  Greater than, which can be a greater than operator, greater than
 757          --  or equal operator, or first character of a right label bracket.
 758
 759          when '>' =>
 760             Accumulate_Checksum ('>');
 761
 762             if Double_Char_Token ('=') then
 763                Token := Tok_Greater_Equal;
 764                return;
 765
 766             elsif Double_Char_Token ('>') then
 767                Token := Tok_Greater_Greater;
 768                return;
 769
 770             else
 771                Scan_Ptr := Scan_Ptr + 1;
 772                Token := Tok_Greater;
 773                return;
 774             end if;
 775
 776          --  Less than, which can be a less than operator, less than or equal
 777          --  operator, or the first character of a left label bracket, or the
 778          --  first character of a box (<>) compound delimiter.
 779
 780          when '<' =>
 781             Accumulate_Checksum ('<');
 782
 783             if Double_Char_Token ('=') then
 784                Token := Tok_Less_Equal;
 785                return;
 786
 787             elsif Double_Char_Token ('>') then
 788                Token := Tok_Box;
 789                if Style_Check then Style.Check_Box; end if;
 790                return;
 791
 792             elsif Double_Char_Token ('<') then
 793                Token := Tok_Less_Less;
 794                return;
 795
 796             else
 797                Scan_Ptr := Scan_Ptr + 1;
 798                Token := Tok_Less;
 799                return;
 800             end if;
 801
 802          --  Minus, which is either a subtraction operator, or the first
 803          --  character of double minus starting a comment
 804
 805          when '-' => Minus_Case : begin
 806             if Source (Scan_Ptr + 1) = '>' then
 807                Error_Msg_S ("invalid token");
 808                Scan_Ptr := Scan_Ptr + 2;
 809                Token := Tok_Arrow;
 810                return;
 811
 812             elsif Source (Scan_Ptr + 1) /= '-' then
 813                Accumulate_Checksum ('-');
 814                Scan_Ptr := Scan_Ptr + 1;
 815                Token := Tok_Minus;
 816                return;
 817
 818             --  Comment
 819
 820             else -- Source (Scan_Ptr + 1) = '-' then
 821                if Style_Check then Style.Check_Comment; end if;
 822                Scan_Ptr := Scan_Ptr + 2;
 823
 824                --  Loop to scan comment (this loop runs more than once only if
 825                --  a horizontal tab or other non-graphic character is scanned)
 826
 827                loop
 828                   --  Scan to non graphic character (opened up for speed)
 829
 830                   loop
 831                      exit when Source (Scan_Ptr) not in Graphic_Character;
 832                      Scan_Ptr := Scan_Ptr + 1;
 833                      exit when Source (Scan_Ptr) not in Graphic_Character;
 834                      Scan_Ptr := Scan_Ptr + 1;
 835                      exit when Source (Scan_Ptr) not in Graphic_Character;
 836                      Scan_Ptr := Scan_Ptr + 1;
 837                      exit when Source (Scan_Ptr) not in Graphic_Character;
 838                      Scan_Ptr := Scan_Ptr + 1;
 839                      exit when Source (Scan_Ptr) not in Graphic_Character;
 840                      Scan_Ptr := Scan_Ptr + 1;
 841                   end loop;
 842
 843                   --  Keep going if horizontal tab
 844
 845                   if Source (Scan_Ptr) = HT then
 846                      if Style_Check then Style.Check_HT; end if;
 847                      Scan_Ptr := Scan_Ptr + 1;
 848
 849                   --  Terminate scan of comment if line terminator
 850
 851                   elsif Source (Scan_Ptr) in Line_Terminator then
 852                      exit;
 853
 854                   --  Terminate scan of comment if end of file encountered
 855                   --  (embedded EOF character or real last character in file)
 856
 857                   elsif Source (Scan_Ptr) = EOF then
 858                      exit;
 859
 860                   --  Keep going if character in 80-FF range, or is ESC. These
 861                   --  characters are allowed in comments by RM-2.1(1), 2.7(2).
 862                   --  They are allowed even in Ada 83 mode according to the
 863                   --  approved AI. ESC was added to the AI in June 93.
 864
 865                   elsif Source (Scan_Ptr) in Upper_Half_Character
 866                     or else Source (Scan_Ptr) = ESC
 867                   then
 868                      Scan_Ptr := Scan_Ptr + 1;
 869
 870                   --  Otherwise we have an illegal comment character
 871
 872                   else
 873                      Error_Illegal_Character;
 874                   end if;
 875
 876                end loop;
 877
 878                --  Note that we do NOT execute a return here, instead we fall
 879                --  through to reexecute the scan loop to look for a token.
 880
 881             end if;
 882          end Minus_Case;
 883
 884          --  Double quote or percent starting a string literal
 885
 886          when '"' | '%' =>
 887             Slit;
 888             return;
 889
 890          --  Apostrophe. This can either be the start of a character literal,
 891          --  or an isolated apostrophe used in a qualified expression or an
 892          --  attribute. We treat it as a character literal if it does not
 893          --  follow a right parenthesis, identifier, the keyword ALL or
 894          --  a literal. This means that we correctly treat constructs like:
 895
 896          --    A := CHARACTER'('A');
 897
 898          --  Note that RM-2.2(7) does not require a separator between
 899          --  "CHARACTER" and "'" in the above.
 900
 901          when ''' => Char_Literal_Case : declare
 902             Code : Char_Code;
 903             Err  : Boolean;
 904
 905          begin
 906             Accumulate_Checksum (''');
 907             Scan_Ptr := Scan_Ptr + 1;
 908
 909             --  Here is where we make the test to distinguish the cases. Treat
 910             --  as apostrophe if previous token is an identifier, right paren
 911             --  or the reserved word "all" (latter case as in A.all'Address)
 912             --  Also treat it as apostrophe after a literal (this catches
 913             --  some legitimate cases, like A."abs"'Address, and also gives
 914             --  better error behavior for impossible cases like 123'xxx).
 915
 916             if Prev_Token = Tok_Identifier
 917                or else Prev_Token = Tok_Right_Paren
 918                or else Prev_Token = Tok_All
 919                or else Prev_Token in Token_Class_Literal
 920             then
 921                Token := Tok_Apostrophe;
 922                return;
 923
 924             --  Otherwise the apostrophe starts a character literal
 925
 926             else
 927                --  Case of wide character literal with ESC or [ encoding
 928
 929                if (Source (Scan_Ptr) = ESC
 930                      and then
 931                     Wide_Character_Encoding_Method in WC_ESC_Encoding_Method)
 932                  or else
 933                    (Source (Scan_Ptr) in Upper_Half_Character
 934                      and then
 935                     Upper_Half_Encoding)
 936                  or else
 937                    (Source (Scan_Ptr) = '['
 938                      and then
 939                     Source (Scan_Ptr + 1) = '"')
 940                then
 941                   Scan_Wide (Source, Scan_Ptr, Code, Err);
 942                   Accumulate_Checksum (Code);
 943
 944                   if Err then
 945                      Error_Illegal_Wide_Character;
 946                   end if;
 947
 948                   if Source (Scan_Ptr) /= ''' then
 949                      Error_Msg_S ("missing apostrophe");
 950                   else
 951                      Scan_Ptr := Scan_Ptr + 1;
 952                   end if;
 953
 954                --  If we do not find a closing quote in the expected place then
 955                --  assume that we have a misguided attempt at a string literal.
 956
 957                --  However, if previous token is RANGE, then we return an
 958                --  apostrophe instead since this gives better error recovery
 959
 960                elsif Source (Scan_Ptr + 1) /= ''' then
 961
 962                   if Prev_Token = Tok_Range then
 963                      Token := Tok_Apostrophe;
 964                      return;
 965
 966                   else
 967                      Scan_Ptr := Scan_Ptr - 1;
 968                      Error_Msg_S
 969                        ("strings are delimited by double quote character");
 970                      Scn.Slit;
 971                      return;
 972                   end if;
 973
 974                --  Otherwise we have a (non-wide) character literal
 975
 976                else
 977                   Accumulate_Checksum (Source (Scan_Ptr));
 978
 979                   if Source (Scan_Ptr) not in Graphic_Character then
 980                      if Source (Scan_Ptr) in Upper_Half_Character then
 981                         if Ada_83 then
 982                            Error_Illegal_Character;
 983                         end if;
 984
 985                      else
 986                         Error_Illegal_Character;
 987                      end if;
 988                   end if;
 989
 990                   Code := Get_Char_Code (Source (Scan_Ptr));
 991                   Scan_Ptr := Scan_Ptr + 2;
 992                end if;
 993
 994                --  Fall through here with Scan_Ptr updated past the closing
 995                --  quote, and Code set to the Char_Code value for the literal
 996
 997                Accumulate_Checksum (''');
 998                Token := Tok_Char_Literal;
 999                Token_Node := New_Node (N_Character_Literal, Token_Ptr);
1000                Set_Char_Literal_Value (Token_Node, Code);
1001                Set_Character_Literal_Name (Code);
1002                Token_Name := Name_Find;
1003                Set_Chars (Token_Node, Token_Name);
1004                return;
1005             end if;
1006          end Char_Literal_Case;
1007
1008          --  Right parenthesis
1009
1010          when ')' =>
1011             Accumulate_Checksum (')');
1012             Scan_Ptr := Scan_Ptr + 1;
1013             Token := Tok_Right_Paren;
1014             if Style_Check then Style.Check_Right_Paren; end if;
1015             return;
1016
1017          --  Right bracket or right brace, treated as right paren
1018
1019          when ']' | '}' =>
1020             Error_Msg_S ("illegal character, replaced by "")""");
1021             Scan_Ptr := Scan_Ptr + 1;
1022             Token := Tok_Right_Paren;
1023             return;
1024
1025          --  Slash (can be division operator or first character of not equal)
1026
1027          when '/' =>
1028             Accumulate_Checksum ('/');
1029
1030             if Double_Char_Token ('=') then
1031                Token := Tok_Not_Equal;
1032                return;
1033             else
1034                Scan_Ptr := Scan_Ptr + 1;
1035                Token := Tok_Slash;
1036                return;
1037             end if;
1038
1039          --  Semicolon
1040
1041          when ';' =>
1042             Accumulate_Checksum (';');
1043             Scan_Ptr := Scan_Ptr + 1;
1044             Token := Tok_Semicolon;
1045             if Style_Check then Style.Check_Semicolon; end if;
1046             return;
1047
1048          --  Vertical bar
1049
1050          when '|' => Vertical_Bar_Case : begin
1051             Accumulate_Checksum ('|');
1052
1053             --  Special check for || to give nice message
1054
1055             if Source (Scan_Ptr + 1) = '|' then
1056                Error_Msg_S ("""||"" should be `OR ELSE`");
1057                Scan_Ptr := Scan_Ptr + 2;
1058                Token := Tok_Or;
1059                return;
1060
1061             else
1062                Scan_Ptr := Scan_Ptr + 1;
1063                Token := Tok_Vertical_Bar;
1064                if Style_Check then Style.Check_Vertical_Bar; end if;
1065                return;
1066             end if;
1067          end Vertical_Bar_Case;
1068
1069          --  Exclamation, replacement character for vertical bar
1070
1071          when '!' => Exclamation_Case : begin
1072             Accumulate_Checksum ('!');
1073
1074             if Source (Scan_Ptr + 1) = '=' then
1075                Error_Msg_S ("'!= should be /=");
1076                Scan_Ptr := Scan_Ptr + 2;
1077                Token := Tok_Not_Equal;
1078                return;
1079
1080             else
1081                Scan_Ptr := Scan_Ptr + 1;
1082                Token := Tok_Vertical_Bar;
1083                return;
1084             end if;
1085
1086          end Exclamation_Case;
1087
1088          --  Plus
1089
1090          when '+' => Plus_Case : begin
1091             Accumulate_Checksum ('+');
1092             Scan_Ptr := Scan_Ptr + 1;
1093             Token := Tok_Plus;
1094             return;
1095          end Plus_Case;
1096
1097          --  Digits starting a numeric literal
1098
1099          when '0' .. '9' =>
1100             Nlit;
1101
1102             if Identifier_Char (Source (Scan_Ptr)) then
1103                Error_Msg_S
1104                  ("delimiter required between literal and identifier");
1105             end if;
1106
1107             return;
1108
1109          --  Lower case letters
1110
1111          when 'a' .. 'z' =>
1112             Name_Len := 1;
1113             Name_Buffer (1) := Source (Scan_Ptr);
1114             Accumulate_Checksum (Name_Buffer (1));
1115             Scan_Ptr := Scan_Ptr + 1;
1116             goto Scan_Identifier;
1117
1118          --  Upper case letters
1119
1120          when 'A' .. 'Z' =>
1121             Name_Len := 1;
1122             Name_Buffer (1) :=
1123               Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
1124             Accumulate_Checksum (Name_Buffer (1));
1125             Scan_Ptr := Scan_Ptr + 1;
1126             goto Scan_Identifier;
1127
1128          --  Underline character
1129
1130          when '_' =>
1131             Error_Msg_S ("identifier cannot start with underline");
1132             Name_Len := 1;
1133             Name_Buffer (1) := '_';
1134             Scan_Ptr := Scan_Ptr + 1;
1135             goto Scan_Identifier;
1136
1137          --  Space (not possible, because we scanned past blanks)
1138
1139          when ' ' =>
1140             raise Program_Error;
1141
1142          --  Characters in top half of ASCII 8-bit chart
1143
1144          when Upper_Half_Character =>
1145
1146             --  Wide character case. Note that Scan_Identifier will issue
1147             --  an appropriate message if wide characters are not allowed
1148             --  in identifiers.
1149
1150             if Upper_Half_Encoding then
1151                Name_Len := 0;
1152                goto Scan_Identifier;
1153
1154             --  Otherwise we have OK Latin-1 character
1155
1156             else
1157                --  Upper half characters may possibly be identifier letters
1158                --  but can never be digits, so Identifier_Character can be
1159                --  used to test for a valid start of identifier character.
1160
1161                if Identifier_Char (Source (Scan_Ptr)) then
1162                   Name_Len := 0;
1163                   goto Scan_Identifier;
1164                else
1165                   Error_Illegal_Character;
1166                end if;
1167             end if;
1168
1169          when ESC =>
1170
1171             --  ESC character, possible start of identifier if wide characters
1172             --  using ESC encoding are allowed in identifiers, which we can
1173             --  tell by looking at the Identifier_Char flag for ESC, which is
1174             --  only true if these conditions are met.
1175
1176             if Identifier_Char (ESC) then
1177                Name_Len := 0;
1178                goto Scan_Identifier;
1179             else
1180                Error_Illegal_Wide_Character;
1181             end if;
1182
1183          --  Invalid control characters
1184
1185          when NUL | SOH | STX | ETX | EOT | ENQ | ACK | BEL | BS  | SO  |
1186               SI  | DLE | DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN |
1187               EM  | FS  | GS  | RS  | US  | DEL
1188          =>
1189             Error_Illegal_Character;
1190
1191          --  Invalid graphic characters
1192
1193          when '#' | '$' | '?' | '@' | '`' | '\' | '^' | '~' =>
1194             Error_Illegal_Character;
1195
1196          --  End switch on non-blank character
1197
1198          end case;
1199
1200       --  End loop past format effectors. The exit from this loop is by
1201       --  executing a return statement following completion of token scan
1202       --  (control never falls out of this loop to the code which follows)
1203
1204       end loop;
1205
1206       --  Identifier scanning routine. On entry, some initial characters
1207       --  of the identifier may have already been stored in Name_Buffer.
1208       --  If so, Name_Len has the number of characters stored. otherwise
1209       --  Name_Len is set to zero on entry.
1210
1211       <<Scan_Identifier>>
1212
1213          --  This loop scans as fast as possible past lower half letters
1214          --  and digits, which we expect to be the most common characters.
1215
1216          loop
1217             if Source (Scan_Ptr) in 'a' .. 'z'
1218               or else Source (Scan_Ptr) in '0' .. '9'
1219             then
1220                Name_Buffer (Name_Len + 1) := Source (Scan_Ptr);
1221                Accumulate_Checksum (Source (Scan_Ptr));
1222
1223             elsif Source (Scan_Ptr) in 'A' .. 'Z' then
1224                Name_Buffer (Name_Len + 1) :=
1225                  Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
1226                Accumulate_Checksum (Name_Buffer (Name_Len + 1));
1227             else
1228                exit;
1229             end if;
1230
1231             --  Open out the loop a couple of times for speed
1232
1233             if Source (Scan_Ptr + 1) in 'a' .. 'z'
1234               or else Source (Scan_Ptr + 1) in '0' .. '9'
1235             then
1236                Name_Buffer (Name_Len + 2) := Source (Scan_Ptr + 1);
1237                Accumulate_Checksum (Source (Scan_Ptr + 1));
1238
1239             elsif Source (Scan_Ptr + 1) in 'A' .. 'Z' then
1240                Name_Buffer (Name_Len + 2) :=
1241                  Character'Val (Character'Pos (Source (Scan_Ptr + 1)) + 32);
1242                Accumulate_Checksum (Name_Buffer (Name_Len + 2));
1243
1244             else
1245                Scan_Ptr := Scan_Ptr + 1;
1246                Name_Len := Name_Len + 1;
1247                exit;
1248             end if;
1249
1250             if Source (Scan_Ptr + 2) in 'a' .. 'z'
1251               or else Source (Scan_Ptr + 2) in '0' .. '9'
1252             then
1253                Name_Buffer (Name_Len + 3) := Source (Scan_Ptr + 2);
1254                Accumulate_Checksum (Source (Scan_Ptr + 2));
1255
1256             elsif Source (Scan_Ptr + 2) in 'A' .. 'Z' then
1257                Name_Buffer (Name_Len + 3) :=
1258                  Character'Val (Character'Pos (Source (Scan_Ptr + 2)) + 32);
1259                Accumulate_Checksum (Name_Buffer (Name_Len + 3));
1260             else
1261                Scan_Ptr := Scan_Ptr + 2;
1262                Name_Len := Name_Len + 2;
1263                exit;
1264             end if;
1265
1266             if Source (Scan_Ptr + 3) in 'a' .. 'z'
1267               or else Source (Scan_Ptr + 3) in '0' .. '9'
1268             then
1269                Name_Buffer (Name_Len + 4) := Source (Scan_Ptr + 3);
1270                Accumulate_Checksum (Source (Scan_Ptr + 3));
1271
1272             elsif Source (Scan_Ptr + 3) in 'A' .. 'Z' then
1273                Name_Buffer (Name_Len + 4) :=
1274                  Character'Val (Character'Pos (Source (Scan_Ptr + 3)) + 32);
1275                Accumulate_Checksum (Name_Buffer (Name_Len + 4));
1276
1277             else
1278                Scan_Ptr := Scan_Ptr + 3;
1279                Name_Len := Name_Len + 3;
1280                exit;
1281             end if;
1282
1283             Scan_Ptr := Scan_Ptr + 4;
1284             Name_Len := Name_Len + 4;
1285          end loop;
1286
1287          --  If we fall through, then we have encountered either an underline
1288          --  character, or an extended identifier character (i.e. one from the
1289          --  upper half), or a wide character, or an identifier terminator.
1290          --  The initial test speeds us up in the most common case where we
1291          --  have an identifier terminator. Note that ESC is an identifier
1292          --  character only if a wide character encoding method that uses
1293          --  ESC encoding is active, so if we find an ESC character we know
1294          --  that we have a wide character.
1295
1296          if Identifier_Char (Source (Scan_Ptr)) then
1297
1298             --  Case of underline, check for error cases of double underline,
1299             --  and for a trailing underline character
1300
1301             if Source (Scan_Ptr) = '_' then
1302                Accumulate_Checksum ('_');
1303                Name_Len := Name_Len + 1;
1304                Name_Buffer (Name_Len) := '_';
1305
1306                if Identifier_Char (Source (Scan_Ptr + 1)) then
1307                   Scan_Ptr := Scan_Ptr + 1;
1308
1309                   if Source (Scan_Ptr) = '_' then
1310                      Error_No_Double_Underline;
1311                   end if;
1312
1313                else
1314                   Error_Msg_S ("identifier cannot end with underline");
1315                   Scan_Ptr := Scan_Ptr + 1;
1316                end if;
1317
1318                goto Scan_Identifier;
1319
1320             --  Upper half character
1321
1322             elsif Source (Scan_Ptr) in Upper_Half_Character
1323               and then not Upper_Half_Encoding
1324             then
1325                Accumulate_Checksum (Source (Scan_Ptr));
1326                Store_Encoded_Character
1327                  (Get_Char_Code (Fold_Lower (Source (Scan_Ptr))));
1328                Scan_Ptr := Scan_Ptr + 1;
1329                goto Scan_Identifier;
1330
1331             --  Left bracket not followed by a quote terminates an identifier.
1332             --  This is an error, but we don't want to give a junk error msg
1333             --  about wide characters in this case!
1334
1335             elsif Source (Scan_Ptr) = '['
1336               and then Source (Scan_Ptr + 1) /= '"'
1337             then
1338                null;
1339
1340             --  We know we have a wide character encoding here (the current
1341             --  character is either ESC, left bracket, or an upper half
1342             --  character depending on the encoding method).
1343
1344             else
1345                --  Scan out the wide character and insert the appropriate
1346                --  encoding into the name table entry for the identifier.
1347
1348                declare
1349                   Sptr : constant Source_Ptr := Scan_Ptr;
1350                   Code : Char_Code;
1351                   Err  : Boolean;
1352
1353                begin
1354                   Scan_Wide (Source, Scan_Ptr, Code, Err);
1355                   Accumulate_Checksum (Code);
1356
1357                   if Err then
1358                      Error_Illegal_Wide_Character;
1359                   else
1360                      Store_Encoded_Character (Code);
1361                   end if;
1362
1363                   --  Make sure we are allowing wide characters in identifiers.
1364                   --  Note that we allow wide character notation for an OK
1365                   --  identifier character. This in particular allows bracket
1366                   --  or other notation to be used for upper half letters.
1367
1368                   if Identifier_Character_Set /= 'w'
1369                     and then
1370                       (not In_Character_Range (Code)
1371                          or else
1372                        not Identifier_Char (Get_Character (Code)))
1373                   then
1374                      Error_Msg
1375                        ("wide character not allowed in identifier", Sptr);
1376                   end if;
1377                end;
1378
1379                goto Scan_Identifier;
1380             end if;
1381          end if;
1382
1383          --  Scan of identifier is complete. The identifier is stored in
1384          --  Name_Buffer, and Scan_Ptr points past the last character.
1385
1386          Token_Name := Name_Find;
1387
1388          --  Here is where we check if it was a keyword
1389
1390          if Get_Name_Table_Byte (Token_Name) /= 0
1391            and then (Ada_95 or else Token_Name not in Ada_95_Reserved_Words)
1392          then
1393             Token := Token_Type'Val (Get_Name_Table_Byte (Token_Name));
1394
1395             --  Deal with possible style check for non-lower case keyword,
1396             --  but we don't treat ACCESS, DELTA, DIGITS, RANGE as keywords
1397             --  for this purpose if they appear as attribute designators.
1398             --  Actually we only check the first character for speed.
1399
1400             if Style_Check
1401               and then Source (Token_Ptr) <= 'Z'
1402               and then (Prev_Token /= Tok_Apostrophe
1403                           or else
1404                             (Token /= Tok_Access
1405                                and then Token /= Tok_Delta
1406                                and then Token /= Tok_Digits
1407                                and then Token /= Tok_Range))
1408             then
1409                Style.Non_Lower_Case_Keyword;
1410             end if;
1411
1412             --  We must reset Token_Name since this is not an identifier
1413             --  and if we leave Token_Name set, the parser gets confused
1414             --  because it thinks it is dealing with an identifier instead
1415             --  of the corresponding keyword.
1416
1417             Token_Name := No_Name;
1418             return;
1419
1420          --  It is an identifier after all
1421
1422          else
1423             Token_Node := New_Node (N_Identifier, Token_Ptr);
1424             Set_Chars (Token_Node, Token_Name);
1425             Token := Tok_Identifier;
1426             return;
1427          end if;
1428    end Scan;
1429
1430    ---------------------
1431    -- Scan_First_Char --
1432    ---------------------
1433
1434    function Scan_First_Char return Source_Ptr is
1435       Ptr : Source_Ptr := Current_Line_Start;
1436
1437    begin
1438       loop
1439          if Source (Ptr) = ' ' then
1440             Ptr := Ptr + 1;
1441
1442          elsif Source (Ptr) = HT then
1443             if Style_Check then Style.Check_HT; end if;
1444             Ptr := Ptr + 1;
1445
1446          else
1447             return Ptr;
1448          end if;
1449       end loop;
1450    end Scan_First_Char;
1451
1452    ------------------------------
1453    -- Scan_Reserved_Identifier --
1454    ------------------------------
1455
1456    procedure Scan_Reserved_Identifier (Force_Msg : Boolean) is
1457       Token_Chars : constant String := Token_Type'Image (Token);
1458
1459    begin
1460       --  We have in Token_Chars the image of the Token name, i.e. Tok_xxx.
1461       --  This code extracts the xxx and makes an identifier out of it.
1462
1463       Name_Len := 0;
1464
1465       for J in 5 .. Token_Chars'Length loop
1466          Name_Len := Name_Len + 1;
1467          Name_Buffer (Name_Len) := Fold_Lower (Token_Chars (J));
1468       end loop;
1469
1470       Token_Name := Name_Find;
1471
1472       if not Used_As_Identifier (Token) or else Force_Msg then
1473          Error_Msg_Name_1 := Token_Name;
1474          Error_Msg_SC ("reserved word* cannot be used as identifier!");
1475          Used_As_Identifier (Token) := True;
1476       end if;
1477
1478       Token := Tok_Identifier;
1479       Token_Node := New_Node (N_Identifier, Token_Ptr);
1480       Set_Chars (Token_Node, Token_Name);
1481    end Scan_Reserved_Identifier;
1482
1483    ----------------------
1484    -- Set_Start_Column --
1485    ----------------------
1486
1487    --  Note: it seems at first glance a little expensive to compute this value
1488    --  for every source line (since it is certainly not used for all source
1489    --  lines). On the other hand, it doesn't take much more work to skip past
1490    --  the initial white space on the line counting the columns than it would
1491    --  to scan past the white space using the standard scanning circuits.
1492
1493    function Set_Start_Column return Column_Number is
1494       Start_Column : Column_Number := 0;
1495
1496    begin
1497       --  Outer loop scans past horizontal tab characters
1498
1499       Tabs_Loop : loop
1500
1501          --  Inner loop scans past blanks as fast as possible, bumping Scan_Ptr
1502          --  past the blanks and adjusting Start_Column to account for them.
1503
1504          Blanks_Loop : loop
1505             if Source (Scan_Ptr) = ' ' then
1506                if Source (Scan_Ptr + 1) = ' ' then
1507                   if Source (Scan_Ptr + 2) = ' ' then
1508                      if Source (Scan_Ptr + 3) = ' ' then
1509                         if Source (Scan_Ptr + 4) = ' ' then
1510                            if Source (Scan_Ptr + 5) = ' ' then
1511                               if Source (Scan_Ptr + 6) = ' ' then
1512                                  Scan_Ptr := Scan_Ptr + 7;
1513                                  Start_Column := Start_Column + 7;
1514                               else
1515                                  Scan_Ptr := Scan_Ptr + 6;
1516                                  Start_Column := Start_Column + 6;
1517                                  exit Blanks_Loop;
1518                               end if;
1519                            else
1520                               Scan_Ptr := Scan_Ptr + 5;
1521                               Start_Column := Start_Column + 5;
1522                               exit Blanks_Loop;
1523                            end if;
1524                         else
1525                            Scan_Ptr := Scan_Ptr + 4;
1526                            Start_Column := Start_Column + 4;
1527                            exit Blanks_Loop;
1528                         end if;
1529                      else
1530                         Scan_Ptr := Scan_Ptr + 3;
1531                         Start_Column := Start_Column + 3;
1532                         exit Blanks_Loop;
1533                      end if;
1534                   else
1535                      Scan_Ptr := Scan_Ptr + 2;
1536                      Start_Column := Start_Column + 2;
1537                      exit Blanks_Loop;
1538                   end if;
1539                else
1540                   Scan_Ptr := Scan_Ptr + 1;
1541                   Start_Column := Start_Column + 1;
1542                   exit Blanks_Loop;
1543                end if;
1544             else
1545                exit Blanks_Loop;
1546             end if;
1547          end loop Blanks_Loop;
1548
1549          --  Outer loop keeps going only if a horizontal tab follows
1550
1551          if Source (Scan_Ptr) = HT then
1552             if Style_Check then Style.Check_HT; end if;
1553             Scan_Ptr := Scan_Ptr + 1;
1554             Start_Column := (Start_Column / 8) * 8 + 8;
1555          else
1556             exit Tabs_Loop;
1557          end if;
1558
1559       end loop Tabs_Loop;
1560
1561       return Start_Column;
1562    end Set_Start_Column;
1563
1564    ----------
1565    -- Slit --
1566    ----------
1567
1568    procedure Slit is separate;
1569
1570 end Scn;