gcc/ada/scng.adb

   1 ------------------------------------------------------------------------------
   2 --                                                                          --
   3 --                         GNAT COMPILER COMPONENTS                         --
   4 --                                                                          --
   5 --                                 S C N G                                  --
   6 --                                                                          --
   7 --                                 B o d y                                  --
   8 --                                                                          --
   9 --          Copyright (C) 1992-2005 Free Software Foundation, Inc.          --
  10 --                                                                          --
  11 -- GNAT is free software;  you can  redistribute it  and/or modify it under --
  12 -- terms of the  GNU General Public License as published  by the Free Soft- --
  13 -- ware  Foundation;  either version 2,  or (at your option) any later ver- --
  14 -- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
  15 -- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
  16 -- or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License --
  17 -- for  more details.  You should have  received  a copy of the GNU General --
  18 -- Public License  distributed with GNAT;  see file COPYING.  If not, write --
  19 -- to  the Free Software Foundation,  59 Temple Place - Suite 330,  Boston, --
  20 -- MA 02111-1307, USA.                                                      --
  21 --                                                                          --
  22 -- GNAT was originally developed  by the GNAT team at  New York University. --
  23 -- Extensive contributions were provided by Ada Core Technologies Inc.      --
  24 --                                                                          --
  25 ------------------------------------------------------------------------------
  26
  27 with Csets;    use Csets;
  28 with Err_Vars; use Err_Vars;
  29 with Namet;    use Namet;
  30 with Opt;      use Opt;
  31 with Scans;    use Scans;
  32 with Sinput;   use Sinput;
  33 with Snames;   use Snames;
  34 with Stringt;  use Stringt;
  35 with Stylesw;  use Stylesw;
  36 with Uintp;    use Uintp;
  37 with Urealp;   use Urealp;
  38 with Widechar; use Widechar;
  39
  40 with System.CRC32;
  41 with System.WCh_Con; use System.WCh_Con;
  42
  43 package body Scng is
  44
  45    use ASCII;
  46    --  Make control characters visible
  47
  48    Special_Characters : array (Character) of Boolean := (others => False);
  49    --  For characters that are Special token, the value is True
  50
  51    Comment_Is_Token : Boolean := False;
  52    --  True if comments are tokens
  53
  54    End_Of_Line_Is_Token : Boolean := False;
  55    --  True if End_Of_Line is a token
  56
  57    -----------------------
  58    -- Local Subprograms --
  59    -----------------------
  60
  61    procedure Accumulate_Token_Checksum;
  62    pragma Inline (Accumulate_Token_Checksum);
  63
  64    procedure Accumulate_Checksum (C : Character);
  65    pragma Inline (Accumulate_Checksum);
  66    --  This routine accumulates the checksum given character C. During the
  67    --  scanning of a source file, this routine is called with every character
  68    --  in the source, excluding blanks, and all control characters (except
  69    --  that ESC is included in the checksum). Upper case letters not in string
  70    --  literals are folded by the caller. See Sinput spec for the documentation
  71    --  of the checksum algorithm. Note: checksum values are only used if we
  72    --  generate code, so it is not necessary to worry about making the right
  73    --  sequence of calls in any error situation.
  74
  75    procedure Accumulate_Checksum (C : Char_Code);
  76    pragma Inline (Accumulate_Checksum);
  77    --  This version is identical, except that the argument, C, is a character
  78    --  code value instead of a character. This is used when wide characters
  79    --  are scanned. We use the character code rather than the ASCII characters
  80    --  so that the checksum is independent of wide character encoding method.
  81
  82    procedure Initialize_Checksum;
  83    pragma Inline (Initialize_Checksum);
  84    --  Initialize checksum value
  85
  86    -------------------------
  87    -- Accumulate_Checksum --
  88    -------------------------
  89
  90    procedure Accumulate_Checksum (C : Character) is
  91    begin
  92       System.CRC32.Update (System.CRC32.CRC32 (Checksum), C);
  93    end Accumulate_Checksum;
  94
  95    procedure Accumulate_Checksum (C : Char_Code) is
  96    begin
  97       if C > 16#FFFF# then
  98          Accumulate_Checksum (Character'Val (C / 2 ** 16));
  99          Accumulate_Checksum (Character'Val ((C / 256) mod 256));
 100       else
 101          Accumulate_Checksum (Character'Val (C / 256));
 102       end if;
 103
 104       Accumulate_Checksum (Character'Val (C mod 256));
 105    end Accumulate_Checksum;
 106
 107    -------------------------------
 108    -- Accumulate_Token_Checksum --
 109    -------------------------------
 110
 111    procedure Accumulate_Token_Checksum is
 112    begin
 113       System.CRC32.Update
 114         (System.CRC32.CRC32 (Checksum),
 115          Character'Val (Token_Type'Pos (Token)));
 116    end Accumulate_Token_Checksum;
 117
 118    ----------------------------
 119    -- Determine_Token_Casing --
 120    ----------------------------
 121
 122    function Determine_Token_Casing return Casing_Type is
 123    begin
 124       return Determine_Casing (Source (Token_Ptr .. Scan_Ptr - 1));
 125    end Determine_Token_Casing;
 126
 127    -------------------------
 128    -- Initialize_Checksum --
 129    -------------------------
 130
 131    procedure Initialize_Checksum is
 132    begin
 133       System.CRC32.Initialize (System.CRC32.CRC32 (Checksum));
 134    end Initialize_Checksum;
 135
 136    ------------------------
 137    -- Initialize_Scanner --
 138    ------------------------
 139
 140    procedure Initialize_Scanner
 141      (Unit  : Unit_Number_Type;
 142       Index : Source_File_Index)
 143    is
 144       procedure Set_Reserved (N : Name_Id; T : Token_Type);
 145       pragma Inline (Set_Reserved);
 146       --  Set given name as a reserved keyword (T is the corresponding token)
 147
 148       -------------
 149       -- Set_NTB --
 150       -------------
 151
 152       procedure Set_Reserved (N : Name_Id; T : Token_Type) is
 153       begin
 154          --  Set up Token_Type values in Names Table entries for reserved
 155          --  keywords We use the Pos value of the Token_Type value. Note we
 156          --  rely on the fact that Token_Type'Val (0) is not a reserved word!
 157
 158          Set_Name_Table_Byte (N, Token_Type'Pos (T));
 159       end Set_Reserved;
 160
 161    --  Start of processing for Initialize_Scanner
 162
 163    begin
 164       --  Establish reserved words
 165
 166       Set_Reserved (Name_Abort,     Tok_Abort);
 167       Set_Reserved (Name_Abs,       Tok_Abs);
 168       Set_Reserved (Name_Abstract,  Tok_Abstract);
 169       Set_Reserved (Name_Accept,    Tok_Accept);
 170       Set_Reserved (Name_Access,    Tok_Access);
 171       Set_Reserved (Name_And,       Tok_And);
 172       Set_Reserved (Name_Aliased,   Tok_Aliased);
 173       Set_Reserved (Name_All,       Tok_All);
 174       Set_Reserved (Name_Array,     Tok_Array);
 175       Set_Reserved (Name_At,        Tok_At);
 176       Set_Reserved (Name_Begin,     Tok_Begin);
 177       Set_Reserved (Name_Body,      Tok_Body);
 178       Set_Reserved (Name_Case,      Tok_Case);
 179       Set_Reserved (Name_Constant,  Tok_Constant);
 180       Set_Reserved (Name_Declare,   Tok_Declare);
 181       Set_Reserved (Name_Delay,     Tok_Delay);
 182       Set_Reserved (Name_Delta,     Tok_Delta);
 183       Set_Reserved (Name_Digits,    Tok_Digits);
 184       Set_Reserved (Name_Do,        Tok_Do);
 185       Set_Reserved (Name_Else,      Tok_Else);
 186       Set_Reserved (Name_Elsif,     Tok_Elsif);
 187       Set_Reserved (Name_End,       Tok_End);
 188       Set_Reserved (Name_Entry,     Tok_Entry);
 189       Set_Reserved (Name_Exception, Tok_Exception);
 190       Set_Reserved (Name_Exit,      Tok_Exit);
 191       Set_Reserved (Name_For,       Tok_For);
 192       Set_Reserved (Name_Function,  Tok_Function);
 193       Set_Reserved (Name_Generic,   Tok_Generic);
 194       Set_Reserved (Name_Goto,      Tok_Goto);
 195       Set_Reserved (Name_If,        Tok_If);
 196       Set_Reserved (Name_In,        Tok_In);
 197       Set_Reserved (Name_Is,        Tok_Is);
 198       Set_Reserved (Name_Limited,   Tok_Limited);
 199       Set_Reserved (Name_Loop,      Tok_Loop);
 200       Set_Reserved (Name_Mod,       Tok_Mod);
 201       Set_Reserved (Name_New,       Tok_New);
 202       Set_Reserved (Name_Not,       Tok_Not);
 203       Set_Reserved (Name_Null,      Tok_Null);
 204       Set_Reserved (Name_Of,        Tok_Of);
 205       Set_Reserved (Name_Or,        Tok_Or);
 206       Set_Reserved (Name_Others,    Tok_Others);
 207       Set_Reserved (Name_Out,       Tok_Out);
 208       Set_Reserved (Name_Package,   Tok_Package);
 209       Set_Reserved (Name_Pragma,    Tok_Pragma);
 210       Set_Reserved (Name_Private,   Tok_Private);
 211       Set_Reserved (Name_Procedure, Tok_Procedure);
 212       Set_Reserved (Name_Protected, Tok_Protected);
 213       Set_Reserved (Name_Raise,     Tok_Raise);
 214       Set_Reserved (Name_Range,     Tok_Range);
 215       Set_Reserved (Name_Record,    Tok_Record);
 216       Set_Reserved (Name_Rem,       Tok_Rem);
 217       Set_Reserved (Name_Renames,   Tok_Renames);
 218       Set_Reserved (Name_Requeue,   Tok_Requeue);
 219       Set_Reserved (Name_Return,    Tok_Return);
 220       Set_Reserved (Name_Reverse,   Tok_Reverse);
 221       Set_Reserved (Name_Select,    Tok_Select);
 222       Set_Reserved (Name_Separate,  Tok_Separate);
 223       Set_Reserved (Name_Subtype,   Tok_Subtype);
 224       Set_Reserved (Name_Tagged,    Tok_Tagged);
 225       Set_Reserved (Name_Task,      Tok_Task);
 226       Set_Reserved (Name_Terminate, Tok_Terminate);
 227       Set_Reserved (Name_Then,      Tok_Then);
 228       Set_Reserved (Name_Type,      Tok_Type);
 229       Set_Reserved (Name_Until,     Tok_Until);
 230       Set_Reserved (Name_Use,       Tok_Use);
 231       Set_Reserved (Name_When,      Tok_When);
 232       Set_Reserved (Name_While,     Tok_While);
 233       Set_Reserved (Name_With,      Tok_With);
 234       Set_Reserved (Name_Xor,       Tok_Xor);
 235
 236       --  Ada 2005 reserved words
 237
 238       Set_Reserved (Name_Interface,     Tok_Interface);
 239       Set_Reserved (Name_Overriding,    Tok_Overriding);
 240       Set_Reserved (Name_Synchronized,  Tok_Synchronized);
 241
 242       --  Initialize scan control variables
 243
 244       Current_Source_File       := Index;
 245       Source                    := Source_Text (Current_Source_File);
 246       Current_Source_Unit       := Unit;
 247       Scan_Ptr                  := Source_First (Current_Source_File);
 248       Token                     := No_Token;
 249       Token_Ptr                 := Scan_Ptr;
 250       Current_Line_Start        := Scan_Ptr;
 251       Token_Node                := Empty;
 252       Token_Name                := No_Name;
 253       Start_Column              := Set_Start_Column;
 254       First_Non_Blank_Location  := Scan_Ptr;
 255
 256       Initialize_Checksum;
 257
 258       --  Do not call Scan, otherwise the License stuff does not work in Scn
 259
 260    end Initialize_Scanner;
 261
 262    ------------------------------
 263    -- Reset_Special_Characters --
 264    ------------------------------
 265
 266    procedure Reset_Special_Characters is
 267    begin
 268       Special_Characters := (others => False);
 269    end Reset_Special_Characters;
 270
 271    ----------
 272    -- Scan --
 273    ----------
 274
 275    procedure Scan is
 276
 277       Start_Of_Comment : Source_Ptr;
 278       --  Record start of comment position
 279
 280       Underline_Found : Boolean;
 281       --  During scanning of an identifier, set to True if last character
 282       --  scanned was an underline or other punctuation character. This
 283       --  is used to flag the error of two underlines/punctuations in a
 284       --  row or ending an identifier with a underline/punctuation. Here
 285       --  punctuation means any UTF_32 character in the Unicode category
 286       --  Punctuation,Connector.
 287
 288       Wptr : Source_Ptr;
 289       --  Used to remember start of last wide character scanned
 290
 291       procedure Check_End_Of_Line;
 292       --  Called when end of line encountered. Checks that line is not too
 293       --  long, and that other style checks for the end of line are met.
 294
 295       function Double_Char_Token (C : Character) return Boolean;
 296       --  This function is used for double character tokens like := or <>. It
 297       --  checks if the character following Source (Scan_Ptr) is C, and if so
 298       --  bumps Scan_Ptr past the pair of characters and returns True. A space
 299       --  between the two characters is also recognized with an appropriate
 300       --  error message being issued. If C is not present, False is returned.
 301       --  Note that Double_Char_Token can only be used for tokens defined in
 302       --  the Ada syntax (it's use for error cases like && is not appropriate
 303       --  since we do not want a junk message for a case like &-space-&).
 304
 305       procedure Error_Illegal_Character;
 306       --  Give illegal character error, Scan_Ptr points to character. On
 307       --  return, Scan_Ptr is bumped past the illegal character.
 308
 309       procedure Error_Illegal_Wide_Character;
 310       --  Give illegal wide character message. On return, Scan_Ptr is bumped
 311       --  past the illegal character, which may still leave us pointing to
 312       --  junk, not much we can do if the escape sequence is messed up!
 313
 314       procedure Error_Long_Line;
 315       --  Signal error of excessively long line
 316
 317       procedure Error_No_Double_Underline;
 318       --  Signal error of two underline or punctuation characters in a row.
 319       --  Called with Scan_Ptr pointing to second underline/punctuation char.
 320
 321       procedure Nlit;
 322       --  This is the procedure for scanning out numeric literals. On entry,
 323       --  Scan_Ptr points to the digit that starts the numeric literal (the
 324       --  checksum for this character has not been accumulated yet). On return
 325       --  Scan_Ptr points past the last character of the numeric literal, Token
 326       --  and Token_Node are set appropriately, and the checksum is updated.
 327
 328       procedure Slit;
 329       --  This is the procedure for scanning out string literals. On entry,
 330       --  Scan_Ptr points to the opening string quote (the checksum for this
 331       --  character has not been accumulated yet). On return Scan_Ptr points
 332       --  past the closing quote of the string literal, Token and Token_Node
 333       --  are set appropriately, and the checksum is upated.
 334
 335       -----------------------
 336       -- Check_End_Of_Line --
 337       -----------------------
 338
 339       procedure Check_End_Of_Line is
 340          Len : constant Int := Int (Scan_Ptr) - Int (Current_Line_Start);
 341
 342       begin
 343          if Style_Check and Style_Check_Max_Line_Length then
 344             Style.Check_Line_Terminator (Len);
 345
 346          --  If style checking is inactive, check maximum line length against
 347          --  standard value. Note that we take this from Opt.Max_Line_Length
 348          --  rather than Hostparm.Max_Line_Length because we do not want to
 349          --  impose any limit during scanning of configuration pragma files,
 350          --  and Opt.Max_Line_Length (normally set to Hostparm.Max_Line_Length)
 351          --  is reset to Column_Number'Max during scanning of such files.
 352
 353          elsif Len > Opt.Max_Line_Length then
 354             Error_Long_Line;
 355          end if;
 356       end Check_End_Of_Line;
 357
 358       -----------------------
 359       -- Double_Char_Token --
 360       -----------------------
 361
 362       function Double_Char_Token (C : Character) return Boolean is
 363       begin
 364          if Source (Scan_Ptr + 1) = C then
 365             Accumulate_Checksum (C);
 366             Scan_Ptr := Scan_Ptr + 2;
 367             return True;
 368
 369          elsif Source (Scan_Ptr + 1) = ' '
 370            and then Source (Scan_Ptr + 2) = C
 371          then
 372             Scan_Ptr := Scan_Ptr + 1;
 373             Error_Msg_S ("no space allowed here");
 374             Scan_Ptr := Scan_Ptr + 2;
 375             return True;
 376
 377          else
 378             return False;
 379          end if;
 380       end Double_Char_Token;
 381
 382       -----------------------------
 383       -- Error_Illegal_Character --
 384       -----------------------------
 385
 386       procedure Error_Illegal_Character is
 387       begin
 388          Error_Msg_S ("illegal character");
 389          Scan_Ptr := Scan_Ptr + 1;
 390       end Error_Illegal_Character;
 391
 392       ----------------------------------
 393       -- Error_Illegal_Wide_Character --
 394       ----------------------------------
 395
 396       procedure Error_Illegal_Wide_Character is
 397       begin
 398          Error_Msg ("illegal wide character", Wptr);
 399       end Error_Illegal_Wide_Character;
 400
 401       ---------------------
 402       -- Error_Long_Line --
 403       ---------------------
 404
 405       procedure Error_Long_Line is
 406       begin
 407          Error_Msg
 408            ("this line is too long",
 409             Current_Line_Start + Source_Ptr (Opt.Max_Line_Length));
 410       end Error_Long_Line;
 411
 412       -------------------------------
 413       -- Error_No_Double_Underline --
 414       -------------------------------
 415
 416       procedure Error_No_Double_Underline is
 417       begin
 418          Underline_Found := False;
 419
 420          --  There are four cases, and we special case the messages
 421
 422          if Source (Scan_Ptr) = '_' then
 423             if Source (Scan_Ptr - 1) = '_' then
 424                Error_Msg_S
 425                  ("two consecutive underlines not permitted");
 426             else
 427                Error_Msg_S
 428                  ("underline cannot follow punctuation character");
 429             end if;
 430
 431          else
 432             if Source (Scan_Ptr - 1) = '_' then
 433                Error_Msg_S
 434                  ("punctuation character cannot follow underline");
 435             else
 436                Error_Msg_S
 437                  ("two consecutive punctuation characters not permitted");
 438             end if;
 439          end if;
 440       end Error_No_Double_Underline;
 441
 442       ----------
 443       -- Nlit --
 444       ----------
 445
 446       procedure Nlit is
 447
 448          C : Character;
 449          --  Current source program character
 450
 451          Base_Char : Character;
 452          --  Either # or : (character at start of based number)
 453
 454          Base : Int;
 455          --  Value of base
 456
 457          UI_Base : Uint;
 458          --  Value of base in Uint format
 459
 460          UI_Int_Value : Uint;
 461          --  Value of integer scanned by Scan_Integer in Uint format
 462
 463          UI_Num_Value : Uint;
 464          --  Value of integer in numeric value being scanned
 465
 466          Scale : Int;
 467          --  Scale value for real literal
 468
 469          UI_Scale : Uint;
 470          --  Scale in Uint format
 471
 472          Exponent_Is_Negative : Boolean;
 473          --  Set true for negative exponent
 474
 475          Extended_Digit_Value : Int;
 476          --  Extended digit value
 477
 478          Point_Scanned : Boolean;
 479          --  Flag for decimal point scanned in numeric literal
 480
 481          -----------------------
 482          -- Local Subprograms --
 483          -----------------------
 484
 485          procedure Error_Digit_Expected;
 486          --  Signal error of bad digit, Scan_Ptr points to the location at
 487          --  which the digit was expected on input, and is unchanged on return.
 488
 489          procedure Scan_Integer;
 490          --  Procedure to scan integer literal. On entry, Scan_Ptr points to a
 491          --  digit, on exit Scan_Ptr points past the last character of the
 492          --  integer.
 493          --
 494          --  For each digit encountered, UI_Int_Value is multiplied by 10, and
 495          --  the value of the digit added to the result. In addition, the
 496          --  value in Scale is decremented by one for each actual digit
 497          --  scanned.
 498
 499          --------------------------
 500          -- Error_Digit_Expected --
 501          --------------------------
 502
 503          procedure Error_Digit_Expected is
 504          begin
 505             Error_Msg_S ("digit expected");
 506          end Error_Digit_Expected;
 507
 508          ------------------
 509          -- Scan_Integer --
 510          ------------------
 511
 512          procedure Scan_Integer is
 513             C : Character;
 514             --  Next character scanned
 515
 516          begin
 517             C := Source (Scan_Ptr);
 518
 519             --  Loop through digits (allowing underlines)
 520
 521             loop
 522                Accumulate_Checksum (C);
 523                UI_Int_Value :=
 524                  UI_Int_Value * 10 + (Character'Pos (C) - Character'Pos ('0'));
 525                Scan_Ptr := Scan_Ptr + 1;
 526                Scale := Scale - 1;
 527                C := Source (Scan_Ptr);
 528
 529                --  Case of underline encountered
 530
 531                if C = '_' then
 532
 533                   --  We do not accumulate the '_' in the checksum, so that
 534                   --  1_234 is equivalent to 1234, and does not trigger
 535                   --  compilation for "minimal recompilation" (gnatmake -m).
 536
 537                   loop
 538                      Scan_Ptr := Scan_Ptr + 1;
 539                      C := Source (Scan_Ptr);
 540                      exit when C /= '_';
 541                      Error_No_Double_Underline;
 542                   end loop;
 543
 544                   if C not in '0' .. '9' then
 545                      Error_Digit_Expected;
 546                      exit;
 547                   end if;
 548
 549                else
 550                   exit when C not in '0' .. '9';
 551                end if;
 552             end loop;
 553          end Scan_Integer;
 554
 555       --  Start of Processing for Nlit
 556
 557       begin
 558          Base := 10;
 559          UI_Base := Uint_10;
 560          UI_Int_Value := Uint_0;
 561          Scale := 0;
 562          Scan_Integer;
 563          Scale := 0;
 564          Point_Scanned := False;
 565          UI_Num_Value := UI_Int_Value;
 566
 567          --  Various possibilities now for continuing the literal are period,
 568          --  E/e (for exponent), or :/# (for based literal).
 569
 570          Scale := 0;
 571          C := Source (Scan_Ptr);
 572
 573          if C = '.' then
 574
 575             --  Scan out point, but do not scan past .. which is a range
 576             --  sequence, and must not be eaten up scanning a numeric literal.
 577
 578             while C = '.' and then Source (Scan_Ptr + 1) /= '.' loop
 579                Accumulate_Checksum ('.');
 580
 581                if Point_Scanned then
 582                   Error_Msg_S ("duplicate point ignored");
 583                end if;
 584
 585                Point_Scanned := True;
 586                Scan_Ptr := Scan_Ptr + 1;
 587                C := Source (Scan_Ptr);
 588
 589                if C not in '0' .. '9' then
 590                   Error_Msg
 591                     ("real literal cannot end with point", Scan_Ptr - 1);
 592                else
 593                   Scan_Integer;
 594                   UI_Num_Value := UI_Int_Value;
 595                end if;
 596             end loop;
 597
 598          --  Based literal case. The base is the value we already scanned.
 599          --  In the case of colon, we insist that the following character
 600          --  is indeed an extended digit or a period. This catches a number
 601          --  of common errors, as well as catching the well known tricky
 602          --  bug otherwise arising from "x : integer range 1 .. 10:= 6;"
 603
 604          elsif C = '#'
 605            or else (C = ':' and then
 606                       (Source (Scan_Ptr + 1) = '.'
 607                          or else
 608                        Source (Scan_Ptr + 1) in '0' .. '9'
 609                          or else
 610                        Source (Scan_Ptr + 1) in 'A' .. 'Z'
 611                          or else
 612                        Source (Scan_Ptr + 1) in 'a' .. 'z'))
 613          then
 614             if C = ':' then
 615                Obsolescent_Check (Scan_Ptr);
 616
 617                if Warn_On_Obsolescent_Feature then
 618                   Error_Msg_S
 619                     ("use of "":"" is an obsolescent feature ('R'M 'J.2(3))?");
 620                   Error_Msg_S
 621                     ("\use ""'#"" instead?");
 622                end if;
 623             end if;
 624
 625             Accumulate_Checksum (C);
 626             Base_Char := C;
 627             UI_Base := UI_Int_Value;
 628
 629             if UI_Base < 2 or else UI_Base > 16 then
 630                Error_Msg_SC ("base not 2-16");
 631                UI_Base := Uint_16;
 632             end if;
 633
 634             Base := UI_To_Int (UI_Base);
 635             Scan_Ptr := Scan_Ptr + 1;
 636
 637             --  Scan out extended integer [. integer]
 638
 639             C := Source (Scan_Ptr);
 640             UI_Int_Value := Uint_0;
 641             Scale := 0;
 642
 643             loop
 644                if C in '0' .. '9' then
 645                   Accumulate_Checksum (C);
 646                   Extended_Digit_Value :=
 647                     Int'(Character'Pos (C)) - Int'(Character'Pos ('0'));
 648
 649                elsif C in 'A' .. 'F' then
 650                   Accumulate_Checksum (Character'Val (Character'Pos (C) + 32));
 651                   Extended_Digit_Value :=
 652                     Int'(Character'Pos (C)) - Int'(Character'Pos ('A')) + 10;
 653
 654                elsif C in 'a' .. 'f' then
 655                   Accumulate_Checksum (C);
 656                   Extended_Digit_Value :=
 657                     Int'(Character'Pos (C)) - Int'(Character'Pos ('a')) + 10;
 658
 659                else
 660                   Error_Msg_S ("extended digit expected");
 661                   exit;
 662                end if;
 663
 664                if Extended_Digit_Value >= Base then
 665                   Error_Msg_S ("digit '>= base");
 666                end if;
 667
 668                UI_Int_Value := UI_Int_Value * UI_Base + Extended_Digit_Value;
 669                Scale := Scale - 1;
 670                Scan_Ptr := Scan_Ptr + 1;
 671                C := Source (Scan_Ptr);
 672
 673                if C = '_' then
 674                   loop
 675                      Accumulate_Checksum ('_');
 676                      Scan_Ptr := Scan_Ptr + 1;
 677                      C := Source (Scan_Ptr);
 678                      exit when C /= '_';
 679                      Error_No_Double_Underline;
 680                   end loop;
 681
 682                elsif C = '.' then
 683                   Accumulate_Checksum ('.');
 684
 685                   if Point_Scanned then
 686                      Error_Msg_S ("duplicate point ignored");
 687                   end if;
 688
 689                   Scan_Ptr := Scan_Ptr + 1;
 690                   C := Source (Scan_Ptr);
 691                   Point_Scanned := True;
 692                   Scale := 0;
 693
 694                elsif C = Base_Char then
 695                   Accumulate_Checksum (C);
 696                   Scan_Ptr := Scan_Ptr + 1;
 697                   exit;
 698
 699                elsif C = '#' or else C = ':' then
 700                   Error_Msg_S ("based number delimiters must match");
 701                   Scan_Ptr := Scan_Ptr + 1;
 702                   exit;
 703
 704                elsif not Identifier_Char (C) then
 705                   if Base_Char = '#' then
 706                      Error_Msg_S ("missing '#");
 707                   else
 708                      Error_Msg_S ("missing ':");
 709                   end if;
 710
 711                   exit;
 712                end if;
 713
 714             end loop;
 715
 716             UI_Num_Value := UI_Int_Value;
 717          end if;
 718
 719          --  Scan out exponent
 720
 721          if not Point_Scanned then
 722             Scale := 0;
 723             UI_Scale := Uint_0;
 724          else
 725             UI_Scale := UI_From_Int (Scale);
 726          end if;
 727
 728          if Source (Scan_Ptr) = 'e' or else Source (Scan_Ptr) = 'E' then
 729             Accumulate_Checksum ('e');
 730             Scan_Ptr := Scan_Ptr + 1;
 731             Exponent_Is_Negative := False;
 732
 733             if Source (Scan_Ptr) = '+' then
 734                Accumulate_Checksum ('+');
 735                Scan_Ptr := Scan_Ptr + 1;
 736
 737             elsif Source (Scan_Ptr) = '-' then
 738                Accumulate_Checksum ('-');
 739
 740                if not Point_Scanned then
 741                   Error_Msg_S
 742                     ("negative exponent not allowed for integer literal");
 743                else
 744                   Exponent_Is_Negative := True;
 745                end if;
 746
 747                Scan_Ptr := Scan_Ptr + 1;
 748             end if;
 749
 750             UI_Int_Value := Uint_0;
 751
 752             if Source (Scan_Ptr) in '0' .. '9' then
 753                Scan_Integer;
 754             else
 755                Error_Digit_Expected;
 756             end if;
 757
 758             if Exponent_Is_Negative then
 759                UI_Scale := UI_Scale - UI_Int_Value;
 760             else
 761                UI_Scale := UI_Scale + UI_Int_Value;
 762             end if;
 763          end if;
 764
 765          --  Case of real literal to be returned
 766
 767          if Point_Scanned then
 768             Token := Tok_Real_Literal;
 769             Real_Literal_Value :=
 770               UR_From_Components (
 771                                   Num   => UI_Num_Value,
 772                                   Den   => -UI_Scale,
 773                                   Rbase => Base);
 774
 775          --  Case of integer literal to be returned
 776
 777          else
 778             Token := Tok_Integer_Literal;
 779
 780             if UI_Scale = 0 then
 781                Int_Literal_Value := UI_Num_Value;
 782
 783             --  Avoid doing possibly expensive calculations in cases like
 784             --  parsing 163E800_000# when semantics will not be done anyway.
 785             --  This is especially useful when parsing garbled input.
 786
 787             elsif Operating_Mode /= Check_Syntax
 788               and then (Serious_Errors_Detected = 0 or else Try_Semantics)
 789             then
 790                Int_Literal_Value := UI_Num_Value * UI_Base ** UI_Scale;
 791
 792             else
 793                Int_Literal_Value := No_Uint;
 794             end if;
 795          end if;
 796
 797          Accumulate_Token_Checksum;
 798
 799          return;
 800       end Nlit;
 801
 802       ----------
 803       -- Slit --
 804       ----------
 805
 806       procedure Slit is
 807
 808          Delimiter : Character;
 809          --  Delimiter (first character of string)
 810
 811          C : Character;
 812          --  Current source program character
 813
 814          Code : Char_Code;
 815          --  Current character code value
 816
 817          Err : Boolean;
 818          --  Error flag for Scan_Wide call
 819
 820          procedure Error_Bad_String_Char;
 821          --  Signal bad character in string/character literal. On entry
 822          --  Scan_Ptr points to the improper character encountered during the
 823          --  scan. Scan_Ptr is not modified, so it still points to the bad
 824          --  character on return.
 825
 826          procedure Error_Unterminated_String;
 827          --  Procedure called if a line terminator character is encountered
 828          --  during scanning a string, meaning that the string is not properly
 829          --  terminated.
 830
 831          procedure Set_String;
 832          --  Procedure used to distinguish between string and operator symbol.
 833          --  On entry the string has been scanned out, and its characters
 834          --  start at Token_Ptr and end one character before Scan_Ptr. On exit
 835          --  Token is set to Tok_String_Literal or Tok_Operator_Symbol as
 836          --  appropriate, and Token_Node is appropriately initialized. In
 837          --  addition, in the operator symbol case, Token_Name is
 838          --  appropriately set.
 839
 840          ---------------------------
 841          -- Error_Bad_String_Char --
 842          ---------------------------
 843
 844          procedure Error_Bad_String_Char is
 845             C : constant Character := Source (Scan_Ptr);
 846
 847          begin
 848             if C = HT then
 849                Error_Msg_S ("horizontal tab not allowed in string");
 850
 851             elsif C = VT or else C = FF then
 852                Error_Msg_S ("format effector not allowed in string");
 853
 854             elsif C in Upper_Half_Character then
 855                Error_Msg_S ("(Ada 83) upper half character not allowed");
 856
 857             else
 858                Error_Msg_S ("control character not allowed in string");
 859             end if;
 860          end Error_Bad_String_Char;
 861
 862          -------------------------------
 863          -- Error_Unterminated_String --
 864          -------------------------------
 865
 866          procedure Error_Unterminated_String is
 867          begin
 868             --  An interesting little refinement. Consider the following
 869             --  examples:
 870
 871             --     A := "this is an unterminated string;
 872             --     A := "this is an unterminated string &
 873             --     P(A, "this is a parameter that didn't get terminated);
 874
 875             --  We fiddle a little to do slightly better placement in these
 876             --  cases also if there is white space at the end of the line we
 877             --  place the flag at the start of this white space, not at the
 878             --  end. Note that we only have to test for blanks, since tabs
 879             --  aren't allowed in strings in the first place and would have
 880             --  caused an error message.
 881
 882             --  Two more cases that we treat specially are:
 883
 884             --     A := "this string uses the wrong terminator'
 885             --     A := "this string uses the wrong terminator' &
 886
 887             --  In these cases we give a different error message as well
 888
 889             --  We actually reposition the scan pointer to the point where we
 890             --  place the flag in these cases, since it seems a better bet on
 891             --  the original intention.
 892
 893             while Source (Scan_Ptr - 1) = ' '
 894               or else Source (Scan_Ptr - 1) = '&'
 895             loop
 896                Scan_Ptr := Scan_Ptr - 1;
 897                Unstore_String_Char;
 898             end loop;
 899
 900             --  Check for case of incorrect string terminator, but single quote
 901             --  is not considered incorrect if the opening terminator misused
 902             --  a single quote (error message already given).
 903
 904             if Delimiter /= '''
 905               and then Source (Scan_Ptr - 1) = '''
 906             then
 907                Unstore_String_Char;
 908                Error_Msg
 909                  ("incorrect string terminator character", Scan_Ptr - 1);
 910                return;
 911             end if;
 912
 913             if Source (Scan_Ptr - 1) = ';' then
 914                Scan_Ptr := Scan_Ptr - 1;
 915                Unstore_String_Char;
 916
 917                if Source (Scan_Ptr - 1) = ')' then
 918                   Scan_Ptr := Scan_Ptr - 1;
 919                   Unstore_String_Char;
 920                end if;
 921             end if;
 922
 923             Error_Msg_S ("missing string quote");
 924          end Error_Unterminated_String;
 925
 926          ----------------
 927          -- Set_String --
 928          ----------------
 929
 930          procedure Set_String is
 931             Slen : constant Int := Int (Scan_Ptr - Token_Ptr - 2);
 932             C1   : Character;
 933             C2   : Character;
 934             C3   : Character;
 935
 936          begin
 937             --  Token_Name is currently set to Error_Name. The following
 938             --  section of code resets Token_Name to the proper Name_Op_xx
 939             --  value if the string is a valid operator symbol, otherwise it is
 940             --  left set to Error_Name.
 941
 942             if Slen = 1 then
 943                C1 := Source (Token_Ptr + 1);
 944
 945                case C1 is
 946                   when '=' =>
 947                      Token_Name := Name_Op_Eq;
 948
 949                   when '>' =>
 950                      Token_Name := Name_Op_Gt;
 951
 952                   when '<' =>
 953                      Token_Name := Name_Op_Lt;
 954
 955                   when '+' =>
 956                      Token_Name := Name_Op_Add;
 957
 958                   when '-' =>
 959                      Token_Name := Name_Op_Subtract;
 960
 961                   when '&' =>
 962                      Token_Name := Name_Op_Concat;
 963
 964                   when '*' =>
 965                      Token_Name := Name_Op_Multiply;
 966
 967                   when '/' =>
 968                      Token_Name := Name_Op_Divide;
 969
 970                   when others =>
 971                      null;
 972                end case;
 973
 974             elsif Slen = 2 then
 975                C1 := Source (Token_Ptr + 1);
 976                C2 := Source (Token_Ptr + 2);
 977
 978                if C1 = '*' and then C2 = '*' then
 979                   Token_Name := Name_Op_Expon;
 980
 981                elsif C2 = '=' then
 982
 983                   if C1 = '/' then
 984                      Token_Name := Name_Op_Ne;
 985                   elsif C1 = '<' then
 986                      Token_Name := Name_Op_Le;
 987                   elsif C1 = '>' then
 988                      Token_Name := Name_Op_Ge;
 989                   end if;
 990
 991                elsif (C1 = 'O' or else C1 = 'o') and then    -- OR
 992                  (C2 = 'R' or else C2 = 'r')
 993                then
 994                   Token_Name := Name_Op_Or;
 995                end if;
 996
 997             elsif Slen = 3 then
 998                C1 := Source (Token_Ptr + 1);
 999                C2 := Source (Token_Ptr + 2);
1000                C3 := Source (Token_Ptr + 3);
1001
1002                if (C1 = 'A' or else C1 = 'a') and then       -- AND
1003                  (C2 = 'N' or else C2 = 'n') and then
1004                  (C3 = 'D' or else C3 = 'd')
1005                then
1006                   Token_Name := Name_Op_And;
1007
1008                elsif (C1 = 'A' or else C1 = 'a') and then    -- ABS
1009                  (C2 = 'B' or else C2 = 'b') and then
1010                  (C3 = 'S' or else C3 = 's')
1011                then
1012                   Token_Name := Name_Op_Abs;
1013
1014                elsif (C1 = 'M' or else C1 = 'm') and then    -- MOD
1015                  (C2 = 'O' or else C2 = 'o') and then
1016                  (C3 = 'D' or else C3 = 'd')
1017                then
1018                   Token_Name := Name_Op_Mod;
1019
1020                elsif (C1 = 'N' or else C1 = 'n') and then    -- NOT
1021                  (C2 = 'O' or else C2 = 'o') and then
1022                  (C3 = 'T' or else C3 = 't')
1023                then
1024                   Token_Name := Name_Op_Not;
1025
1026                elsif (C1 = 'R' or else C1 = 'r') and then    -- REM
1027                  (C2 = 'E' or else C2 = 'e') and then
1028                  (C3 = 'M' or else C3 = 'm')
1029                then
1030                   Token_Name := Name_Op_Rem;
1031
1032                elsif (C1 = 'X' or else C1 = 'x') and then    -- XOR
1033                  (C2 = 'O' or else C2 = 'o') and then
1034                  (C3 = 'R' or else C3 = 'r')
1035                then
1036                   Token_Name := Name_Op_Xor;
1037                end if;
1038
1039             end if;
1040
1041             --  If it is an operator symbol, then Token_Name is set. If it is
1042             --  some other string value, then Token_Name still contains
1043             --  Error_Name.
1044
1045             if Token_Name = Error_Name then
1046                Token := Tok_String_Literal;
1047
1048             else
1049                Token := Tok_Operator_Symbol;
1050             end if;
1051          end Set_String;
1052
1053       --  Start of processing for Slit
1054
1055       begin
1056          --  On entry, Scan_Ptr points to the opening character of the string
1057          --  which is either a percent, double quote, or apostrophe (single
1058          --  quote). The latter case is an error detected by the character
1059          --  literal circuit.
1060
1061          Delimiter := Source (Scan_Ptr);
1062          Accumulate_Checksum (Delimiter);
1063          Start_String;
1064          Scan_Ptr := Scan_Ptr + 1;
1065
1066          --  Loop to scan out characters of string literal
1067
1068          loop
1069             C := Source (Scan_Ptr);
1070
1071             if C = Delimiter then
1072                Accumulate_Checksum (C);
1073                Scan_Ptr := Scan_Ptr + 1;
1074                exit when Source (Scan_Ptr) /= Delimiter;
1075                Code := Get_Char_Code (C);
1076                Accumulate_Checksum (C);
1077                Scan_Ptr := Scan_Ptr + 1;
1078
1079             else
1080                if C = '"' and then Delimiter = '%' then
1081                   Error_Msg_S
1082                     ("quote not allowed in percent delimited string");
1083                   Code := Get_Char_Code (C);
1084                   Scan_Ptr := Scan_Ptr + 1;
1085
1086                elsif (C = ESC
1087                         and then Wide_Character_Encoding_Method
1088                                    in WC_ESC_Encoding_Method)
1089                  or else (C in Upper_Half_Character
1090                             and then Upper_Half_Encoding)
1091                  or else (C = '['
1092                             and then Source (Scan_Ptr + 1) = '"'
1093                             and then Identifier_Char (Source (Scan_Ptr + 2)))
1094                then
1095                   Wptr := Scan_Ptr;
1096                   Scan_Wide (Source, Scan_Ptr, Code, Err);
1097
1098                   if Err then
1099                      Error_Illegal_Wide_Character;
1100                      Code := Get_Char_Code (' ');
1101                   end if;
1102
1103                   Accumulate_Checksum (Code);
1104
1105                   if Ada_Version >= Ada_05
1106                     and then Is_UTF_32_Non_Graphic (Code)
1107                   then
1108                      Error_Msg
1109                        ("(Ada 2005) non-graphic character not permitted " &
1110                         "in string literal", Wptr);
1111                   end if;
1112
1113                else
1114                   Accumulate_Checksum (C);
1115
1116                   if C not in Graphic_Character then
1117                      if C in Line_Terminator then
1118                         Error_Unterminated_String;
1119                         exit;
1120
1121                      elsif C in Upper_Half_Character then
1122                         if Ada_Version = Ada_83 then
1123                            Error_Bad_String_Char;
1124                         end if;
1125
1126                      else
1127                         Error_Bad_String_Char;
1128                      end if;
1129                   end if;
1130
1131                   Code := Get_Char_Code (C);
1132                   Scan_Ptr := Scan_Ptr + 1;
1133                end if;
1134             end if;
1135
1136             Store_String_Char (Code);
1137
1138             if not In_Character_Range (Code) then
1139                Wide_Character_Found := True;
1140             end if;
1141          end loop;
1142
1143          String_Literal_Id := End_String;
1144          Set_String;
1145          return;
1146       end Slit;
1147
1148    --  Start of processing for Scan
1149
1150    begin
1151       Prev_Token := Token;
1152       Prev_Token_Ptr := Token_Ptr;
1153       Token_Name := Error_Name;
1154
1155       --  The following loop runs more than once only if a format effector
1156       --  (tab, vertical tab, form  feed, line feed, carriage return) is
1157       --  encountered and skipped, or some error situation, such as an
1158       --  illegal character, is encountered.
1159
1160       <<Scan_Next_Character>>
1161
1162       loop
1163          --  Skip past blanks, loop is opened up for speed
1164
1165          while Source (Scan_Ptr) = ' ' loop
1166             if Source (Scan_Ptr + 1) /= ' ' then
1167                Scan_Ptr := Scan_Ptr + 1;
1168                exit;
1169             end if;
1170
1171             if Source (Scan_Ptr + 2) /= ' ' then
1172                Scan_Ptr := Scan_Ptr + 2;
1173                exit;
1174             end if;
1175
1176             if Source (Scan_Ptr + 3) /= ' ' then
1177                Scan_Ptr := Scan_Ptr + 3;
1178                exit;
1179             end if;
1180
1181             if Source (Scan_Ptr + 4) /= ' ' then
1182                Scan_Ptr := Scan_Ptr + 4;
1183                exit;
1184             end if;
1185
1186             if Source (Scan_Ptr + 5) /= ' ' then
1187                Scan_Ptr := Scan_Ptr + 5;
1188                exit;
1189             end if;
1190
1191             if Source (Scan_Ptr + 6) /= ' ' then
1192                Scan_Ptr := Scan_Ptr + 6;
1193                exit;
1194             end if;
1195
1196             if Source (Scan_Ptr + 7) /= ' ' then
1197                Scan_Ptr := Scan_Ptr + 7;
1198                exit;
1199             end if;
1200
1201             Scan_Ptr := Scan_Ptr + 8;
1202          end loop;
1203
1204          --  We are now at a non-blank character, which is the first character
1205          --  of the token we will scan, and hence the value of Token_Ptr.
1206
1207          Token_Ptr := Scan_Ptr;
1208
1209          --  Here begins the main case statement which transfers control on the
1210          --  basis of the non-blank character we have encountered.
1211
1212          case Source (Scan_Ptr) is
1213
1214          --  Line terminator characters
1215
1216          when CR | LF | FF | VT =>
1217             goto Scan_Line_Terminator;
1218
1219          --  Horizontal tab, just skip past it
1220
1221          when HT =>
1222             if Style_Check then Style.Check_HT; end if;
1223             Scan_Ptr := Scan_Ptr + 1;
1224
1225          --  End of file character, treated as an end of file only if it is
1226          --  the last character in the buffer, otherwise it is ignored.
1227
1228          when EOF =>
1229             if Scan_Ptr = Source_Last (Current_Source_File) then
1230                Check_End_Of_Line;
1231                Token := Tok_EOF;
1232                return;
1233             else
1234                Scan_Ptr := Scan_Ptr + 1;
1235             end if;
1236
1237          --  Ampersand
1238
1239          when '&' =>
1240             Accumulate_Checksum ('&');
1241
1242             if Source (Scan_Ptr + 1) = '&' then
1243                Error_Msg_S ("'&'& should be `AND THEN`");
1244                Scan_Ptr := Scan_Ptr + 2;
1245                Token := Tok_And;
1246                return;
1247
1248             else
1249                Scan_Ptr := Scan_Ptr + 1;
1250                Token := Tok_Ampersand;
1251                return;
1252             end if;
1253
1254          --  Asterisk (can be multiplication operator or double asterisk which
1255          --  is the exponentiation compound delimiter).
1256
1257          when '*' =>
1258             Accumulate_Checksum ('*');
1259
1260             if Source (Scan_Ptr + 1) = '*' then
1261                Accumulate_Checksum ('*');
1262                Scan_Ptr := Scan_Ptr + 2;
1263                Token := Tok_Double_Asterisk;
1264                return;
1265
1266             else
1267                Scan_Ptr := Scan_Ptr + 1;
1268                Token := Tok_Asterisk;
1269                return;
1270             end if;
1271
1272          --  Colon, which can either be an isolated colon, or part of an
1273          --  assignment compound delimiter.
1274
1275          when ':' =>
1276             Accumulate_Checksum (':');
1277
1278             if Double_Char_Token ('=') then
1279                Token := Tok_Colon_Equal;
1280                if Style_Check then Style.Check_Colon_Equal; end if;
1281                return;
1282
1283             elsif Source (Scan_Ptr + 1) = '-'
1284               and then Source (Scan_Ptr + 2) /= '-'
1285             then
1286                Token := Tok_Colon_Equal;
1287                Error_Msg (":- should be :=", Scan_Ptr);
1288                Scan_Ptr := Scan_Ptr + 2;
1289                return;
1290
1291             else
1292                Scan_Ptr := Scan_Ptr + 1;
1293                Token := Tok_Colon;
1294                if Style_Check then Style.Check_Colon; end if;
1295                return;
1296             end if;
1297
1298          --  Left parenthesis
1299
1300          when '(' =>
1301             Accumulate_Checksum ('(');
1302             Scan_Ptr := Scan_Ptr + 1;
1303             Token := Tok_Left_Paren;
1304             if Style_Check then Style.Check_Left_Paren; end if;
1305             return;
1306
1307          --  Left bracket
1308
1309          when '[' =>
1310             if Source (Scan_Ptr + 1) = '"' then
1311                goto Scan_Wide_Character;
1312
1313             else
1314                Error_Msg_S ("illegal character, replaced by ""(""");
1315                Scan_Ptr := Scan_Ptr + 1;
1316                Token := Tok_Left_Paren;
1317                return;
1318             end if;
1319
1320          --  Left brace
1321
1322          when '{' =>
1323             Error_Msg_S ("illegal character, replaced by ""(""");
1324             Scan_Ptr := Scan_Ptr + 1;
1325             Token := Tok_Left_Paren;
1326             return;
1327
1328          --  Comma
1329
1330          when ',' =>
1331             Accumulate_Checksum (',');
1332             Scan_Ptr := Scan_Ptr + 1;
1333             Token := Tok_Comma;
1334             if Style_Check then Style.Check_Comma; end if;
1335             return;
1336
1337          --  Dot, which is either an isolated period, or part of a double dot
1338          --  compound delimiter sequence. We also check for the case of a
1339          --  digit following the period, to give a better error message.
1340
1341          when '.' =>
1342             Accumulate_Checksum ('.');
1343
1344             if Double_Char_Token ('.') then
1345                Token := Tok_Dot_Dot;
1346                if Style_Check then Style.Check_Dot_Dot; end if;
1347                return;
1348
1349             elsif Source (Scan_Ptr + 1) in '0' .. '9' then
1350                Error_Msg_S ("numeric literal cannot start with point");
1351                Scan_Ptr := Scan_Ptr + 1;
1352
1353             else
1354                Scan_Ptr := Scan_Ptr + 1;
1355                Token := Tok_Dot;
1356                return;
1357             end if;
1358
1359          --  Equal, which can either be an equality operator, or part of the
1360          --  arrow (=>) compound delimiter.
1361
1362          when '=' =>
1363             Accumulate_Checksum ('=');
1364
1365             if Double_Char_Token ('>') then
1366                Token := Tok_Arrow;
1367                if Style_Check then Style.Check_Arrow; end if;
1368                return;
1369
1370             elsif Source (Scan_Ptr + 1) = '=' then
1371                Error_Msg_S ("== should be =");
1372                Scan_Ptr := Scan_Ptr + 1;
1373             end if;
1374
1375             Scan_Ptr := Scan_Ptr + 1;
1376             Token := Tok_Equal;
1377             return;
1378
1379          --  Greater than, which can be a greater than operator, greater than
1380          --  or equal operator, or first character of a right label bracket.
1381
1382          when '>' =>
1383             Accumulate_Checksum ('>');
1384
1385             if Double_Char_Token ('=') then
1386                Token := Tok_Greater_Equal;
1387                return;
1388
1389             elsif Double_Char_Token ('>') then
1390                Token := Tok_Greater_Greater;
1391                return;
1392
1393             else
1394                Scan_Ptr := Scan_Ptr + 1;
1395                Token := Tok_Greater;
1396                return;
1397             end if;
1398
1399          --  Less than, which can be a less than operator, less than or equal
1400          --  operator, or the first character of a left label bracket, or the
1401          --  first character of a box (<>) compound delimiter.
1402
1403          when '<' =>
1404             Accumulate_Checksum ('<');
1405
1406             if Double_Char_Token ('=') then
1407                Token := Tok_Less_Equal;
1408                return;
1409
1410             elsif Double_Char_Token ('>') then
1411                Token := Tok_Box;
1412                if Style_Check then Style.Check_Box; end if;
1413                return;
1414
1415             elsif Double_Char_Token ('<') then
1416                Token := Tok_Less_Less;
1417                return;
1418
1419             else
1420                Scan_Ptr := Scan_Ptr + 1;
1421                Token := Tok_Less;
1422                return;
1423             end if;
1424
1425          --  Minus, which is either a subtraction operator, or the first
1426          --  character of double minus starting a comment
1427
1428          when '-' => Minus_Case : begin
1429             if Source (Scan_Ptr + 1) = '>' then
1430                Error_Msg_S ("invalid token");
1431                Scan_Ptr := Scan_Ptr + 2;
1432                Token := Tok_Arrow;
1433                return;
1434
1435             elsif Source (Scan_Ptr + 1) /= '-' then
1436                Accumulate_Checksum ('-');
1437                Scan_Ptr := Scan_Ptr + 1;
1438                Token := Tok_Minus;
1439                return;
1440
1441             --  Comment
1442
1443             else -- Source (Scan_Ptr + 1) = '-' then
1444                if Style_Check then Style.Check_Comment; end if;
1445                Scan_Ptr := Scan_Ptr + 2;
1446                Start_Of_Comment := Scan_Ptr;
1447
1448                --  Loop to scan comment (this loop runs more than once only if
1449                --  a horizontal tab or other non-graphic character is scanned)
1450
1451                loop
1452                   --  Scan to non graphic character (opened up for speed)
1453
1454                   --  Note that we just eat left brackets, which means that
1455                   --  bracket notation cannot be used for end of line
1456                   --  characters in comments. This seems a reasonable choice,
1457                   --  since no one would ever use brackets notation in a real
1458                   --  program in this situation, and if we allow brackets
1459                   --  notation, we forbid some valid comments which contain a
1460                   --  brackets sequence that happens to match an end of line
1461                   --  character.
1462
1463                   loop
1464                      exit when Source (Scan_Ptr) not in Graphic_Character;
1465                      Scan_Ptr := Scan_Ptr + 1;
1466                      exit when Source (Scan_Ptr) not in Graphic_Character;
1467                      Scan_Ptr := Scan_Ptr + 1;
1468                      exit when Source (Scan_Ptr) not in Graphic_Character;
1469                      Scan_Ptr := Scan_Ptr + 1;
1470                      exit when Source (Scan_Ptr) not in Graphic_Character;
1471                      Scan_Ptr := Scan_Ptr + 1;
1472                      exit when Source (Scan_Ptr) not in Graphic_Character;
1473                      Scan_Ptr := Scan_Ptr + 1;
1474                   end loop;
1475
1476                   --  Keep going if horizontal tab
1477
1478                   if Source (Scan_Ptr) = HT then
1479                      if Style_Check then Style.Check_HT; end if;
1480                      Scan_Ptr := Scan_Ptr + 1;
1481
1482                   --  Terminate scan of comment if line terminator
1483
1484                   elsif Source (Scan_Ptr) in Line_Terminator then
1485                      exit;
1486
1487                   --  Terminate scan of comment if end of file encountered
1488                   --  (embedded EOF character or real last character in file)
1489
1490                   elsif Source (Scan_Ptr) = EOF then
1491                      exit;
1492
1493                   --  If we have a wide character, we have to scan it out,
1494                   --  because it might be a legitimate line terminator
1495
1496                   elsif (Source (Scan_Ptr) = ESC
1497                            and then Identifier_Char (ESC))
1498                     or else
1499                          (Source (Scan_Ptr) in Upper_Half_Character
1500                             and then Upper_Half_Encoding)
1501                   then
1502                      declare
1503                         Wptr : constant Source_Ptr := Scan_Ptr;
1504                         Code : Char_Code;
1505                         Err  : Boolean;
1506
1507                      begin
1508                         Scan_Wide (Source, Scan_Ptr, Code, Err);
1509
1510                         --  If not well formed wide character, then just skip
1511                         --  past it and ignore it.
1512
1513                         if Err then
1514                            Scan_Ptr := Wptr + 1;
1515
1516                         --  If UTF_32 terminator, terminate comment scan
1517
1518                         elsif Is_UTF_32_Line_Terminator (Code) then
1519                            Scan_Ptr := Wptr;
1520                            exit;
1521                         end if;
1522                      end;
1523
1524                   --  Keep going if character in 80-FF range, or is ESC. These
1525                   --  characters are allowed in comments by RM-2.1(1), 2.7(2).
1526                   --  They are allowed even in Ada 83 mode according to the
1527                   --  approved AI. ESC was added to the AI in June 93.
1528
1529                   elsif Source (Scan_Ptr) in Upper_Half_Character
1530                      or else Source (Scan_Ptr) = ESC
1531                   then
1532                      Scan_Ptr := Scan_Ptr + 1;
1533
1534                   --  Otherwise we have an illegal comment character
1535
1536                   else
1537                      Error_Illegal_Character;
1538                   end if;
1539                end loop;
1540
1541                --  Note that, except when comments are tokens, we do NOT
1542                --  execute a return here, instead we fall through to reexecute
1543                --  the scan loop to look for a token.
1544
1545                if Comment_Is_Token then
1546                   Name_Len := Integer (Scan_Ptr - Start_Of_Comment);
1547                   Name_Buffer (1 .. Name_Len) :=
1548                     String (Source (Start_Of_Comment .. Scan_Ptr - 1));
1549                   Comment_Id := Name_Find;
1550                   Token := Tok_Comment;
1551                   return;
1552                end if;
1553             end if;
1554          end Minus_Case;
1555
1556          --  Double quote starting a string literal
1557
1558          when '"' =>
1559             Slit;
1560             Post_Scan;
1561             return;
1562
1563          --  Percent starting a string literal
1564
1565          when '%' =>
1566             Obsolescent_Check (Token_Ptr);
1567
1568             if Warn_On_Obsolescent_Feature then
1569                Error_Msg_S
1570                  ("use of ""'%"" is an obsolescent feature ('R'M 'J.2(4))?");
1571                Error_Msg_S
1572                  ("\use """""" instead?");
1573             end if;
1574
1575             Slit;
1576             Post_Scan;
1577             return;
1578
1579          --  Apostrophe. This can either be the start of a character literal,
1580          --  or an isolated apostrophe used in a qualified expression or an
1581          --  attribute. We treat it as a character literal if it does not
1582          --  follow a right parenthesis, identifier, the keyword ALL or
1583          --  a literal. This means that we correctly treat constructs like:
1584
1585          --    A := CHARACTER'('A');
1586
1587          --  Note that RM-2.2(7) does not require a separator between
1588          --  "CHARACTER" and "'" in the above.
1589
1590          when ''' => Char_Literal_Case : declare
1591             Code : Char_Code;
1592             Err  : Boolean;
1593
1594          begin
1595             Accumulate_Checksum (''');
1596             Scan_Ptr := Scan_Ptr + 1;
1597
1598             --  Here is where we make the test to distinguish the cases. Treat
1599             --  as apostrophe if previous token is an identifier, right paren
1600             --  or the reserved word "all" (latter case as in A.all'Address)
1601             --  (or the reserved word "project" in project files). Also treat
1602             --  it as apostrophe after a literal (this catches some legitimate
1603             --  cases, like A."abs"'Address, and also gives better error
1604             --  behavior for impossible cases like 123'xxx).
1605
1606             if Prev_Token = Tok_Identifier
1607                or else Prev_Token = Tok_Right_Paren
1608                or else Prev_Token = Tok_All
1609                or else Prev_Token = Tok_Project
1610                or else Prev_Token in Token_Class_Literal
1611             then
1612                Token := Tok_Apostrophe;
1613                if Style_Check then Style.Check_Apostrophe; end if;
1614                return;
1615
1616             --  Otherwise the apostrophe starts a character literal
1617
1618             else
1619                --  Case of wide character literal
1620
1621                if (Source (Scan_Ptr) = ESC
1622                      and then
1623                     Wide_Character_Encoding_Method in WC_ESC_Encoding_Method)
1624                  or else
1625                    (Source (Scan_Ptr) in Upper_Half_Character
1626                      and then
1627                     Upper_Half_Encoding)
1628                  or else
1629                    (Source (Scan_Ptr) = '['
1630                      and then
1631                     Source (Scan_Ptr + 1) = '"')
1632                then
1633                   Wptr := Scan_Ptr;
1634                   Scan_Wide (Source, Scan_Ptr, Code, Err);
1635                   Accumulate_Checksum (Code);
1636
1637                   if Err then
1638                      Error_Illegal_Wide_Character;
1639                      Code := Character'Pos (' ');
1640
1641                   elsif Ada_Version >= Ada_05
1642                     and then Is_UTF_32_Non_Graphic (Code)
1643                   then
1644                      Error_Msg
1645                        ("(Ada 2005) non-graphic character not permitted " &
1646                         "in character literal", Wptr);
1647                   end if;
1648
1649                   if Source (Scan_Ptr) /= ''' then
1650                      Error_Msg_S ("missing apostrophe");
1651                   else
1652                      Scan_Ptr := Scan_Ptr + 1;
1653                   end if;
1654
1655                --  If we do not find a closing quote in the expected place then
1656                --  assume that we have a misguided attempt at a string literal.
1657
1658                --  However, if previous token is RANGE, then we return an
1659                --  apostrophe instead since this gives better error recovery
1660
1661                elsif Source (Scan_Ptr + 1) /= ''' then
1662                   if Prev_Token = Tok_Range then
1663                      Token := Tok_Apostrophe;
1664                      return;
1665
1666                   else
1667                      Scan_Ptr := Scan_Ptr - 1;
1668                      Error_Msg_S
1669                        ("strings are delimited by double quote character");
1670                      Slit;
1671                      Post_Scan;
1672                      return;
1673                   end if;
1674
1675                --  Otherwise we have a (non-wide) character literal
1676
1677                else
1678                   Accumulate_Checksum (Source (Scan_Ptr));
1679
1680                   if Source (Scan_Ptr) not in Graphic_Character then
1681                      if Source (Scan_Ptr) in Upper_Half_Character then
1682                         if Ada_Version = Ada_83 then
1683                            Error_Illegal_Character;
1684                         end if;
1685
1686                      else
1687                         Error_Illegal_Character;
1688                      end if;
1689                   end if;
1690
1691                   Code := Get_Char_Code (Source (Scan_Ptr));
1692                   Scan_Ptr := Scan_Ptr + 2;
1693                end if;
1694
1695                --  Fall through here with Scan_Ptr updated past the closing
1696                --  quote, and Code set to the Char_Code value for the literal
1697
1698                Accumulate_Checksum (''');
1699                Token := Tok_Char_Literal;
1700                Set_Character_Literal_Name (Code);
1701                Token_Name := Name_Find;
1702                Character_Code := Code;
1703                Post_Scan;
1704                return;
1705             end if;
1706          end Char_Literal_Case;
1707
1708          --  Right parenthesis
1709
1710          when ')' =>
1711             Accumulate_Checksum (')');
1712             Scan_Ptr := Scan_Ptr + 1;
1713             Token := Tok_Right_Paren;
1714             if Style_Check then Style.Check_Right_Paren; end if;
1715             return;
1716
1717          --  Right bracket or right brace, treated as right paren
1718
1719          when ']' | '}' =>
1720             Error_Msg_S ("illegal character, replaced by "")""");
1721             Scan_Ptr := Scan_Ptr + 1;
1722             Token := Tok_Right_Paren;
1723             return;
1724
1725          --  Slash (can be division operator or first character of not equal)
1726
1727          when '/' =>
1728             Accumulate_Checksum ('/');
1729
1730             if Double_Char_Token ('=') then
1731                Token := Tok_Not_Equal;
1732                return;
1733             else
1734                Scan_Ptr := Scan_Ptr + 1;
1735                Token := Tok_Slash;
1736                return;
1737             end if;
1738
1739          --  Semicolon
1740
1741          when ';' =>
1742             Accumulate_Checksum (';');
1743             Scan_Ptr := Scan_Ptr + 1;
1744             Token := Tok_Semicolon;
1745             if Style_Check then Style.Check_Semicolon; end if;
1746             return;
1747
1748          --  Vertical bar
1749
1750          when '|' => Vertical_Bar_Case : begin
1751             Accumulate_Checksum ('|');
1752
1753             --  Special check for || to give nice message
1754
1755             if Source (Scan_Ptr + 1) = '|' then
1756                Error_Msg_S ("""'|'|"" should be `OR ELSE`");
1757                Scan_Ptr := Scan_Ptr + 2;
1758                Token := Tok_Or;
1759                return;
1760
1761             else
1762                Scan_Ptr := Scan_Ptr + 1;
1763                Token := Tok_Vertical_Bar;
1764                if Style_Check then Style.Check_Vertical_Bar; end if;
1765                return;
1766             end if;
1767          end Vertical_Bar_Case;
1768
1769          --  Exclamation, replacement character for vertical bar
1770
1771          when '!' => Exclamation_Case : begin
1772             Accumulate_Checksum ('!');
1773             Obsolescent_Check (Token_Ptr);
1774
1775             if Warn_On_Obsolescent_Feature then
1776                Error_Msg_S
1777                  ("use of ""'!"" is an obsolescent feature ('R'M 'J.2(2))?");
1778                Error_Msg_S
1779                  ("\use ""'|"" instead?");
1780             end if;
1781
1782             if Source (Scan_Ptr + 1) = '=' then
1783                Error_Msg_S ("'!= should be /=");
1784                Scan_Ptr := Scan_Ptr + 2;
1785                Token := Tok_Not_Equal;
1786                return;
1787
1788             else
1789                Scan_Ptr := Scan_Ptr + 1;
1790                Token := Tok_Vertical_Bar;
1791                return;
1792             end if;
1793          end Exclamation_Case;
1794
1795          --  Plus
1796
1797          when '+' => Plus_Case : begin
1798             Accumulate_Checksum ('+');
1799             Scan_Ptr := Scan_Ptr + 1;
1800             Token := Tok_Plus;
1801             return;
1802          end Plus_Case;
1803
1804          --  Digits starting a numeric literal
1805
1806          when '0' .. '9' =>
1807             Nlit;
1808
1809             if Identifier_Char (Source (Scan_Ptr)) then
1810                Error_Msg_S
1811                  ("delimiter required between literal and identifier");
1812             end if;
1813             Post_Scan;
1814             return;
1815
1816          --  Lower case letters
1817
1818          when 'a' .. 'z' =>
1819             Name_Len := 1;
1820             Underline_Found := False;
1821             Name_Buffer (1) := Source (Scan_Ptr);
1822             Accumulate_Checksum (Name_Buffer (1));
1823             Scan_Ptr := Scan_Ptr + 1;
1824             goto Scan_Identifier;
1825
1826          --  Upper case letters
1827
1828          when 'A' .. 'Z' =>
1829             Name_Len := 1;
1830             Underline_Found := False;
1831             Name_Buffer (1) :=
1832               Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
1833             Accumulate_Checksum (Name_Buffer (1));
1834             Scan_Ptr := Scan_Ptr + 1;
1835             goto Scan_Identifier;
1836
1837          --  Underline character
1838
1839          when '_' =>
1840             if Special_Characters ('_') then
1841                Token_Ptr := Scan_Ptr;
1842                Scan_Ptr := Scan_Ptr + 1;
1843                Token := Tok_Special;
1844                Special_Character := '_';
1845                return;
1846             end if;
1847
1848             Error_Msg_S ("identifier cannot start with underline");
1849             Name_Len := 1;
1850             Name_Buffer (1) := '_';
1851             Scan_Ptr := Scan_Ptr + 1;
1852             Underline_Found := False;
1853             goto Scan_Identifier;
1854
1855          --  Space (not possible, because we scanned past blanks)
1856
1857          when ' ' =>
1858             raise Program_Error;
1859
1860          --  Characters in top half of ASCII 8-bit chart
1861
1862          when Upper_Half_Character =>
1863
1864             --  Wide character case
1865
1866             if Upper_Half_Encoding then
1867                goto Scan_Wide_Character;
1868
1869             --  Otherwise we have OK Latin-1 character
1870
1871             else
1872                --  Upper half characters may possibly be identifier letters
1873                --  but can never be digits, so Identifier_Char can be used to
1874                --  test for a valid start of identifier character.
1875
1876                if Identifier_Char (Source (Scan_Ptr)) then
1877                   Name_Len := 0;
1878                   Underline_Found := False;
1879                   goto Scan_Identifier;
1880                else
1881                   Error_Illegal_Character;
1882                end if;
1883             end if;
1884
1885          when ESC =>
1886
1887             --  ESC character, possible start of identifier if wide characters
1888             --  using ESC encoding are allowed in identifiers, which we can
1889             --  tell by looking at the Identifier_Char flag for ESC, which is
1890             --  only true if these conditions are met. In Ada 2005 mode, may
1891             --  also be valid UTF_32 space or line terminator character.
1892
1893             if Identifier_Char (ESC) then
1894                Name_Len := 0;
1895                goto Scan_Wide_Character;
1896             else
1897                Error_Illegal_Character;
1898             end if;
1899
1900          --  Invalid control characters
1901
1902          when NUL | SOH | STX | ETX | EOT | ENQ | ACK | BEL | BS  | SO  |
1903               SI  | DLE | DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN |
1904               EM  | FS  | GS  | RS  | US  | DEL
1905          =>
1906             Error_Illegal_Character;
1907
1908          --  Invalid graphic characters
1909
1910          when '#' | '$' | '?' | '@' | '`' | '\' | '^' | '~' =>
1911
1912             --  If Set_Special_Character has been called for this character,
1913             --  set Scans.Special_Character and return a Special token.
1914
1915             if Special_Characters (Source (Scan_Ptr)) then
1916                Token_Ptr := Scan_Ptr;
1917                Token := Tok_Special;
1918                Special_Character := Source (Scan_Ptr);
1919                Scan_Ptr := Scan_Ptr + 1;
1920                return;
1921
1922             --  Otherwise, this is an illegal character
1923
1924             else
1925                Error_Illegal_Character;
1926             end if;
1927
1928          --  End switch on non-blank character
1929
1930          end case;
1931
1932       --  End loop past format effectors. The exit from this loop is by
1933       --  executing a return statement following completion of token scan
1934       --  (control never falls out of this loop to the code which follows)
1935
1936       end loop;
1937
1938       --  Wide_Character scanning routine. On entry we have encountered the
1939       --  initial character of a wide character sequence.
1940
1941       <<Scan_Wide_Character>>
1942
1943          declare
1944             Code : Char_Code;
1945             Err  : Boolean;
1946
1947          begin
1948             Wptr := Scan_Ptr;
1949             Scan_Wide (Source, Scan_Ptr, Code, Err);
1950
1951             --  If bad wide character, signal error and continue scan
1952
1953             if Err then
1954                Error_Illegal_Wide_Character;
1955                goto Scan_Next_Character;
1956
1957             --  If OK letter, reset scan ptr and go scan identifier
1958
1959             elsif Is_UTF_32_Letter (Code) then
1960                Scan_Ptr := Wptr;
1961                Name_Len := 0;
1962                Underline_Found := False;
1963                goto Scan_Identifier;
1964
1965             --  If OK wide space, ignore and keep scanning (we do not include
1966             --  any ignored spaces in checksum)
1967
1968             elsif Is_UTF_32_Space (Code) then
1969                goto Scan_Next_Character;
1970
1971             --  If OK wide line terminator, terminate current line
1972
1973             elsif Is_UTF_32_Line_Terminator (Code) then
1974                Scan_Ptr := Wptr;
1975                goto Scan_Line_Terminator;
1976
1977             --  Punctuation is an error (at start of identifier)
1978
1979             elsif Is_UTF_32_Punctuation (Code) then
1980                Error_Msg
1981                  ("identifier cannot start with punctuation", Wptr);
1982                Scan_Ptr := Wptr;
1983                Name_Len := 0;
1984                Underline_Found := False;
1985                goto Scan_Identifier;
1986
1987             --  Mark character is an error (at start of identifer)
1988
1989             elsif Is_UTF_32_Mark (Code) then
1990                Error_Msg
1991                  ("identifier cannot start with mark character", Wptr);
1992                Scan_Ptr := Wptr;
1993                Name_Len := 0;
1994                Underline_Found := False;
1995                goto Scan_Identifier;
1996
1997             --  Other format character is an error (at start of identifer)
1998
1999             elsif Is_UTF_32_Other (Code) then
2000                Error_Msg
2001                  ("identifier cannot start with other format character", Wptr);
2002                Scan_Ptr := Wptr;
2003                Name_Len := 0;
2004                Underline_Found := False;
2005                goto Scan_Identifier;
2006
2007             --  Extended digit character is an error. Could be bad start of
2008             --  identifier or bad literal. Not worth doing too much to try to
2009             --  distinguish these cases, but we will do a little bit.
2010
2011             elsif Is_UTF_32_Digit (Code) then
2012                Error_Msg
2013                  ("identifier cannot start with digit character", Wptr);
2014                Scan_Ptr := Wptr;
2015                Name_Len := 0;
2016                Underline_Found := False;
2017                goto Scan_Identifier;
2018
2019             --  All other wide characters are illegal here
2020
2021             else
2022                Error_Illegal_Wide_Character;
2023                goto Scan_Next_Character;
2024             end if;
2025          end;
2026
2027       --  Routine to scan line terminator. On entry Scan_Ptr points to a
2028       --  character which is one of FF,LR,CR,VT, or one of the wide characters
2029       --  that is treated as a line termiantor.
2030
2031       <<Scan_Line_Terminator>>
2032
2033          --  Check line too long
2034
2035          Check_End_Of_Line;
2036
2037          --  Set Token_Ptr, if End_Of_Line is a token, for the case when it is
2038          --  a physical line.
2039
2040          if End_Of_Line_Is_Token then
2041             Token_Ptr := Scan_Ptr;
2042          end if;
2043
2044          declare
2045             Physical : Boolean;
2046
2047          begin
2048             Skip_Line_Terminators (Scan_Ptr, Physical);
2049
2050             --  If we are at start of physical line, update scan pointers to
2051             --  reflect the start of the new line.
2052
2053             if Physical then
2054                Current_Line_Start       := Scan_Ptr;
2055                Start_Column             := Set_Start_Column;
2056                First_Non_Blank_Location := Scan_Ptr;
2057
2058                --  If End_Of_Line is a token, we return it as it is a
2059                --  physical line.
2060
2061                if End_Of_Line_Is_Token then
2062                   Token := Tok_End_Of_Line;
2063                   return;
2064                end if;
2065             end if;
2066          end;
2067
2068          goto Scan_Next_Character;
2069
2070       --  Identifier scanning routine. On entry, some initial characters of
2071       --  the identifier may have already been stored in Name_Buffer. If so,
2072       --  Name_Len has the number of characters stored. otherwise Name_Len is
2073       --  set to zero on entry. Underline_Found is also set False on entry.
2074
2075       <<Scan_Identifier>>
2076
2077          --  This loop scans as fast as possible past lower half letters and
2078          --  digits, which we expect to be the most common characters.
2079
2080          loop
2081             if Source (Scan_Ptr) in 'a' .. 'z'
2082               or else Source (Scan_Ptr) in '0' .. '9'
2083             then
2084                Name_Buffer (Name_Len + 1) := Source (Scan_Ptr);
2085                Accumulate_Checksum (Source (Scan_Ptr));
2086
2087             elsif Source (Scan_Ptr) in 'A' .. 'Z' then
2088                Name_Buffer (Name_Len + 1) :=
2089                  Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
2090                Accumulate_Checksum (Name_Buffer (Name_Len + 1));
2091
2092             else
2093                exit;
2094             end if;
2095
2096             Underline_Found := False;
2097             Scan_Ptr := Scan_Ptr + 1;
2098             Name_Len := Name_Len + 1;
2099          end loop;
2100
2101          --  If we fall through, then we have encountered either an underline
2102          --  character, or an extended identifier character (i.e. one from the
2103          --  upper half), or a wide character, or an identifier terminator. The
2104          --  initial test speeds us up in the most common case where we have
2105          --  an identifier terminator. Note that ESC is an identifier character
2106          --  only if a wide character encoding method that uses ESC encoding
2107          --  is active, so if we find an ESC character we know that we have a
2108          --  wide character.
2109
2110          if Identifier_Char (Source (Scan_Ptr)) then
2111
2112             --  Case of underline
2113
2114             if Source (Scan_Ptr) = '_' then
2115                Accumulate_Checksum ('_');
2116
2117                if Underline_Found then
2118                   Error_No_Double_Underline;
2119                else
2120                   Underline_Found := True;
2121                   Name_Len := Name_Len + 1;
2122                   Name_Buffer (Name_Len) := '_';
2123                end if;
2124
2125                Scan_Ptr := Scan_Ptr + 1;
2126                goto Scan_Identifier;
2127
2128             --  Upper half character
2129
2130             elsif Source (Scan_Ptr) in Upper_Half_Character
2131               and then not Upper_Half_Encoding
2132             then
2133                Accumulate_Checksum (Source (Scan_Ptr));
2134                Store_Encoded_Character
2135                  (Get_Char_Code (Fold_Lower (Source (Scan_Ptr))));
2136                Scan_Ptr := Scan_Ptr + 1;
2137                Underline_Found := False;
2138                goto Scan_Identifier;
2139
2140             --  Left bracket not followed by a quote terminates an identifier.
2141             --  This is an error, but we don't want to give a junk error msg
2142             --  about wide characters in this case!
2143
2144             elsif Source (Scan_Ptr) = '['
2145               and then Source (Scan_Ptr + 1) /= '"'
2146             then
2147                null;
2148
2149             --  We know we have a wide character encoding here (the current
2150             --  character is either ESC, left bracket, or an upper half
2151             --  character depending on the encoding method).
2152
2153             else
2154                --  Scan out the wide character and insert the appropriate
2155                --  encoding into the name table entry for the identifier.
2156
2157                declare
2158                   Code   : Char_Code;
2159                   Err    : Boolean;
2160                   Chr    : Character;
2161
2162                begin
2163                   Wptr := Scan_Ptr;
2164                   Scan_Wide (Source, Scan_Ptr, Code, Err);
2165
2166                   --  If error, signal error
2167
2168                   if Err then
2169                      Error_Illegal_Wide_Character;
2170
2171                   --  If the character scanned is a normal identifier
2172                   --  character, then we treat it that way.
2173
2174                   elsif In_Character_Range (Code)
2175                     and then Identifier_Char (Get_Character (Code))
2176                   then
2177                      Chr := Get_Character (Code);
2178                      Accumulate_Checksum (Chr);
2179                      Store_Encoded_Character
2180                        (Get_Char_Code (Fold_Lower (Chr)));
2181                      Underline_Found := False;
2182
2183                   --  Here if not a normal identifier character
2184
2185                   else
2186                      --  Make sure we are allowing wide characters in
2187                      --  identifiers. Note that we allow wide character
2188                      --  notation for an OK identifier character. This in
2189                      --  particular allows bracket or other notation to be
2190                      --  used for upper half letters.
2191
2192                      --  Wide characters are always allowed in Ada 2005
2193
2194                      if Identifier_Character_Set /= 'w'
2195                        and then Ada_Version < Ada_05
2196                      then
2197                         Error_Msg
2198                        ("wide character not allowed in identifier", Wptr);
2199                      end if;
2200
2201                      --  If OK letter, store it folding to upper case. Note
2202                      --  that we include the folded letter in the checksum.
2203
2204                      if Is_UTF_32_Letter (Code) then
2205                         Code := UTF_32_To_Upper_Case (Code);
2206                         Accumulate_Checksum (Code);
2207                         Store_Encoded_Character (Code);
2208                         Underline_Found := False;
2209
2210                      --  If OK extended digit or mark, then store it
2211
2212                      elsif Is_UTF_32_Digit (Code)
2213                        or else Is_UTF_32_Mark (Code)
2214                      then
2215                         Accumulate_Checksum (Code);
2216                         Store_Encoded_Character (Code);
2217                         Underline_Found := False;
2218
2219                      --  Wide punctuation is also stored, but counts as an
2220                      --  underline character for error checking purposes.
2221
2222                      elsif Is_UTF_32_Punctuation (Code) then
2223                         Accumulate_Checksum (Code);
2224
2225                         if Underline_Found then
2226                            declare
2227                               Cend : constant Source_Ptr := Scan_Ptr;
2228                            begin
2229                               Scan_Ptr := Wptr;
2230                               Error_No_Double_Underline;
2231                               Scan_Ptr := Cend;
2232                            end;
2233
2234                         else
2235                            Store_Encoded_Character (Code);
2236                            Underline_Found := True;
2237                         end if;
2238
2239                      --  Wide character in Unicode cateogory "Other, Format"
2240                      --  is accepted in an identifier, but is ignored and not
2241                      --  stored. It seems reasonable to exclude it from the
2242                      --  checksum.
2243
2244                      elsif Is_UTF_32_Other (Code) then
2245                         null;
2246
2247                      --  Wide character in category Separator,Space terminates
2248
2249                      elsif Is_UTF_32_Space (Code) then
2250                         goto Scan_Identifier_Complete;
2251
2252                      --  Any other wide character is not acceptable
2253
2254                      else
2255                         Error_Msg
2256                           ("invalid wide character in identifier", Wptr);
2257                      end if;
2258                   end if;
2259
2260                   goto Scan_Identifier;
2261                end;
2262             end if;
2263          end if;
2264
2265       --  Scan of identifier is complete. The identifier is stored in
2266       --  Name_Buffer, and Scan_Ptr points past the last character.
2267
2268       <<Scan_Identifier_Complete>>
2269          Token_Name := Name_Find;
2270
2271          --  Check for identifier ending with underline or punctuation char
2272
2273          if Underline_Found then
2274             Underline_Found := False;
2275
2276             if Source (Scan_Ptr - 1) = '_' then
2277                Error_Msg
2278                  ("identifier cannot end with underline", Scan_Ptr - 1);
2279             else
2280                Error_Msg
2281                  ("identifier cannot end with punctuation character", Wptr);
2282             end if;
2283          end if;
2284
2285          --  Here is where we check if it was a keyword
2286
2287          if Get_Name_Table_Byte (Token_Name) /= 0
2288            and then (Ada_Version >= Ada_95
2289                        or else Token_Name not in Ada_95_Reserved_Words)
2290            and then (Ada_Version >= Ada_05
2291                        or else Token_Name not in Ada_2005_Reserved_Words)
2292          then
2293             Token := Token_Type'Val (Get_Name_Table_Byte (Token_Name));
2294
2295             --  Deal with possible style check for non-lower case keyword, but
2296             --  we don't treat ACCESS, DELTA, DIGITS, RANGE as keywords for
2297             --  this purpose if they appear as attribute designators. Actually
2298             --  we only check the first character for speed.
2299
2300             --  Ada 2005 (AI-284): Do not apply the style check in case of
2301             --  "pragma Interface"
2302
2303             if Style_Check
2304               and then Source (Token_Ptr) <= 'Z'
2305               and then (Prev_Token /= Tok_Apostrophe
2306                           or else
2307                             (Token /= Tok_Access
2308                                and then Token /= Tok_Delta
2309                                and then Token /= Tok_Digits
2310                                and then Token /= Tok_Range))
2311               and then (Token /= Tok_Interface
2312                           or else
2313                             (Token = Tok_Interface
2314                                and then Prev_Token /= Tok_Pragma))
2315             then
2316                Style.Non_Lower_Case_Keyword;
2317             end if;
2318
2319             --  We must reset Token_Name since this is not an identifier and
2320             --  if we leave Token_Name set, the parser gets confused because
2321             --  it thinks it is dealing with an identifier instead of the
2322             --  corresponding keyword.
2323
2324             Token_Name := No_Name;
2325             Accumulate_Token_Checksum;
2326             return;
2327
2328          --  It is an identifier after all
2329
2330          else
2331             Token := Tok_Identifier;
2332             Accumulate_Token_Checksum;
2333             Post_Scan;
2334             return;
2335          end if;
2336    end Scan;
2337
2338    --------------------------
2339    -- Set_Comment_As_Token --
2340    --------------------------
2341
2342    procedure Set_Comment_As_Token (Value : Boolean) is
2343    begin
2344       Comment_Is_Token := Value;
2345    end Set_Comment_As_Token;
2346
2347    ------------------------------
2348    -- Set_End_Of_Line_As_Token --
2349    ------------------------------
2350
2351    procedure Set_End_Of_Line_As_Token (Value : Boolean) is
2352    begin
2353       End_Of_Line_Is_Token := Value;
2354    end Set_End_Of_Line_As_Token;
2355
2356    ---------------------------
2357    -- Set_Special_Character --
2358    ---------------------------
2359
2360    procedure Set_Special_Character (C : Character) is
2361    begin
2362       case C is
2363          when '#' | '$' | '_' | '?' | '@' | '`' | '\' | '^' | '~' =>
2364             Special_Characters (C) := True;
2365
2366          when others =>
2367             null;
2368       end case;
2369    end Set_Special_Character;
2370
2371    ----------------------
2372    -- Set_Start_Column --
2373    ----------------------
2374
2375    --  Note: it seems at first glance a little expensive to compute this value
2376    --  for every source line (since it is certainly not used for all source
2377    --  lines). On the other hand, it doesn't take much more work to skip past
2378    --  the initial white space on the line counting the columns than it would
2379    --  to scan past the white space using the standard scanning circuits.
2380
2381    function Set_Start_Column return Column_Number is
2382       Start_Column : Column_Number := 0;
2383
2384    begin
2385       --  Outer loop scans past horizontal tab characters
2386
2387       Tabs_Loop : loop
2388
2389          --  Inner loop scans past blanks as fast as possible, bumping Scan_Ptr
2390          --  past the blanks and adjusting Start_Column to account for them.
2391
2392          Blanks_Loop : loop
2393             if Source (Scan_Ptr) = ' ' then
2394                if Source (Scan_Ptr + 1) = ' ' then
2395                   if Source (Scan_Ptr + 2) = ' ' then
2396                      if Source (Scan_Ptr + 3) = ' ' then
2397                         if Source (Scan_Ptr + 4) = ' ' then
2398                            if Source (Scan_Ptr + 5) = ' ' then
2399                               if Source (Scan_Ptr + 6) = ' ' then
2400                                  Scan_Ptr := Scan_Ptr + 7;
2401                                  Start_Column := Start_Column + 7;
2402                               else
2403                                  Scan_Ptr := Scan_Ptr + 6;
2404                                  Start_Column := Start_Column + 6;
2405                                  exit Blanks_Loop;
2406                               end if;
2407                            else
2408                               Scan_Ptr := Scan_Ptr + 5;
2409                               Start_Column := Start_Column + 5;
2410                               exit Blanks_Loop;
2411                            end if;
2412                         else
2413                            Scan_Ptr := Scan_Ptr + 4;
2414                            Start_Column := Start_Column + 4;
2415                            exit Blanks_Loop;
2416                         end if;
2417                      else
2418                         Scan_Ptr := Scan_Ptr + 3;
2419                         Start_Column := Start_Column + 3;
2420                         exit Blanks_Loop;
2421                      end if;
2422                   else
2423                      Scan_Ptr := Scan_Ptr + 2;
2424                      Start_Column := Start_Column + 2;
2425                      exit Blanks_Loop;
2426                   end if;
2427                else
2428                   Scan_Ptr := Scan_Ptr + 1;
2429                   Start_Column := Start_Column + 1;
2430                   exit Blanks_Loop;
2431                end if;
2432             else
2433                exit Blanks_Loop;
2434             end if;
2435          end loop Blanks_Loop;
2436
2437          --  Outer loop keeps going only if a horizontal tab follows
2438
2439          if Source (Scan_Ptr) = HT then
2440             if Style_Check then Style.Check_HT; end if;
2441             Scan_Ptr := Scan_Ptr + 1;
2442             Start_Column := (Start_Column / 8) * 8 + 8;
2443          else
2444             exit Tabs_Loop;
2445          end if;
2446
2447       end loop Tabs_Loop;
2448
2449       return Start_Column;
2450    end Set_Start_Column;
2451
2452 end Scng;